epub-translator 0.0.7__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. epub_translator/__init__.py +4 -2
  2. epub_translator/data/fill.jinja +66 -0
  3. epub_translator/data/mmltex/README.md +67 -0
  4. epub_translator/data/mmltex/cmarkup.xsl +1106 -0
  5. epub_translator/data/mmltex/entities.xsl +459 -0
  6. epub_translator/data/mmltex/glayout.xsl +222 -0
  7. epub_translator/data/mmltex/mmltex.xsl +36 -0
  8. epub_translator/data/mmltex/scripts.xsl +375 -0
  9. epub_translator/data/mmltex/tables.xsl +130 -0
  10. epub_translator/data/mmltex/tokens.xsl +328 -0
  11. epub_translator/data/translate.jinja +15 -12
  12. epub_translator/epub/__init__.py +4 -2
  13. epub_translator/epub/common.py +43 -0
  14. epub_translator/epub/math.py +193 -0
  15. epub_translator/epub/placeholder.py +53 -0
  16. epub_translator/epub/spines.py +42 -0
  17. epub_translator/epub/toc.py +505 -0
  18. epub_translator/epub/zip.py +67 -0
  19. epub_translator/iter_sync.py +24 -0
  20. epub_translator/language.py +23 -0
  21. epub_translator/llm/__init__.py +2 -1
  22. epub_translator/llm/core.py +233 -0
  23. epub_translator/llm/error.py +38 -35
  24. epub_translator/llm/executor.py +159 -136
  25. epub_translator/llm/increasable.py +28 -28
  26. epub_translator/llm/types.py +17 -0
  27. epub_translator/serial/__init__.py +2 -0
  28. epub_translator/serial/chunk.py +52 -0
  29. epub_translator/serial/segment.py +17 -0
  30. epub_translator/serial/splitter.py +50 -0
  31. epub_translator/template.py +35 -33
  32. epub_translator/translator.py +208 -178
  33. epub_translator/utils.py +7 -0
  34. epub_translator/xml/__init__.py +4 -3
  35. epub_translator/xml/deduplication.py +38 -0
  36. epub_translator/xml/firendly/__init__.py +2 -0
  37. epub_translator/xml/firendly/decoder.py +75 -0
  38. epub_translator/xml/firendly/encoder.py +84 -0
  39. epub_translator/xml/firendly/parser.py +177 -0
  40. epub_translator/xml/firendly/tag.py +118 -0
  41. epub_translator/xml/firendly/transform.py +36 -0
  42. epub_translator/xml/xml.py +52 -0
  43. epub_translator/xml/xml_like.py +231 -0
  44. epub_translator/xml_translator/__init__.py +3 -0
  45. epub_translator/xml_translator/const.py +2 -0
  46. epub_translator/xml_translator/fill.py +128 -0
  47. epub_translator/xml_translator/format.py +282 -0
  48. epub_translator/xml_translator/fragmented.py +125 -0
  49. epub_translator/xml_translator/group.py +183 -0
  50. epub_translator/xml_translator/progressive_locking.py +256 -0
  51. epub_translator/xml_translator/submitter.py +102 -0
  52. epub_translator/xml_translator/text_segment.py +263 -0
  53. epub_translator/xml_translator/translator.py +179 -0
  54. epub_translator/xml_translator/utils.py +29 -0
  55. epub_translator-0.1.1.dist-info/METADATA +283 -0
  56. epub_translator-0.1.1.dist-info/RECORD +58 -0
  57. epub_translator/data/format.jinja +0 -33
  58. epub_translator/epub/content_parser.py +0 -162
  59. epub_translator/epub/html/__init__.py +0 -1
  60. epub_translator/epub/html/dom_operator.py +0 -68
  61. epub_translator/epub/html/empty_tags.py +0 -23
  62. epub_translator/epub/html/file.py +0 -80
  63. epub_translator/epub/html/texts_searcher.py +0 -46
  64. epub_translator/llm/node.py +0 -201
  65. epub_translator/translation/__init__.py +0 -2
  66. epub_translator/translation/chunk.py +0 -118
  67. epub_translator/translation/splitter.py +0 -78
  68. epub_translator/translation/store.py +0 -36
  69. epub_translator/translation/translation.py +0 -231
  70. epub_translator/translation/types.py +0 -45
  71. epub_translator/translation/utils.py +0 -11
  72. epub_translator/xml/decoder.py +0 -71
  73. epub_translator/xml/encoder.py +0 -95
  74. epub_translator/xml/parser.py +0 -172
  75. epub_translator/xml/tag.py +0 -93
  76. epub_translator/xml/transform.py +0 -34
  77. epub_translator/xml/utils.py +0 -12
  78. epub_translator/zip_context.py +0 -74
  79. epub_translator-0.0.7.dist-info/METADATA +0 -170
  80. epub_translator-0.0.7.dist-info/RECORD +0 -36
  81. {epub_translator-0.0.7.dist-info → epub_translator-0.1.1.dist-info}/LICENSE +0 -0
  82. {epub_translator-0.0.7.dist-info → epub_translator-0.1.1.dist-info}/WHEEL +0 -0
epub_translator/epub/html/file.py
@@ -1,80 +0,0 @@
- import re
-
- from typing import Iterable
- from xml.etree.ElementTree import fromstring, tostring, Element
- from .dom_operator import read_texts, write_texts
- from .empty_tags import to_xml, to_html
-
-
- _FILE_HEAD_PATTERN = re.compile(r"^<\?xml.*?\?>[\s]*<!DOCTYPE.*?>")
- _XMLNS_IN_TAG = re.compile(r"\{[^}]+\}")
- _BRACES = re.compile(r"(\{|\})")
-
- class HTMLFile:
-     def __init__(self, file_content: str):
-         match = re.match(_FILE_HEAD_PATTERN, file_content)
-         xml_content = re.sub(_FILE_HEAD_PATTERN, "", to_xml(file_content))
-         self._head: str = match.group() if match else None
-         self._root: Element = fromstring(xml_content)
-         self._xmlns: str | None = self._extract_xmlns(self._root)
-         self._texts_length: int | None = None
-
-     def _extract_xmlns(self, root: Element) -> str | None:
-         root_xmlns: str | None = None
-         for i, element in enumerate(_all_elements(root)):
-             need_clean_xmlns = True
-             match = re.match(_XMLNS_IN_TAG, element.tag)
-
-             if match:
-                 xmlns = re.sub(_BRACES, "", match.group())
-                 if i == 0:
-                     root_xmlns = xmlns
-                 elif root_xmlns != xmlns:
-                     need_clean_xmlns = False
-             if need_clean_xmlns:
-                 element.tag = re.sub(_XMLNS_IN_TAG, "", element.tag)
-
-         return root_xmlns
-
-     def read_texts(self) -> list[str]:
-         texts = list(read_texts(self._root))
-         self._texts_length = len(texts)
-         return texts
-
-     def write_texts(self, texts: Iterable[str], append: bool):
-         write_texts(self._root, texts, append)
-
-     @property
-     def texts_length(self) -> int:
-         if self._texts_length is None:
-             self._texts_length = 0
-             for _ in read_texts(self._root):
-                 self._texts_length += 1
-         return self._texts_length
-
-     @property
-     def file_content(self) -> str:
-         file_content: str
-         if self._xmlns is None:
-             file_content = tostring(self._root, encoding="unicode")
-             file_content = to_html(file_content)
-         else:
-             root = Element(
-                 self._root.tag,
-                 attrib={**self._root.attrib, "xmlns": self._xmlns},
-             )
-             root.extend(self._root)
-             # XHTML disallows <tag/>; we need to replace them with <tag></tag>
-             for element in _all_elements(root):
-                 if element.text is None:
-                     element.text = ""
-             file_content = tostring(root, encoding="unicode")
-
-         if self._head is not None:
-             file_content = self._head + file_content
-         return file_content
-
- def _all_elements(parent: Element):
-     yield parent
-     for child in parent:
-         yield from _all_elements(child)
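The hunk above removes the 0.0.7 HTMLFile wrapper (epub_translator/epub/html/file.py). For context, here is a minimal sketch of how that class was driven, using only the constructor and methods visible in the removed code; the import path follows the old 0.0.7 layout and the sample XHTML string is a made-up placeholder.

```python
# Illustrative sketch against the removed 0.0.7 API shown above (not part of the diff).
from epub_translator.epub.html.file import HTMLFile  # 0.0.7 module path, removed in 0.1.1

xhtml = (
    '<?xml version="1.0"?><!DOCTYPE html>'
    '<html xmlns="http://www.w3.org/1999/xhtml"><body><p>Hello</p></body></html>'
)
page = HTMLFile(xhtml)

texts = page.read_texts()                  # text fragments in document order
translated = [t.upper() for t in texts]    # stand-in for a real translation step
page.write_texts(translated, append=True)  # append=True keeps the source text alongside

print(page.texts_length)   # number of text fragments found
print(page.file_content)   # re-serialized XHTML with the <?xml ...> head restored
```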
epub_translator/epub/html/texts_searcher.py
@@ -1,46 +0,0 @@
- from typing import Generator, TypeGuard
- from enum import auto, Enum
- from xml.etree.ElementTree import Element
-
-
- class TextPosition(Enum):
-     WHOLE_DOM = auto()
-     TEXT = auto()
-     TAIL = auto()
-
- # element, position, parent
- TextDescription = tuple[Element, TextPosition, Element | None]
-
- _IGNORE_TAGS = (
-     "title", "link", "style", "css", "img", "script", "metadata",
-     "{http://www.w3.org/1998/Math/MathML}math",  # TODO: formulas are body text and should be read in too; ignored for now to avoid disruption.
- )
-
- _TEXT_LEAF_TAGS = (
-     "a", "b", "br", "hr", "span", "em", "strong", "label",
- )
-
- def search_texts(element: Element, parent: Element | None = None) -> Generator[TextDescription, None, None]:
-     if element.tag in _IGNORE_TAGS:
-         return
-
-     if any(c.tag not in _TEXT_LEAF_TAGS for c in element):
-         if _is_not_empty_str(element.text):
-             yield element, TextPosition.TEXT, parent
-         for child in element:
-             if child.tag in _TEXT_LEAF_TAGS:
-                 yield child, TextPosition.WHOLE_DOM, element
-             else:
-                 yield from search_texts(child, element)
-                 if _is_not_empty_str(child.tail):
-                     yield child, TextPosition.TAIL, element
-     else:
-         yield element, TextPosition.WHOLE_DOM, parent
-
- def _is_not_empty_str(text: str | None) -> TypeGuard[str]:
-     if text is None:
-         return False
-     for char in text:
-         if char not in (" ", "\n"):
-             return True
-     return False
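The removed texts_searcher.py above yields (element, position, parent) tuples that tell the caller whether a translatable string lives in an element's text, in its tail, or whether the whole element should be handled as one unit. A small sketch of consuming that generator follows; the import path is assumed from the 0.0.7 layout and the markup is made up.

```python
# Illustrative sketch against the removed 0.0.7 search_texts() shown above.
from xml.etree.ElementTree import fromstring
from epub_translator.epub.html.texts_searcher import search_texts, TextPosition

body = fromstring("<body><div>Intro<p>One <em>two</em> three</p></div></body>")

for element, position, parent in search_texts(body):
    if position is TextPosition.TEXT:
        print("text :", element.tag, repr(element.text))
    elif position is TextPosition.TAIL:
        print("tail :", element.tag, repr(element.tail))
    else:  # TextPosition.WHOLE_DOM: element contains only leaf tags, treat it as one unit
        print("whole:", element.tag)
```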
epub_translator/llm/node.py
@@ -1,201 +0,0 @@
- import datetime
-
- from os import PathLike
- from pathlib import Path
- from typing import cast, Any, TypeVar, Generator, Sequence, Callable
- from importlib.resources import files
- from jinja2 import Environment, Template
- from xml.etree.ElementTree import Element
- from pydantic import SecretStr
- from logging import getLogger, DEBUG, Formatter, Logger, FileHandler
- from tiktoken import get_encoding, Encoding
- from langchain_core.messages import SystemMessage, HumanMessage
-
- from ..template import create_env
- from ..xml import decode_friendly, encode_friendly
- from .increasable import Increasable
- from .executor import LLMExecutor
-
-
- R = TypeVar("R")
-
- class LLM:
-     def __init__(
-         self,
-         key: str,
-         url: str,
-         model: str,
-         token_encoding: str,
-         timeout: float | None = None,
-         top_p: float | tuple[float, float] | None = None,
-         temperature: float | tuple[float, float] | None = None,
-         retry_times: int = 5,
-         retry_interval_seconds: float = 6.0,
-         log_dir_path: PathLike | None = None,
-     ):
-         prompts_path = files("epub_translator") / "data"
-         self._templates: dict[str, Template] = {}
-         self._encoding: Encoding = get_encoding(token_encoding)
-         self._env: Environment = create_env(prompts_path)
-         self._logger_save_path: Path | None = None
-
-         if log_dir_path is not None:
-             self._logger_save_path = Path(log_dir_path)
-             if not self._logger_save_path.exists():
-                 self._logger_save_path.mkdir(parents=True, exist_ok=True)
-             elif not self._logger_save_path.is_dir():
-                 self._logger_save_path = None
-
-         self._executor = LLMExecutor(
-             url=url,
-             model=model,
-             api_key=cast(SecretStr, key),
-             timeout=timeout,
-             top_p=Increasable(top_p),
-             temperature=Increasable(temperature),
-             retry_times=retry_times,
-             retry_interval_seconds=retry_interval_seconds,
-             create_logger=self._create_logger,
-         )
-
-     def _create_logger(self) -> Logger | None:
-         if self._logger_save_path is None:
-             return None
-
-         now = datetime.datetime.now(datetime.timezone.utc)
-         timestamp = now.strftime("%Y-%m-%d %H-%M-%S %f")
-         file_path = self._logger_save_path / f"request {timestamp}.log"
-         logger = getLogger(f"LLM Request {timestamp}")
-         logger.setLevel(DEBUG)
-         handler = FileHandler(file_path, encoding="utf-8")
-         handler.setLevel(DEBUG)
-         handler.setFormatter(Formatter("%(asctime)s %(message)s", "%H:%M:%S"))
-         logger.addHandler(handler)
-
-         return logger
-
-     def request_text(
-         self,
-         template_name: str,
-         text_tag: str,
-         user_data: Element | str,
-         parser: Callable[[str], R],
-         max_tokens: int | None = None,
-         params: dict[str, Any] | None = None,
-     ) -> R:
-
-         if params is None:
-             params = {}
-
-         def parse_response(response: str) -> R:
-             text = next(self._search_quotes(text_tag.lower(), response), None)
-             if text is None:
-                 raise ValueError(f"No valid {text_tag} response found")
-             return parser(text)
-
-         return self._executor.request(
-             input=self._create_input(template_name, user_data, params),
-             parser=parse_response,
-             max_tokens=max_tokens,
-         )
-
-     def request_xml(
-         self,
-         template_name: str,
-         user_data: Element | str,
-         parser: Callable[[Element], R],
-         max_tokens: int | None = None,
-         params: dict[str, Any] | None = None,
-     ) -> R:
-
-         if params is None:
-             params = {}
-
-         def parse_response(response: str) -> R:
-             element = next(decode_friendly(response, "response"), None)
-             if element is None:
-                 raise ValueError("No valid XML response found")
-             return parser(element)
-
-         return self._executor.request(
-             input=self._create_input(template_name, user_data, params),
-             parser=parse_response,
-             max_tokens=max_tokens,
-         )
-
-     def _create_input(self, template_name: str, user_data: Element | str, params: dict[str, Any]):
-         data: str
-         if isinstance(user_data, Element):
-             data = encode_friendly(user_data)
-             data = f"```XML\n{data}\n```"
-         else:
-             data = user_data
-
-         template = self._template(template_name)
-         prompt = template.render(**params)
-         return [
-             SystemMessage(content=prompt),
-             HumanMessage(content=data)
-         ]
-
-     def prompt_tokens_count(self, template_name: str, params: dict[str, Any]) -> int:
-         template = self._template(template_name)
-         prompt = template.render(**params)
-         return len(self._encoding.encode(prompt))
-
-     def encode_tokens(self, text: str) -> list[int]:
-         return self._encoding.encode(text)
-
-     def decode_tokens(self, tokens: Sequence[int]) -> str:
-         return self._encoding.decode(tokens)
-
-     def count_tokens_count(self, text: str) -> int:
-         return len(self._encoding.encode(text))
-
-     def _template(self, template_name: str) -> Template:
-         template = self._templates.get(template_name, None)
-         if template is None:
-             template = self._env.get_template(template_name)
-             self._templates[template_name] = template
-         return template
-
-     def _search_quotes(self, kind: str, response: str) -> Generator[str, None, None]:
-         start_marker = f"```{kind}"
-         end_marker = "```"
-         start_index = 0
-
-         while True:
-             start_index = self._find_ignore_case(
-                 raw=response,
-                 sub=start_marker,
-                 start=start_index,
-             )
-             if start_index == -1:
-                 break
-
-             end_index = self._find_ignore_case(
-                 raw=response,
-                 sub=end_marker,
-                 start=start_index + len(start_marker),
-             )
-             if end_index == -1:
-                 break
-
-             extracted_text = response[start_index + len(start_marker):end_index].strip()
-             yield extracted_text
-             start_index = end_index + len(end_marker)
-
-     def _find_ignore_case(self, raw: str, sub: str, start: int = 0):
-         if not sub:
-             return 0 if 0 >= start else -1
-
-         raw_len, sub_len = len(raw), len(sub)
-         for i in range(start, raw_len - sub_len + 1):
-             match = True
-             for j in range(sub_len):
-                 if raw[i + j].lower() != sub[j].lower():
-                     match = False
-                     break
-             if match:
-                 return i
-         return -1
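The removed llm/node.py above (superseded by llm/core.py in 0.1.1) wrapped a LangChain chat model together with tiktoken-based token accounting. A hedged sketch of constructing it and using the token helpers defined above; the key, URL, and model values are placeholders, and the module import path is assumed from the 0.0.7 layout.

```python
# Illustrative sketch against the removed 0.0.7 LLM class shown above.
from epub_translator.llm.node import LLM  # 0.0.7 module path, removed in 0.1.1

llm = LLM(
    key="sk-...",                       # API key (placeholder)
    url="https://api.example.com/v1",   # OpenAI-compatible endpoint (placeholder)
    model="gpt-4o-mini",                # model name (placeholder)
    token_encoding="cl100k_base",       # passed straight to tiktoken.get_encoding()
    retry_times=3,
    log_dir_path="./llm-logs",          # optional: one log file per request
)

tokens = llm.encode_tokens("Hello, world")
assert llm.count_tokens_count("Hello, world") == len(tokens)
print(llm.decode_tokens(tokens))        # -> "Hello, world"
```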
epub_translator/translation/__init__.py
@@ -1,2 +0,0 @@
- from .types import Incision, Fragment, Language
- from .translation import translate, ProgressReporter
epub_translator/translation/chunk.py
@@ -1,118 +0,0 @@
- from dataclasses import dataclass
- from typing import Iterator, Iterable, Generator
- from hashlib import sha512
- from ..llm import LLM
- from .types import Fragment, Language
-
-
- @dataclass
- class Chunk:
-     index: int
-     hash: bytes
-     head: list[str]
-     body: list[str]
-     tail: list[str]
-     tokens_count: int
-
- @dataclass
- class ChunkRange:
-     index: int
-     head_remain_tokens: int
-     tail_remain_tokens: int
-     head_index: int
-     body_index: int
-     tail_index: int
-     fragments_count: int
-     tokens_count: int
-
-     def match(self, index: int) -> bool:
-         return self.head_index <= index < self.head_index + self.fragments_count
-
- def match_fragments(
-     llm: LLM,
-     target_language: Language,
-     chunk_ranges_iter: Iterator[ChunkRange],
-     fragments_iter: Iterator[Fragment],
- ) -> Generator[Chunk, None, None]:
-
-     for range, texts in _match_range_and_texts(
-         chunk_range_iter=chunk_ranges_iter,
-         fragments_iter=fragments_iter,
-     ):
-         head_length = range.body_index - range.head_index
-         body_length = range.tail_index - range.body_index
-         head = texts[:head_length]
-         body = texts[head_length:head_length + body_length]
-         tail = texts[head_length + body_length:]
-
-         hash = _hash_texts_list(target_language, (head, body, tail))
-         head = _crop_extra_texts(llm, head, True, range.head_remain_tokens)
-         tail = _crop_extra_texts(llm, tail, False, range.tail_remain_tokens)
-
-         yield Chunk(
-             hash=hash,
-             head=head,
-             body=body,
-             tail=tail,
-             index=range.index,
-             tokens_count=range.tokens_count,
-         )
-
- def _match_range_and_texts(
-     chunk_range_iter: Iterator[ChunkRange],
-     fragments_iter: Iterator[Fragment],
- ) -> Generator[tuple[ChunkRange, list[str]], None, None]:
-
-     next_chunk_range: ChunkRange | None = None
-     matched_chunk_ranges: list[tuple[ChunkRange, list[str]]] = []
-
-     for index, fragment in enumerate(fragments_iter):
-         while True:
-             if next_chunk_range is None:
-                 next_chunk_range = next(chunk_range_iter, None)
-                 if next_chunk_range is None:
-                     break
-             if not next_chunk_range.match(index):
-                 break
-             matched_chunk_ranges.append((next_chunk_range, []))
-             next_chunk_range = None
-
-         if matched_chunk_ranges:
-             next_matched_chunks: list[tuple[ChunkRange, list[str]]] = []
-             for chunk_range, texts in matched_chunk_ranges:
-                 if chunk_range.match(index):
-                     texts.append(fragment.text)
-                     next_matched_chunks.append((chunk_range, texts))
-                 else:
-                     yield chunk_range, texts
-             matched_chunk_ranges = next_matched_chunks
-
-     yield from matched_chunk_ranges
-
- def _hash_texts_list(target_language: Language, texts_iterable: Iterable[list[str]]) -> bytes:
-     m = sha512()
-     m.update(target_language.value.encode("utf-8"))
-     for texts in texts_iterable:
-         for text in texts:
-             m.update(b"\x00")
-             m.update(text.encode("utf-8"))
-     return m.digest()
-
- def _crop_extra_texts(llm: LLM, texts: list[str], crop_left: bool, remain_tokens_count: int):
-     tokens_list: list[list[int]] = [llm.encode_tokens(text) for text in texts]
-     remain_texts: list[str] = []
-
-     for tokens in (reversed(tokens_list) if crop_left else tokens_list):
-         tokens_count = len(tokens)
-         if remain_tokens_count >= tokens_count:
-             remain_tokens_count -= tokens_count
-             remain_texts.append(llm.decode_tokens(tokens))
-             if remain_tokens_count == 0:
-                 break
-         else:
-             remain_tokens = tokens[-remain_tokens_count:] if crop_left else tokens[:remain_tokens_count]
-             remain_texts.append(llm.decode_tokens(remain_tokens))
-
-     if crop_left:
-         remain_texts.reverse()
-     return remain_texts
epub_translator/translation/splitter.py
@@ -1,78 +0,0 @@
- from typing import Iterator, Generator
- from resource_segmentation import split, Resource, Segment
-
- from ..llm import LLM
- from .types import Fragment, Incision
- from .chunk import ChunkRange
-
-
- def split_into_chunks(llm: LLM, fragments_iter: Iterator[Fragment], max_chunk_tokens_count: int):
-     for index, group in enumerate(split(
-         resources=_gen_resources(llm, fragments_iter),
-         max_segment_count=max_chunk_tokens_count,
-         gap_rate=0.15,
-         tail_rate=0.5,
-         border_incision=Incision.IMPOSSIBLE,
-     )):
-         head_index: int
-         tail_index: int
-         fragments_count: int
-         body_index, body_end_index, body_tokens_count = _group_part(group.body)
-
-         if group.head:
-             head_index, head_end_index, _ = _group_part(group.head)
-             assert head_end_index + 1 == body_index, "Head must be continuous with body"
-         else:
-             head_index = body_index
-
-         if group.tail:
-             tail_index, tail_end_index, _ = _group_part(group.tail)
-             fragments_count = tail_end_index - head_index + 1
-             assert body_end_index + 1 == tail_index, "Body must be continuous with tail"
-         else:
-             tail_index = body_end_index + 1
-             fragments_count = tail_index - head_index
-
-         yield ChunkRange(
-             index=index,
-             head_remain_tokens=group.head_remain_count,
-             tail_remain_tokens=group.tail_remain_count,
-             head_index=head_index,
-             body_index=body_index,
-             tail_index=tail_index,
-             fragments_count=fragments_count,
-             tokens_count=body_tokens_count,
-         )
-
- def _gen_resources(llm: LLM, fragments_iter: Iterator[Fragment]) -> Generator[Resource[int], None, None]:
-     for index, fragment in enumerate(fragments_iter):
-         yield Resource(
-             count=llm.count_tokens_count(fragment.text),
-             start_incision=fragment.start_incision,
-             end_incision=fragment.end_incision,
-             payload=index,
-         )
-
- def _group_part(target: list[Resource[int] | Segment[int]]) -> tuple[int, int, int]:
-     start_index: int | None = None
-     previous_index: int = 0
-     tokens_count: int = 0
-     for resource in _iter_group_part(target):
-         index = resource.payload
-         if start_index is None:
-             start_index = index
-         else:
-             assert index == previous_index + 1, "Resources in group part must be continuous"
-         previous_index = index
-         tokens_count += resource.count
-
-     assert start_index is not None, "Group part must contain at least one resource"
-     return start_index, previous_index, tokens_count
-
- def _iter_group_part(target: list[Resource[int] | Segment[int]]) -> Generator[Resource[int], None, None]:
-     for item in target:
-         if isinstance(item, Resource):
-             yield item
-         elif isinstance(item, Segment):
-             for resource in item.resources:
-                 yield resource
epub_translator/translation/store.py
@@ -1,36 +0,0 @@
- from shutil import rmtree
- from pathlib import Path
- from typing import Iterable
-
-
- class Store:
-     def __init__(self, directory: Path):
-         self._directory = directory
-
-     def get(self, chunk_hash: bytes) -> list[str] | None:
-         file_path = self._file_path(chunk_hash)
-         if not file_path.exists() or not file_path.is_file():
-             return None
-         with file_path.open("r", encoding="utf-8") as file:
-             return file.read().split("\n")
-
-     def put(self, chunk_hash: bytes, lines: Iterable[str]):
-         file_path = self._file_path(chunk_hash)
-         if file_path.exists():
-             if file_path.is_file():
-                 file_path.unlink()
-             else:
-                 rmtree(file_path)
-
-         file_path.parent.mkdir(parents=True, exist_ok=True)
-         with file_path.open("w", encoding="utf-8") as file:
-             is_first_line = True
-             for line in lines:
-                 if is_first_line:
-                     is_first_line = False
-                 else:
-                     file.write("\n")
-                 file.write(line)
-
-     def _file_path(self, chunk_hash: bytes) -> Path:
-         return self._directory / f"{chunk_hash.hex()}.chunk"
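The removed translation/store.py above is a small on-disk cache of translated chunks, keyed by the sha512 digest that chunk.py computes over a chunk's texts and target language. A minimal sketch of the round trip it supported; the import path is assumed from the 0.0.7 layout and the sample values are placeholders.

```python
# Illustrative sketch against the removed 0.0.7 Store class shown above.
from hashlib import sha512
from pathlib import Path
from epub_translator.translation.store import Store  # 0.0.7 module path, removed in 0.1.1

store = Store(Path("./translation-cache"))
chunk_hash = sha512(b"source text of one chunk").digest()

store.put(chunk_hash, ["first translated line", "second translated line"])
print(store.get(chunk_hash))                      # ['first translated line', 'second translated line']
print(store.get(sha512(b"not cached").digest()))  # None: no <hash>.chunk file yet
```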