epub-translator 0.0.7__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. epub_translator/__init__.py +4 -2
  2. epub_translator/data/fill.jinja +66 -0
  3. epub_translator/data/mmltex/README.md +67 -0
  4. epub_translator/data/mmltex/cmarkup.xsl +1106 -0
  5. epub_translator/data/mmltex/entities.xsl +459 -0
  6. epub_translator/data/mmltex/glayout.xsl +222 -0
  7. epub_translator/data/mmltex/mmltex.xsl +36 -0
  8. epub_translator/data/mmltex/scripts.xsl +375 -0
  9. epub_translator/data/mmltex/tables.xsl +130 -0
  10. epub_translator/data/mmltex/tokens.xsl +328 -0
  11. epub_translator/data/translate.jinja +15 -12
  12. epub_translator/epub/__init__.py +4 -2
  13. epub_translator/epub/common.py +43 -0
  14. epub_translator/epub/math.py +193 -0
  15. epub_translator/epub/placeholder.py +53 -0
  16. epub_translator/epub/spines.py +42 -0
  17. epub_translator/epub/toc.py +505 -0
  18. epub_translator/epub/zip.py +67 -0
  19. epub_translator/iter_sync.py +24 -0
  20. epub_translator/language.py +23 -0
  21. epub_translator/llm/__init__.py +2 -1
  22. epub_translator/llm/core.py +233 -0
  23. epub_translator/llm/error.py +38 -35
  24. epub_translator/llm/executor.py +159 -136
  25. epub_translator/llm/increasable.py +28 -28
  26. epub_translator/llm/types.py +17 -0
  27. epub_translator/serial/__init__.py +2 -0
  28. epub_translator/serial/chunk.py +52 -0
  29. epub_translator/serial/segment.py +17 -0
  30. epub_translator/serial/splitter.py +50 -0
  31. epub_translator/template.py +35 -33
  32. epub_translator/translator.py +208 -178
  33. epub_translator/utils.py +7 -0
  34. epub_translator/xml/__init__.py +4 -3
  35. epub_translator/xml/deduplication.py +38 -0
  36. epub_translator/xml/firendly/__init__.py +2 -0
  37. epub_translator/xml/firendly/decoder.py +75 -0
  38. epub_translator/xml/firendly/encoder.py +84 -0
  39. epub_translator/xml/firendly/parser.py +177 -0
  40. epub_translator/xml/firendly/tag.py +118 -0
  41. epub_translator/xml/firendly/transform.py +36 -0
  42. epub_translator/xml/xml.py +52 -0
  43. epub_translator/xml/xml_like.py +231 -0
  44. epub_translator/xml_translator/__init__.py +3 -0
  45. epub_translator/xml_translator/const.py +2 -0
  46. epub_translator/xml_translator/fill.py +128 -0
  47. epub_translator/xml_translator/format.py +282 -0
  48. epub_translator/xml_translator/fragmented.py +125 -0
  49. epub_translator/xml_translator/group.py +183 -0
  50. epub_translator/xml_translator/progressive_locking.py +256 -0
  51. epub_translator/xml_translator/submitter.py +102 -0
  52. epub_translator/xml_translator/text_segment.py +263 -0
  53. epub_translator/xml_translator/translator.py +179 -0
  54. epub_translator/xml_translator/utils.py +29 -0
  55. epub_translator-0.1.1.dist-info/METADATA +283 -0
  56. epub_translator-0.1.1.dist-info/RECORD +58 -0
  57. epub_translator/data/format.jinja +0 -33
  58. epub_translator/epub/content_parser.py +0 -162
  59. epub_translator/epub/html/__init__.py +0 -1
  60. epub_translator/epub/html/dom_operator.py +0 -68
  61. epub_translator/epub/html/empty_tags.py +0 -23
  62. epub_translator/epub/html/file.py +0 -80
  63. epub_translator/epub/html/texts_searcher.py +0 -46
  64. epub_translator/llm/node.py +0 -201
  65. epub_translator/translation/__init__.py +0 -2
  66. epub_translator/translation/chunk.py +0 -118
  67. epub_translator/translation/splitter.py +0 -78
  68. epub_translator/translation/store.py +0 -36
  69. epub_translator/translation/translation.py +0 -231
  70. epub_translator/translation/types.py +0 -45
  71. epub_translator/translation/utils.py +0 -11
  72. epub_translator/xml/decoder.py +0 -71
  73. epub_translator/xml/encoder.py +0 -95
  74. epub_translator/xml/parser.py +0 -172
  75. epub_translator/xml/tag.py +0 -93
  76. epub_translator/xml/transform.py +0 -34
  77. epub_translator/xml/utils.py +0 -12
  78. epub_translator/zip_context.py +0 -74
  79. epub_translator-0.0.7.dist-info/METADATA +0 -170
  80. epub_translator-0.0.7.dist-info/RECORD +0 -36
  81. {epub_translator-0.0.7.dist-info → epub_translator-0.1.1.dist-info}/LICENSE +0 -0
  82. {epub_translator-0.0.7.dist-info → epub_translator-0.1.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,505 @@
1
+ from dataclasses import dataclass, field
2
+ from pathlib import Path
3
+ from xml.etree import ElementTree as ET
4
+ from xml.etree.ElementTree import Element
5
+
6
+ from ..xml.xml import plain_text
7
+ from .common import extract_namespace, find_opf_path, strip_namespace
8
+ from .zip import Zip
9
+
10
+
11
+ @dataclass
12
+ class Toc:
13
+ """
14
+ EPUB 2.0 对应关系:
15
+ - title <-> <navLabel><text>
16
+ - href <-> <content src> (不包含 # 后的部分)
17
+ - fragment <-> <content src> (# 后的部分)
18
+ - children <-> 嵌套的 <navPoint>
19
+ - id <-> <navPoint id>
20
+
21
+ EPUB 3.0 对应关系:
22
+ - title <-> <a> 标签的文本内容
23
+ - href <-> <a href> (不包含 # 后的部分)
24
+ - fragment <-> <a href> (# 后的部分)
25
+ - children <-> 嵌套的 <ol><li>
26
+ - id <-> <li id> 或 <a id>
27
+ """
28
+
29
+ title: str
30
+ href: str | None = None
31
+ fragment: str | None = None
32
+ id: str | None = None
33
+ children: list["Toc"] = field(default_factory=list)
34
+
35
+ @property
36
+ def full_href(self) -> str | None:
37
+ if self.href is None:
38
+ return None
39
+ if self.fragment:
40
+ return f"{self.href}#{self.fragment}"
41
+ return self.href
42
+
43
+
44
def read_toc(zip: Zip) -> list[Toc]:
    """Read the table of contents of the EPUB behind ``zip``.

    The detected package version selects the parser: version 2 goes through
    the NCX reader, anything else through the nav-document reader.

    Returns:
        The top-level TOC entries, or an empty list when no TOC file is found.
    """
    epub_version = _detect_epub_version(zip)
    location = _find_toc_path(zip, epub_version)
    if location is None:
        return []

    reader = _read_ncx_toc if epub_version == 2 else _read_nav_toc
    return reader(zip, location)
55
+
56
+
57
def write_toc(zip: Zip, toc: list[Toc]) -> None:
    """Write ``toc`` back into the EPUB's TOC file.

    The detected package version selects the writer: version 2 goes through
    the NCX writer, anything else through the nav-document writer.

    Raises:
        ValueError: if no TOC file can be located in the EPUB.
    """
    epub_version = _detect_epub_version(zip)
    location = _find_toc_path(zip, epub_version)
    if location is None:
        raise ValueError("Cannot find TOC file in EPUB")

    writer = _write_ncx_toc if epub_version == 2 else _write_nav_toc
    writer(zip, location, toc)
68
+
69
+
70
def _detect_epub_version(zip: Zip) -> int:
    """Return the major EPUB version (2 or 3) declared by the OPF package.

    The ``version`` attribute of the OPF root element is inspected; a missing
    attribute is treated as "2.0". Any value starting with "3" maps to 3,
    everything else maps to 2.
    """
    with zip.read(find_opf_path(zip)) as opf_file:
        package = ET.fromstring(opf_file.read())

    # Inspect the version attribute of the package element.
    declared = package.get("version", "2.0")
    return 3 if declared.startswith("3") else 2
83
+
84
+
85
def _find_toc_path(zip: Zip, version: int) -> Path | None:
    """Locate the TOC file referenced by the OPF manifest.

    For version 2 the first manifest item with media type
    ``application/x-dtbncx+xml`` (the NCX file) and a non-empty ``href`` is
    used; otherwise the first item whose ``properties`` contain ``nav``.

    Returns:
        The item's ``href`` resolved against the OPF file's directory, or
        ``None`` when the manifest or a matching item is missing.
    """
    opf_path = find_opf_path(zip)
    with zip.read(opf_path) as opf_file:
        package = ET.fromstring(opf_file.read())
    strip_namespace(package)  # strip namespace prefixes to simplify XPath lookups

    manifest = package.find(".//manifest")
    if manifest is None:
        return None

    if version == 2:
        # EPUB 2: look for the NCX file (media-type="application/x-dtbncx+xml").
        def is_toc_item(item: Element) -> bool:
            return item.get("media-type") == "application/x-dtbncx+xml"
    else:
        # EPUB 3: look for the nav document (properties="nav").
        def is_toc_item(item: Element) -> bool:
            return "nav" in item.get("properties", "").split()

    for item in manifest.findall("item"):
        if not is_toc_item(item):
            continue
        href = item.get("href")
        if href:
            return opf_path.parent / href

    return None
116
+
117
+
118
def _read_ncx_toc(zip: Zip, ncx_path: Path) -> list[Toc]:
    """Parse the NCX file at ``ncx_path`` into a list of top-level entries.

    Returns an empty list when the document has no ``navMap``. Nav points that
    cannot be parsed into an entry are skipped.
    """
    with zip.read(ncx_path) as ncx_file:
        ncx_root = ET.fromstring(ncx_file.read())
    strip_namespace(ncx_root)  # strip namespace prefixes to simplify XPath lookups

    nav_map = ncx_root.find(".//navMap")
    if nav_map is None:
        return []

    parsed = (_parse_nav_point(point) for point in nav_map.findall("navPoint"))
    return [entry for entry in parsed if entry]
135
+
136
+
137
def _parse_nav_point(nav_point: Element) -> Toc | None:
    """Convert one (namespace-stripped) ``navPoint`` element into a ``Toc``.

    Returns ``None`` when the nav point has no ``navLabel/text`` or its title
    is empty after stripping whitespace. Nested nav points are parsed
    recursively; unparseable children are dropped.
    """
    label = nav_point.find("navLabel")
    text_node = None if label is None else label.find("text")
    if text_node is None:
        return None

    title = plain_text(text_node).strip()
    if not title:
        return None

    href = fragment = None
    content_elem = nav_point.find("content")
    src = None if content_elem is None else content_elem.get("src")
    if src:
        href, fragment = _split_href(src)

    nested = (_parse_nav_point(child) for child in nav_point.findall("navPoint"))
    return Toc(
        title=title,
        href=href,
        fragment=fragment,
        id=nav_point.get("id"),
        children=[child for child in nested if child],
    )
173
+
174
+
175
def _write_ncx_toc(zip: Zip, ncx_path: Path, toc_list: list[Toc]) -> None:
    """Rewrite the ``navMap`` of the NCX file at ``ncx_path`` from ``toc_list``.

    The existing document is parsed, its nav points are updated in place, and
    the result is written to the replacement entry as UTF-8 with an XML
    declaration.

    Raises:
        ValueError: if the NCX document contains no ``navMap`` element.
    """
    with zip.read(ncx_path) as ncx_file:
        ncx_root = ET.fromstring(ncx_file.read())

    ns = extract_namespace(ncx_root.tag)
    nav_map_query = f".//{{{ns}}}navMap" if ns else ".//navMap"
    nav_map = ncx_root.find(nav_map_query)
    if nav_map is None:
        raise ValueError("Cannot find navMap in NCX file")

    _update_nav_points(nav_map, toc_list, ns)

    with zip.replace(ncx_path) as out:
        ET.ElementTree(ncx_root).write(out, encoding="utf-8", xml_declaration=True)
187
+
188
+
189
def _update_nav_points(parent: Element, toc_list: list[Toc], ns: str | None, start_play_order: int = 1) -> int:
    """Make the ``navPoint`` children of ``parent`` mirror ``toc_list``.

    Existing nav points are matched against the entries, detached, and then
    re-appended in ``toc_list`` order (reused when matched, created otherwise).
    ``playOrder`` values are assigned depth-first starting at
    ``start_play_order``.

    Returns:
        The next unused ``playOrder`` value.
    """
    nav_point_tag = f"{{{ns}}}navPoint" if ns else "navPoint"
    current = [child for child in parent if child.tag == nav_point_tag]

    # Matching must happen before the old nav points are detached.
    pairs = _match_toc_with_elements(toc_list, current)
    for stale in current:
        parent.remove(stale)

    next_order = start_play_order
    for toc, reused in pairs:
        if reused is None:
            nav_point = _create_nav_point(toc, ns, next_order)
        else:
            _update_nav_point_content(reused, toc, ns, next_order)
            nav_point = reused

        parent.append(nav_point)
        # Children continue the numbering right after their parent.
        next_order = _update_nav_points(nav_point, toc.children, ns, next_order + 1)

    return next_order
210
+
211
+
212
def _update_nav_point_content(nav_point: Element, toc: Toc, ns: str | None, play_order: int) -> None:
    """Copy the fields of ``toc`` onto an existing ``navPoint`` element.

    ``id`` is only overwritten when the entry has one; ``playOrder`` is always
    set. The label text and the ``content/@src`` are updated only when the
    corresponding child elements already exist.
    """
    prefix = f"{{{ns}}}" if ns else ""

    if toc.id:
        nav_point.set("id", toc.id)
    nav_point.set("playOrder", str(play_order))

    label = nav_point.find(f"{prefix}navLabel")
    text_node = None if label is None else label.find(f"{prefix}text")
    if text_node is not None:
        text_node.text = toc.title

    content_elem = nav_point.find(f"{prefix}content")
    if content_elem is None or toc.href is None:
        return
    target = toc.full_href
    if target:
        content_elem.set("src", target)
230
+
231
+
232
def _create_nav_point(toc: Toc, ns: str | None, play_order: int) -> Element:
    """Build a new ``navPoint`` element for ``toc``.

    The element gets the entry's id (or ``navPoint-<play_order>`` when the
    entry has none), a ``playOrder``, a ``navLabel/text`` holding the title
    and, when the entry has an href, a ``content`` element. Children of the
    entry are not added here.
    """
    prefix = f"{{{ns}}}" if ns else ""

    nav_point = Element(f"{prefix}navPoint")
    nav_point.set("id", toc.id if toc.id else f"navPoint-{play_order}")
    nav_point.set("playOrder", str(play_order))

    nav_label = ET.SubElement(nav_point, f"{prefix}navLabel")
    ET.SubElement(nav_label, f"{prefix}text").text = toc.title

    if toc.href is not None:
        content_elem = ET.SubElement(nav_point, f"{prefix}content")
        target = toc.full_href
        if target:
            content_elem.set("src", target)

    return nav_point
256
+
257
+
258
def _read_nav_toc(zip: Zip, nav_path: Path) -> list[Toc]:
    """Parse the EPUB 3 nav document at ``nav_path`` into top-level entries.

    The first ``nav`` element whose ``epub:type`` (or plain ``type``)
    attribute equals ``toc`` is used. Returns an empty list when no such
    ``nav`` exists or it contains no ``ol``. List items that cannot be parsed
    into an entry are skipped.
    """
    with zip.read(nav_path) as nav_file:
        document = ET.fromstring(nav_file.read())
    strip_namespace(document)

    toc_nav = next(
        (
            nav
            for nav in document.findall(".//nav")
            if (nav.get("{http://www.idpf.org/2007/ops}type") or nav.get("type")) == "toc"
        ),
        None,
    )
    if toc_nav is None:
        return []

    top_ol = toc_nav.find(".//ol")
    if top_ol is None:
        return []

    parsed = (_parse_nav_li(li) for li in top_ol.findall("li"))
    return [entry for entry in parsed if entry]
286
+
287
+
288
def _parse_nav_li(li: Element) -> Toc | None:
    """Convert one (namespace-stripped) nav ``li`` element into a ``Toc``.

    The title comes from the ``a`` child, or from the ``span`` child when
    there is no ``a`` (such entries have no href or fragment). The entry id is
    the ``li`` id, falling back to the ``a`` id.

    Returns:
        The parsed entry, or ``None`` when the item has neither ``a`` nor
        ``span`` or its title is empty after stripping whitespace.
    """
    anchor = li.find("a")
    label = anchor if anchor is not None else li.find("span")
    if label is None:
        return None

    title = plain_text(label).strip()
    if not title:
        return None

    # Initialised up front so the id fallback below never depends on which
    # branch ran (the previous code probed locals() for this).
    href = fragment = anchor_id = None
    if anchor is not None:
        anchor_id = anchor.get("id")
        href_attr = anchor.get("href")
        if href_attr:
            href, fragment = _split_href(href_attr)

    children = []
    child_ol = li.find("ol")
    if child_ol is not None:
        for child_li in child_ol.findall("li"):
            child_toc = _parse_nav_li(child_li)
            if child_toc is not None:
                children.append(child_toc)

    return Toc(
        title=title,
        href=href,
        fragment=fragment,
        id=li.get("id") or anchor_id,
        children=children,
    )
332
+
333
+
334
def _write_nav_toc(zip: Zip, nav_path: Path, toc_list: list[Toc]) -> None:
    """Rewrite the TOC list of the EPUB 3 nav document at ``nav_path``.

    The first ``nav`` element typed ``toc`` is located, the items of its first
    ``ol`` are updated from ``toc_list``, and the document is written to the
    replacement entry as UTF-8 with an XML declaration.

    Raises:
        ValueError: if no ``nav`` element typed ``toc`` exists, or it has no
            ``ol`` descendant.
    """
    with zip.read(nav_path) as nav_file:
        document = ET.fromstring(nav_file.read())
    ns = extract_namespace(document.tag)

    def descendant_query(local_name: str) -> str:
        # Qualify the tag with the document namespace when there is one.
        return f".//{{{ns}}}{local_name}" if ns else f".//{local_name}"

    toc_nav = next(
        (
            nav
            for nav in document.findall(descendant_query("nav"))
            if (nav.get("{http://www.idpf.org/2007/ops}type") or nav.get("type") or nav.get(f"{{{ns}}}type"))
            == "toc"
        ),
        None,
    )
    if toc_nav is None:
        raise ValueError("Cannot find nav element with type='toc'")

    top_ol = toc_nav.find(descendant_query("ol"))
    if top_ol is None:
        raise ValueError("Cannot find ol in nav element")

    _update_nav_lis(top_ol, toc_list, ns)

    with zip.replace(nav_path) as out:
        ET.ElementTree(document).write(out, encoding="utf-8", xml_declaration=True)
358
+
359
+
360
def _update_nav_lis(ol: Element, toc_list: list[Toc], ns: str | None) -> None:
    """Make the ``li`` children of ``ol`` mirror ``toc_list``.

    Existing items are matched against the entries, detached, and then
    re-appended in ``toc_list`` order (reused when matched, created
    otherwise). Nested lists are synchronised recursively.

    A reused item whose entry has no children loses its nested ``ol``, so
    stale sub-entries do not survive the rewrite. This matches the NCX writer,
    which always recurses and thereby drops nav points that are no longer
    listed.
    """
    tag_prefix = f"{{{ns}}}" if ns else ""
    li_tag = f"{tag_prefix}li"
    ol_tag = f"{tag_prefix}ol"

    existing_lis = [elem for elem in ol if elem.tag == li_tag]
    # Matching must happen before the old items are detached.
    matched_pairs = _match_toc_with_elements(toc_list, existing_lis)
    for li in existing_lis:
        ol.remove(li)

    for toc, existing_elem in matched_pairs:
        if existing_elem is not None:
            li = existing_elem
            _update_nav_li_content(li, toc, ns)
        else:
            li = _create_nav_li(toc, ns)
        ol.append(li)

        child_ol = li.find(ol_tag)
        if toc.children:
            if child_ol is None:
                child_ol = Element(ol_tag)
                li.append(child_ol)
            _update_nav_lis(child_ol, toc.children, ns)
        elif child_ol is not None:
            # The entry has no children any more: drop the leftover sub-list
            # instead of keeping entries that are absent from the TOC.
            li.remove(child_ol)
384
+
385
+
386
def _update_nav_li_content(li: Element, toc: Toc, ns: str | None) -> None:
    """Copy the fields of ``toc`` onto an existing nav ``li`` element.

    ``id`` is only overwritten when the entry has one. An entry with an href
    is rendered as an ``a`` child (an existing ``span`` is replaced by a new
    ``a`` inserted first); an entry without an href is rendered as a ``span``
    child (an existing ``a`` is replaced by a new ``span`` inserted first).
    Nested lists are not touched here.
    """
    tag_prefix = f"{{{ns}}}" if ns else ""
    if toc.id:
        li.set("id", toc.id)

    a = li.find(f"{tag_prefix}a")
    span = li.find(f"{tag_prefix}span")

    if toc.href is not None:
        if a is not None:
            _set_label_text(a, toc.title)
            full_href = toc.full_href
            if full_href:
                a.set("href", full_href)
        elif span is not None:
            li.remove(span)
            a = Element(f"{tag_prefix}a")
            a.text = toc.title
            full_href = toc.full_href
            if full_href:
                a.set("href", full_href)
            li.insert(0, a)
    else:
        if span is not None:
            _set_label_text(span, toc.title)
        elif a is not None:
            li.remove(a)
            span = Element(f"{tag_prefix}span")
            span.text = toc.title
            li.insert(0, span)


def _set_label_text(label: Element, text: str) -> None:
    """Replace the whole content of ``label`` with plain ``text``.

    Child elements are removed first. Setting only ``.text`` would keep inline
    markup such as ``<a><span>Old</span></a>``, so the old wording would be
    serialised right after the new title.
    NOTE(review): presumably the title was read from the label's full text
    content, descendants included - confirm against ``plain_text``.
    """
    for child in list(label):
        label.remove(child)
    label.text = text
416
+
417
+
418
def _create_nav_li(toc: Toc, ns: str | None) -> Element:
    """Build a new nav ``li`` element for ``toc``.

    The item gets the entry's id when it has one, plus a single label child:
    an ``a`` (with ``href`` when the full href is non-empty) for entries with
    an href, otherwise a ``span``. Children of the entry are not added here.
    """
    prefix = f"{{{ns}}}" if ns else ""
    li = Element(f"{prefix}li")
    if toc.id:
        li.set("id", toc.id)

    if toc.href is None:
        ET.SubElement(li, f"{prefix}span").text = toc.title
        return li

    anchor = ET.SubElement(li, f"{prefix}a")
    anchor.text = toc.title
    target = toc.full_href
    if target:
        anchor.set("href", target)
    return li
438
+
439
+
440
+ def _split_href(href: str) -> tuple[str | None, str | None]:
441
+ if "#" in href:
442
+ parts = href.split("#", 1)
443
+ return parts[0] if parts[0] else None, parts[1] if parts[1] else None
444
+ else:
445
+ return href, None
446
+
447
+
448
def _match_toc_with_elements(toc_list: list[Toc], elements: list[Element]) -> list[tuple[Toc, Element | None]]:
    """
    Pair Toc objects with XML elements using a mixed strategy.

    Strategy priority:
    1. match by id
    2. match by href
    3. match by position

    Each element is paired with at most one entry. The result has one tuple
    per entry, in ``toc_list`` order; entries left without an element carry
    ``None``.
    """
    taken: set[int] = set()

    def claim_first(key, wanted) -> Element | None:
        # Reserve and return the first still-free element whose key equals
        # `wanted`. Callers only pass truthy `wanted` values.
        for index, candidate in enumerate(elements):
            if index not in taken and key(candidate) == wanted:
                taken.add(index)
                return candidate
        return None

    def element_id(candidate: Element) -> str | None:
        return candidate.get("id")

    # Pass 1: identical id.
    pairs: list[tuple[Toc, Element | None]] = [
        (toc, claim_first(element_id, toc.id) if toc.id else None) for toc in toc_list
    ]

    # Pass 2: identical full href, for entries still unmatched.
    for position, (toc, found) in enumerate(pairs):
        if found is not None or not toc.href:
            continue
        by_href = claim_first(_extract_href_from_element, toc.full_href)
        if by_href is not None:
            pairs[position] = (toc, by_href)

    # Pass 3: hand the remaining elements to the remaining entries in order.
    open_slots = [position for position, (_, found) in enumerate(pairs) if found is None]
    leftovers = [candidate for index, candidate in enumerate(elements) if index not in taken]
    for position, candidate in zip(open_slots, leftovers):
        pairs[position] = (pairs[position][0], candidate)

    return pairs
492
+
493
+
494
+ def _extract_href_from_element(elem: Element) -> str | None:
495
+ # NCX 格式:查找 content/@src
496
+ content = elem.find(".//content")
497
+ if content is not None:
498
+ return content.get("src")
499
+
500
+ # nav 格式:查找 a/@href
501
+ a = elem.find(".//a")
502
+ if a is not None:
503
+ return a.get("href")
504
+
505
+ return None
@@ -0,0 +1,67 @@
1
+ import zipfile
2
+ from pathlib import Path
3
+ from typing import IO
4
+
5
+ _BUFFER_SIZE = 8192 # 8KB
6
+
7
+
8
+ class Zip:
9
+ def __init__(self, source_path: Path, target_path: Path) -> None:
10
+ source_zip: zipfile.ZipFile | None = None
11
+ target_zip: zipfile.ZipFile | None = None
12
+ try:
13
+ source_zip = zipfile.ZipFile(source_path, "r")
14
+ target_zip = zipfile.ZipFile(target_path, "w", zipfile.ZIP_DEFLATED)
15
+ except Exception:
16
+ if source_zip:
17
+ source_zip.close()
18
+ if target_zip:
19
+ target_zip.close()
20
+ raise
21
+ self._source_zip: zipfile.ZipFile = source_zip
22
+ self._target_zip: zipfile.ZipFile = target_zip
23
+ self._processed_files: set[Path] = set()
24
+
25
+ def __enter__(self):
26
+ return self
27
+
28
+ def __exit__(self, _exc_type, _exc_val, _exc_tb):
29
+ try:
30
+ if _exc_type is None:
31
+ all_files = self._source_zip.namelist()
32
+ for file_path in all_files:
33
+ if file_path.endswith("/"):
34
+ continue
35
+ if Path(file_path) not in self._processed_files:
36
+ self.migrate(Path(file_path))
37
+ finally:
38
+ self._target_zip.close()
39
+ self._source_zip.close()
40
+
41
+ return False
42
+
43
+ def list_files(self, prefix_path: Path | None = None) -> list[Path]:
44
+ all_files = self._source_zip.namelist()
45
+ if prefix_path is None:
46
+ return [Path(f) for f in all_files]
47
+ prefix = str(prefix_path)
48
+ if not prefix.endswith("/"):
49
+ prefix += "/"
50
+ return [Path(f) for f in all_files if f.startswith(prefix)]
51
+
52
+ def migrate(self, path: Path):
53
+ with self.read(path) as source_file:
54
+ with self._target_zip.open(str(path), "w") as target_file:
55
+ while True:
56
+ chunk = source_file.read(_BUFFER_SIZE)
57
+ if not chunk:
58
+ break
59
+ target_file.write(chunk)
60
+ self._processed_files.add(path)
61
+
62
+ def read(self, path: Path) -> IO[bytes]:
63
+ return self._source_zip.open(str(path), "r")
64
+
65
+ def replace(self, path: Path) -> IO[bytes]:
66
+ self._processed_files.add(path)
67
+ return self._target_zip.open(str(path), "w")
@@ -0,0 +1,24 @@
1
+ from collections.abc import Generator, Iterable
2
+ from typing import Generic, TypeVar
3
+
4
+ T = TypeVar("T")
5
+
6
+
7
+ class IterSync(Generic[T]):
8
+ def __init__(self) -> None:
9
+ super().__init__()
10
+ self._queue: list[T] = []
11
+
12
+ @property
13
+ def tail(self) -> T | None:
14
+ if not self._queue:
15
+ return None
16
+ return self._queue[-1]
17
+
18
+ def take(self) -> T:
19
+ return self._queue.pop()
20
+
21
+ def iter(self, elements: Iterable[T]) -> Generator[T, None, None]:
22
+ for element in elements:
23
+ self._queue.insert(0, element)
24
+ yield element
@@ -0,0 +1,23 @@
1
# Language name constants. Each value is the English display name of a language.
# NOTE(review): presumably these are passed as translation target languages -
# confirm against the callers.
CHINESE = "Simplified Chinese"
ENGLISH = "English"
JAPANESE = "Japanese"
KOREAN = "Korean"
SPANISH = "Spanish"
FRENCH = "French"
GERMAN = "German"
PORTUGUESE = "Portuguese"
TRADITIONAL_CHINESE = "Traditional Chinese"
RUSSIAN = "Russian"
ITALIAN = "Italian"
ARABIC = "Arabic"
HINDI = "Hindi"
DUTCH = "Dutch"
POLISH = "Polish"
TURKISH = "Turkish"
VIETNAMESE = "Vietnamese"
THAI = "Thai"
INDONESIAN = "Indonesian"
SWEDISH = "Swedish"
DANISH = "Danish"
NORWEGIAN = "Norwegian"
FINNISH = "Finnish"
@@ -1 +1,2 @@
1
- from .node import LLM
1
+ from .core import LLM
2
+ from .types import *