epub-translator 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_translator/__init__.py +1 -2
- epub_translator/data/translate.jinja +3 -0
- epub_translator/epub/__init__.py +1 -1
- epub_translator/llm/context.py +10 -1
- epub_translator/llm/core.py +30 -3
- epub_translator/segment/__init__.py +1 -0
- epub_translator/segment/inline_segment.py +11 -1
- epub_translator/segment/text_segment.py +5 -10
- epub_translator/segment/utils.py +0 -16
- epub_translator/translation/__init__.py +2 -0
- epub_translator/{epub_transcode.py → translation/epub_transcode.py} +2 -2
- epub_translator/{punctuation.py → translation/punctuation.py} +1 -1
- epub_translator/{translator.py → translation/translator.py} +8 -6
- epub_translator/{xml_interrupter.py → translation/xml_interrupter.py} +52 -28
- epub_translator/xml/__init__.py +1 -1
- epub_translator/xml/inline.py +48 -2
- epub_translator/xml_translator/concurrency.py +52 -0
- epub_translator/xml_translator/score.py +164 -0
- epub_translator/xml_translator/stream_mapper.py +145 -114
- epub_translator/xml_translator/submitter.py +5 -5
- epub_translator/xml_translator/translator.py +12 -18
- {epub_translator-0.1.5.dist-info → epub_translator-0.1.7.dist-info}/METADATA +37 -9
- epub_translator-0.1.7.dist-info/RECORD +63 -0
- epub_translator/data/mmltex/README.md +0 -67
- epub_translator/data/mmltex/cmarkup.xsl +0 -1106
- epub_translator/data/mmltex/entities.xsl +0 -459
- epub_translator/data/mmltex/glayout.xsl +0 -222
- epub_translator/data/mmltex/mmltex.xsl +0 -36
- epub_translator/data/mmltex/scripts.xsl +0 -375
- epub_translator/data/mmltex/tables.xsl +0 -130
- epub_translator/data/mmltex/tokens.xsl +0 -328
- epub_translator-0.1.5.dist-info/RECORD +0 -68
- /epub_translator/{language.py → translation/language.py} +0 -0
- /epub_translator/xml/{firendly → friendly}/__init__.py +0 -0
- /epub_translator/xml/{firendly → friendly}/decoder.py +0 -0
- /epub_translator/xml/{firendly → friendly}/encoder.py +0 -0
- /epub_translator/xml/{firendly → friendly}/parser.py +0 -0
- /epub_translator/xml/{firendly → friendly}/tag.py +0 -0
- /epub_translator/xml/{firendly → friendly}/transform.py +0 -0
- {epub_translator-0.1.5.dist-info → epub_translator-0.1.7.dist-info}/LICENSE +0 -0
- {epub_translator-0.1.5.dist-info → epub_translator-0.1.7.dist-info}/WHEEL +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: epub-translator
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.7
|
|
4
4
|
Summary: Translate the epub book using LLM. The translated book will retain the original text and list the translated text side by side with the original text.
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: epub,llm,translation,translator
|
|
@@ -24,6 +24,7 @@ Classifier: Topic :: Software Development :: Localization
|
|
|
24
24
|
Classifier: Topic :: Text Processing :: Markup
|
|
25
25
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
26
|
Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
|
|
27
|
+
Requires-Dist: mathml2latex (>=0.2.12,<0.3.0)
|
|
27
28
|
Requires-Dist: openai (>=2.14.0,<3.0.0)
|
|
28
29
|
Requires-Dist: resource-segmentation (>=0.0.7,<0.1.0)
|
|
29
30
|
Requires-Dist: tiktoken (>=0.12.0,<1.0.0)
|
|
@@ -59,6 +60,13 @@ Translate EPUB books using Large Language Models while preserving the original t
|
|
|
59
60
|
- **Flexible LLM Support**: Works with any OpenAI-compatible API endpoint
|
|
60
61
|
- **Caching**: Built-in caching for progress recovery when translation fails
|
|
61
62
|
|
|
63
|
+
## Use Cases
|
|
64
|
+
|
|
65
|
+
- **Language Learning**: Read books in their original language with side-by-side translations
|
|
66
|
+
- **Academic Research**: Access foreign literature with bilingual references
|
|
67
|
+
- **Content Localization**: Prepare books for international audiences
|
|
68
|
+
- **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
|
|
69
|
+
|
|
62
70
|
## Installation
|
|
63
71
|
|
|
64
72
|
```bash
|
|
@@ -156,7 +164,8 @@ translate(
|
|
|
156
164
|
submit: SubmitKind, # How to insert translations (REPLACE, APPEND_TEXT, or APPEND_BLOCK)
|
|
157
165
|
user_prompt: str | None = None, # Custom translation instructions
|
|
158
166
|
max_retries: int = 5, # Maximum retries for failed translations
|
|
159
|
-
max_group_tokens: int =
|
|
167
|
+
max_group_tokens: int = 2600, # Maximum tokens per translation group
|
|
168
|
+
concurrency: int = 1, # Number of concurrent translation tasks (default: 1)
|
|
160
169
|
llm: LLM | None = None, # Single LLM instance for both translation and filling
|
|
161
170
|
translation_llm: LLM | None = None, # LLM instance for translation (overrides llm)
|
|
162
171
|
fill_llm: LLM | None = None, # LLM instance for XML filling (overrides llm)
|
|
@@ -356,13 +365,6 @@ llm = LLM(
|
|
|
356
365
|
)
|
|
357
366
|
```
|
|
358
367
|
|
|
359
|
-
## Use Cases
|
|
360
|
-
|
|
361
|
-
- **Language Learning**: Read books in their original language with side-by-side translations
|
|
362
|
-
- **Academic Research**: Access foreign literature with bilingual references
|
|
363
|
-
- **Content Localization**: Prepare books for international audiences
|
|
364
|
-
- **Cross-Cultural Reading**: Enjoy literature while understanding cultural nuances
|
|
365
|
-
|
|
366
368
|
## Advanced Features
|
|
367
369
|
|
|
368
370
|
### Custom Translation Prompts
|
|
@@ -394,6 +396,32 @@ llm = LLM(
|
|
|
394
396
|
)
|
|
395
397
|
```
|
|
396
398
|
|
|
399
|
+
### Concurrent Translation
|
|
400
|
+
|
|
401
|
+
Speed up translation by processing multiple text segments concurrently. Use the `concurrency` parameter to control how many translation tasks run in parallel:
|
|
402
|
+
|
|
403
|
+
```python
|
|
404
|
+
translate(
|
|
405
|
+
source_path="source.epub",
|
|
406
|
+
target_path="translated.epub",
|
|
407
|
+
target_language="English",
|
|
408
|
+
submit=SubmitKind.APPEND_BLOCK,
|
|
409
|
+
llm=llm,
|
|
410
|
+
concurrency=4, # Process 4 segments concurrently
|
|
411
|
+
)
|
|
412
|
+
```
|
|
413
|
+
|
|
414
|
+
**Performance Tips:**
|
|
415
|
+
|
|
416
|
+
- Start with `concurrency=4` and adjust based on your API rate limits and system resources
|
|
417
|
+
- Higher concurrency values can significantly reduce translation time for large books
|
|
418
|
+
- The translation order is preserved regardless of concurrency settings
|
|
419
|
+
- Monitor your API provider's rate limits to avoid throttling
|
|
420
|
+
|
|
421
|
+
**Thread Safety:**
|
|
422
|
+
|
|
423
|
+
When using `concurrency > 1`, ensure that any custom callback functions (`on_progress`, `on_fill_failed`) are thread-safe. Built-in callbacks are thread-safe by default.
|
|
424
|
+
|
|
397
425
|
## Related Projects
|
|
398
426
|
|
|
399
427
|
### PDF Craft
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
epub_translator/__init__.py,sha256=JsiOUPpk5k7q8mXIgnRQWdVVnkJww_KDTg7jXsP7_C4,222
|
|
2
|
+
epub_translator/data/fill.jinja,sha256=zSytA8Vhp2i6YBZ09F1z9iPJq1-jUaiphoXqTNZwnvo,6964
|
|
3
|
+
epub_translator/data/translate.jinja,sha256=MVAWvgO9kybEFi0zMiZLEWwuRUL3l8PrwJdsoueQeCs,855
|
|
4
|
+
epub_translator/epub/__init__.py,sha256=aZawPakdkEquL4kRRpyCTdoSQ82l7FGqY4Uw6-ndoGA,154
|
|
5
|
+
epub_translator/epub/common.py,sha256=4-SpTe8iot9hMfyXILmlUFvYVNYqPAHL5hn1fr2wgis,1180
|
|
6
|
+
epub_translator/epub/math.py,sha256=-Q2LJQxxjgQZQUe_WlJA9tjzLqgqtw2ZmbGbHsPRp2U,5422
|
|
7
|
+
epub_translator/epub/metadata.py,sha256=DXSimY2iZNBA2juIaKtB-4CHHSYJiDK7PPhfenV4dto,3511
|
|
8
|
+
epub_translator/epub/spines.py,sha256=bP2IsobZm7zs4z10iXGc9SmgAFSIq9pJc8HE-V0aW9Y,1331
|
|
9
|
+
epub_translator/epub/toc.py,sha256=TKJfyDT4svFkXd6JCNZk2ZEYc9q-5DXnV3zY2UKo8nE,14891
|
|
10
|
+
epub_translator/epub/zip.py,sha256=-3LI8f-ksgU8xCy28NjBOKyQPE8PhPEUPqIKZE1p8dw,2364
|
|
11
|
+
epub_translator/llm/__init__.py,sha256=YcFYYnXmXyX0RUyC-PDbj5k7Woygp_XOpTI3vDiNSPM,75
|
|
12
|
+
epub_translator/llm/context.py,sha256=8-0UnrZIaNshR_imy_ed_UpOK7H1a6dOsG-boaYOX8k,4186
|
|
13
|
+
epub_translator/llm/core.py,sha256=wQwt6oG68ZN_iQOaytBiPXOC7sI62XII_A4dOHdAt_s,5979
|
|
14
|
+
epub_translator/llm/error.py,sha256=4efAIQL14DFSvAnSTUfgdAbZRqaWBqOfUGsSfvxa5zM,1503
|
|
15
|
+
epub_translator/llm/executor.py,sha256=A0IjQ-s9wBJuhAZAAydneb9zBXWnu2J9inR2Q8F-GDE,5533
|
|
16
|
+
epub_translator/llm/increasable.py,sha256=8XkKeI1hiHlpMHj8dQ4fW0BkViSx4hH8QfbQsy-5SDw,1297
|
|
17
|
+
epub_translator/llm/types.py,sha256=c-dMAIvlG4R3la3mUTWEw5xei-sIYKmQeBja7mirxcI,219
|
|
18
|
+
epub_translator/segment/__init__.py,sha256=nCHNaHASElKTbC8HEAQkI1Y12m6kEdX5uJVvVvHKtFg,595
|
|
19
|
+
epub_translator/segment/block_segment.py,sha256=psNKA_HMIcwZtoug8AtnAcV9_mQ2WXLnXqFsekHzt2g,4570
|
|
20
|
+
epub_translator/segment/common.py,sha256=gGWYQaJ0tGnWCuF1me9TOo-Q_DrZVakCu2patyFIOs0,714
|
|
21
|
+
epub_translator/segment/inline_segment.py,sha256=nrRKoJ-vblsNITJeixrCgIOkVQyUXrchMg0XYU_8pLo,14563
|
|
22
|
+
epub_translator/segment/text_segment.py,sha256=DHv8bkBpVYVUI01hOIf9Jdc7D3v7SdbLD1MeL4MUh4U,6267
|
|
23
|
+
epub_translator/segment/utils.py,sha256=_tlIA1I7rYz9_q-oQ5cPZWPmhTObCXjksQzRtX3beXY,636
|
|
24
|
+
epub_translator/serial/__init__.py,sha256=b3IMVmWcUwEqHKcGmey88b057pyz5ct946CaUZi4LB4,67
|
|
25
|
+
epub_translator/serial/chunk.py,sha256=FrTaHikVOd6bLYumnEriTaAQ_DIDLjHm16gh-wBVR9k,1495
|
|
26
|
+
epub_translator/serial/segment.py,sha256=uEz-ke1KcYrON-68FaUEzMG2CzHlMjvbC11F3ZT4yH0,446
|
|
27
|
+
epub_translator/serial/splitter.py,sha256=Nq0sxPXos8ez7QBG01sOKjnYKbeBWUBHflZGtqenVm8,1726
|
|
28
|
+
epub_translator/template.py,sha256=0CqRmj3nTtPshw0NmTr2ECqelops2MMyX94fMrE-HKs,1587
|
|
29
|
+
epub_translator/translation/__init__.py,sha256=R0c0ZngocOC-Qczs0a8JYAdAcCu2gv3FLcSrUyhwDMo,74
|
|
30
|
+
epub_translator/translation/epub_transcode.py,sha256=_pRzmQgDrlfsibalkUogVi0F0Qy_uuYfKhZk3nP5pkA,2747
|
|
31
|
+
epub_translator/translation/language.py,sha256=88osG0JNYxOkxBjg5Pm-P0Mhiyxf6GqdxoPW12HW0PE,493
|
|
32
|
+
epub_translator/translation/punctuation.py,sha256=TPCGjEmlAyN3G11VuXdHn-pvUkuWDwWqbTNzw-ij60E,813
|
|
33
|
+
epub_translator/translation/translator.py,sha256=WC4Yqx-ffhxBhqzMAujE_NQG7BsDwgn95UMNG7OkUSo,6487
|
|
34
|
+
epub_translator/translation/xml_interrupter.py,sha256=c0wwfZzGpOkfKJ5v5p-lNgYlcqD0B6i2e6cQq-Tl0lI,8426
|
|
35
|
+
epub_translator/utils.py,sha256=BfZWrYjzDNQ4cFrgvRNzd4i1CKLtPxS8Z4LBHhqEV78,914
|
|
36
|
+
epub_translator/xml/__init__.py,sha256=qluFTfZYlPmOie8nR2C5O0tZ3UbCQEoEoR-Fq-__79c,160
|
|
37
|
+
epub_translator/xml/const.py,sha256=Re2TYmpwG7-jVVgSq3R_K-uYhvAYzcXcRmLFkwCPD9Y,19
|
|
38
|
+
epub_translator/xml/deduplication.py,sha256=TaMbzeA70VvUQV0X1wcQFVbuMEPJUtj9Hq6iWlUmtAQ,1152
|
|
39
|
+
epub_translator/xml/friendly/__init__.py,sha256=I5jhnhFWoHvojLsYXH4jfR4Gi8lKFZ3yQ56ze5hEe1M,74
|
|
40
|
+
epub_translator/xml/friendly/decoder.py,sha256=xRQ5LnSunmYbba_0oT39oUr86-sLYAHYMUGmlseIu2U,2467
|
|
41
|
+
epub_translator/xml/friendly/encoder.py,sha256=evjvw6oE-oCud44IsJ-YZVHn6dtUzjNYX25ljaZP6vY,2417
|
|
42
|
+
epub_translator/xml/friendly/parser.py,sha256=QlMHA0nfPJbNyx6IwRFrYVw7okuvzDB42NXCauIFV-o,6560
|
|
43
|
+
epub_translator/xml/friendly/tag.py,sha256=ahaGoYttuAlnFxLFFgTV51KUZSpUiHho-COZX14nxN8,3308
|
|
44
|
+
epub_translator/xml/friendly/transform.py,sha256=5tG1MJmzrXIR_Z5gmRxwcoKvXBzJBVH0ELeaRsG-8w0,1201
|
|
45
|
+
epub_translator/xml/inline.py,sha256=fT_zm2NqHraJEwYXBHyqo9XjBFQUWRJO7YHB1rerkAc,2945
|
|
46
|
+
epub_translator/xml/self_closing.py,sha256=41ofGUdss9yU51IVwI4It6hKfzh8YcxIR_j-ohD19LE,5240
|
|
47
|
+
epub_translator/xml/utils.py,sha256=7tQ6L5P0_JXhxONeG64hEeeL5mKjA6NKS1H1Q9B1Cac,1062
|
|
48
|
+
epub_translator/xml/xml.py,sha256=qQ5Wk1-KVVHE4TX25zGOR7fINsGkXnoq-qyKKNl5no4,1675
|
|
49
|
+
epub_translator/xml/xml_like.py,sha256=jBK4UUgXXWRYnfYlCH1MUAjGHWBQAbUj8HsYqvTTWvA,8890
|
|
50
|
+
epub_translator/xml_translator/__init__.py,sha256=lqts1mJL_WfojDnMAQ5OM7TbT6u9X3H-X4C_avHzvXM,128
|
|
51
|
+
epub_translator/xml_translator/callbacks.py,sha256=IoZrsaivd2W76cHFupwv6auVxgEWHcBN2MHQJYcWoJ8,1324
|
|
52
|
+
epub_translator/xml_translator/common.py,sha256=hSPptgPp7j6dm47imELB5DgmEbzTEyJD6WEeELOOc50,38
|
|
53
|
+
epub_translator/xml_translator/concurrency.py,sha256=ACwoDHNX3xChL0On5yvUSFT8By7aoHoKor94k6A8nuY,1502
|
|
54
|
+
epub_translator/xml_translator/hill_climbing.py,sha256=1jvilOkTLzwljJA4Nrel8yU2XGvOXpueUJTK7RAp-XY,4272
|
|
55
|
+
epub_translator/xml_translator/score.py,sha256=TkXDmr-29p8SzuAp68u_vFDE69y1TyId9S20HT1T_xs,5311
|
|
56
|
+
epub_translator/xml_translator/stream_mapper.py,sha256=nk8iRUHAUQA2B35_y-JOCo6il8MSxXikWvyl-WA8WAA,10662
|
|
57
|
+
epub_translator/xml_translator/submitter.py,sha256=_ic2_JBPdEd2nMSu2mtQ5OzqpGv0zGrvYaicVUXAiUQ,14159
|
|
58
|
+
epub_translator/xml_translator/translator.py,sha256=7Ja1jFbmjIgHcmI9V6gg_K0t7qb6in9mhRn54a7qhZ8,9497
|
|
59
|
+
epub_translator/xml_translator/validation.py,sha256=-OKlSZuD__sjAiEpGAO93YQme4ZDSPmoPjRsAMOCEjc,16668
|
|
60
|
+
epub_translator-0.1.7.dist-info/LICENSE,sha256=5RF32sL3LtMOJIErdDKp1ZEYPGXS8WPpsiSz_jMBnGI,1066
|
|
61
|
+
epub_translator-0.1.7.dist-info/METADATA,sha256=-ySr_REjXY7qG5dZslElcQeIQ_U8r-ggkVtKytZBYMI,15684
|
|
62
|
+
epub_translator-0.1.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
63
|
+
epub_translator-0.1.7.dist-info/RECORD,,
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
# XSLT MathML Library
|
|
2
|
-
|
|
3
|
-
This directory contains XSLT stylesheets from the **XSLT MathML Library 2.1.2**, a set of XSLT stylesheets to transform MathML 2.0 to LaTeX.
|
|
4
|
-
|
|
5
|
-
**Note**: These files are included for reference purposes. Our project uses a custom Python implementation to convert MathML to LaTeX, but we keep these XSLT files as a reference for understanding MathML element mappings and conversion rules.
|
|
6
|
-
|
|
7
|
-
## File Manifest
|
|
8
|
-
|
|
9
|
-
- `mmltex.xsl` - Main stylesheet
|
|
10
|
-
- `tokens.xsl` - Token elements (mi, mn, mo, etc.)
|
|
11
|
-
- `glayout.xsl` - Layout elements (mfrac, msqrt, etc.)
|
|
12
|
-
- `scripts.xsl` - Script elements (msub, msup, etc.)
|
|
13
|
-
- `tables.xsl` - Table elements (mtable, mtr, mtd)
|
|
14
|
-
- `entities.xsl` - Entity definitions
|
|
15
|
-
- `cmarkup.xsl` - Content markup elements
|
|
16
|
-
|
|
17
|
-
## Original Project Information
|
|
18
|
-
|
|
19
|
-
**Original Author**: Vasil Yaroshevich
|
|
20
|
-
|
|
21
|
-
**Original Website**: http://www.raleigh.ru/MathML/mmltex/
|
|
22
|
-
|
|
23
|
-
**Archived Links**:
|
|
24
|
-
- Sourceforge Project: https://sourceforge.net/projects/xsltml/files/xsltml/
|
|
25
|
-
- Archived Documentation: https://web.archive.org/web/20160109063934/http://www.raleigh.ru/MathML/mmltex/index.php
|
|
26
|
-
- Google Translated (English): https://translate.google.com/translate?sl=ru&tl=en&u=https%3A%2F%2Fweb.archive.org%2Fweb%2F20160114170851%2Fhttp%3A%2F%2Fwww.raleigh.ru%2FMathML%2Fmmltex%2Findex.php
|
|
27
|
-
|
|
28
|
-
---
|
|
29
|
-
|
|
30
|
-
## Copyright
|
|
31
|
-
|
|
32
|
-
Copyright (C) 2001-2003 Vasil Yaroshevich
|
|
33
|
-
|
|
34
|
-
Permission is hereby granted, free of charge, to any person
|
|
35
|
-
obtaining a copy of this software and associated documentation
|
|
36
|
-
files (the "Software"), to deal in the Software without
|
|
37
|
-
restriction, including without limitation the rights to use,
|
|
38
|
-
copy, modify, merge, publish, distribute, sublicense, and/or
|
|
39
|
-
sell copies of the Software, and to permit persons to whom the
|
|
40
|
-
Software is furnished to do so, subject to the following
|
|
41
|
-
conditions:
|
|
42
|
-
|
|
43
|
-
The above copyright notice and this permission notice shall be
|
|
44
|
-
included in all copies or substantial portions of the Software.
|
|
45
|
-
|
|
46
|
-
Except as contained in this notice, the names of individuals
|
|
47
|
-
credited with contribution to this software shall not be used in
|
|
48
|
-
advertising or otherwise to promote the sale, use or other
|
|
49
|
-
dealings in this Software without prior written authorization
|
|
50
|
-
from the individuals in question.
|
|
51
|
-
|
|
52
|
-
Any stylesheet derived from this Software that is publically
|
|
53
|
-
distributed will be identified with a different name and the
|
|
54
|
-
version strings in any derived Software will be changed so that
|
|
55
|
-
no possibility of confusion between the derived package and this
|
|
56
|
-
Software will exist.
|
|
57
|
-
|
|
58
|
-
## Warranty
|
|
59
|
-
|
|
60
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
61
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
62
|
-
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
63
|
-
NONINFRINGEMENT. IN NO EVENT SHALL NORMAN WALSH OR ANY OTHER
|
|
64
|
-
CONTRIBUTOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
65
|
-
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
66
|
-
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
67
|
-
OTHER DEALINGS IN THE SOFTWARE.
|