epub-generator 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_generator/__init__.py +5 -0
- epub_generator/context.py +4 -5
- epub_generator/data/nav.xhtml.jinja +1 -1
- epub_generator/data/style.css.jinja +22 -0
- epub_generator/generation/gen_asset.py +87 -35
- epub_generator/generation/gen_chapter.py +8 -68
- epub_generator/generation/gen_content.py +59 -0
- epub_generator/generation/gen_epub.py +49 -43
- epub_generator/generation/gen_nav.py +40 -62
- epub_generator/generation/gen_toc.py +57 -40
- epub_generator/generation/xml_utils.py +15 -0
- epub_generator/i18n.py +2 -0
- epub_generator/types.py +20 -9
- epub_generator/validate.py +224 -0
- {epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/METADATA +1 -1
- epub_generator-0.1.6.dist-info/RECORD +27 -0
- epub_generator-0.1.4.dist-info/RECORD +0 -25
- {epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/LICENSE +0 -0
- {epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/WHEEL +0 -0
epub_generator/__init__.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from .generation import generate_epub
|
|
2
2
|
from .options import LaTeXRender, TableRender
|
|
3
3
|
from .types import (
|
|
4
|
+
BasicAsset,
|
|
4
5
|
BookMeta,
|
|
5
6
|
Chapter,
|
|
6
7
|
ChapterGetter,
|
|
@@ -16,10 +17,13 @@ from .types import (
|
|
|
16
17
|
TextKind,
|
|
17
18
|
TocItem,
|
|
18
19
|
)
|
|
20
|
+
from .validate import InvalidUnicodeError
|
|
19
21
|
|
|
20
22
|
__all__ = [
|
|
21
23
|
# Main API function
|
|
22
24
|
"generate_epub",
|
|
25
|
+
# Validation
|
|
26
|
+
"InvalidUnicodeError",
|
|
23
27
|
# Options
|
|
24
28
|
"TableRender",
|
|
25
29
|
"LaTeXRender",
|
|
@@ -35,6 +39,7 @@ __all__ = [
|
|
|
35
39
|
"Table",
|
|
36
40
|
"Formula",
|
|
37
41
|
"HTMLTag",
|
|
42
|
+
"BasicAsset",
|
|
38
43
|
"Image",
|
|
39
44
|
"Footnote",
|
|
40
45
|
"Mark",
|
epub_generator/context.py
CHANGED
|
@@ -55,15 +55,14 @@ class Context:
|
|
|
55
55
|
nodes = list(self._hash_to_node.values())
|
|
56
56
|
nodes.sort(key=lambda node: node.file_name)
|
|
57
57
|
return [(node.file_name, node.media_type) for node in nodes]
|
|
58
|
+
|
|
59
|
+
@property
|
|
60
|
+
def chapters_with_mathml(self) -> set[str]:
|
|
61
|
+
return self._chapters_with_mathml
|
|
58
62
|
|
|
59
63
|
def mark_chapter_has_mathml(self, chapter_file_name: str) -> None:
|
|
60
|
-
"""Mark a chapter as containing MathML content for EPUB 3.0 manifest properties."""
|
|
61
64
|
self._chapters_with_mathml.add(chapter_file_name)
|
|
62
65
|
|
|
63
|
-
def chapter_has_mathml(self, chapter_file_name: str) -> bool:
|
|
64
|
-
"""Check if a chapter contains MathML content."""
|
|
65
|
-
return chapter_file_name in self._chapters_with_mathml
|
|
66
|
-
|
|
67
66
|
def use_asset(
|
|
68
67
|
self,
|
|
69
68
|
source_path: Path,
|
|
@@ -65,4 +65,26 @@ span.formula-inline img {
|
|
|
65
65
|
vertical-align: middle;
|
|
66
66
|
margin: 0 0.2em;
|
|
67
67
|
max-height: 1.2em;
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
div.asset {
|
|
71
|
+
page-break-inside: avoid;
|
|
72
|
+
margin: 1em 0;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
div.asset-title {
|
|
76
|
+
text-align: center;
|
|
77
|
+
font-weight: 600;
|
|
78
|
+
font-size: 0.95em;
|
|
79
|
+
color: #333;
|
|
80
|
+
margin-bottom: 0.5em;
|
|
81
|
+
font-style: italic;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
div.asset-caption {
|
|
85
|
+
text-align: center;
|
|
86
|
+
font-size: 0.9em;
|
|
87
|
+
color: #666;
|
|
88
|
+
margin-top: 0.5em;
|
|
89
|
+
font-style: italic;
|
|
68
90
|
}
|
|
@@ -8,7 +8,8 @@ from latex2mathml.converter import convert
|
|
|
8
8
|
|
|
9
9
|
from ..context import Context
|
|
10
10
|
from ..options import LaTeXRender, TableRender
|
|
11
|
-
from ..types import Formula, Image, Table
|
|
11
|
+
from ..types import BasicAsset, Formula, Image, Table
|
|
12
|
+
from .gen_content import render_html_tag, render_inline_content
|
|
12
13
|
|
|
13
14
|
_MEDIA_TYPE_MAP = {
|
|
14
15
|
".png": "image/png",
|
|
@@ -18,25 +19,40 @@ _MEDIA_TYPE_MAP = {
|
|
|
18
19
|
".svg": "image/svg+xml",
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
def process_table(context: Context, table: Table) -> Element | None:
|
|
22
|
-
if context.table_render == TableRender.CLIPPING:
|
|
23
|
-
return None
|
|
24
|
-
try:
|
|
25
|
-
wrapped_html = f"<div>{table.html_content}</div>"
|
|
26
|
-
parsed = fromstring(wrapped_html)
|
|
27
|
-
wrapper = Element("div", attrib={"class": "alt-wrapper"})
|
|
28
22
|
|
|
29
|
-
|
|
30
|
-
|
|
23
|
+
def render_inline_formula(context: Context, formula: Formula) -> Element | None:
|
|
24
|
+
return _render_formula(
|
|
25
|
+
context=context,
|
|
26
|
+
formula=formula,
|
|
27
|
+
inline_mode=True,
|
|
28
|
+
)
|
|
31
29
|
|
|
32
|
-
|
|
33
|
-
|
|
30
|
+
|
|
31
|
+
def render_asset_block(context: Context, block: Table | Formula | Image) -> Element | None:
|
|
32
|
+
element: Element | None = None
|
|
33
|
+
if isinstance(block, Table):
|
|
34
|
+
element = _render_table(context, block)
|
|
35
|
+
elif isinstance(block, Formula):
|
|
36
|
+
element = _render_formula(context, block, inline_mode=False)
|
|
37
|
+
elif isinstance(block, Image):
|
|
38
|
+
element = _process_image(context, block)
|
|
39
|
+
return element
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _render_table(context: Context, table: Table) -> Element | None:
|
|
43
|
+
if context.table_render == TableRender.CLIPPING:
|
|
34
44
|
return None
|
|
35
45
|
|
|
46
|
+
return _wrap_asset_content(
|
|
47
|
+
context=context,
|
|
48
|
+
asset=table,
|
|
49
|
+
content_element=render_html_tag(context, table.html_content),
|
|
50
|
+
)
|
|
51
|
+
|
|
36
52
|
|
|
37
|
-
def
|
|
38
|
-
context: Context,
|
|
39
|
-
formula: Formula,
|
|
53
|
+
def _render_formula(
|
|
54
|
+
context: Context,
|
|
55
|
+
formula: Formula,
|
|
40
56
|
inline_mode: bool,
|
|
41
57
|
) -> Element | None:
|
|
42
58
|
|
|
@@ -47,9 +63,10 @@ def process_formula(
|
|
|
47
63
|
if not latex_expr:
|
|
48
64
|
return None
|
|
49
65
|
|
|
66
|
+
content_element = None
|
|
50
67
|
if context.latex_render == LaTeXRender.MATHML:
|
|
51
|
-
|
|
52
|
-
latex=latex_expr,
|
|
68
|
+
content_element = _latex2mathml(
|
|
69
|
+
latex=latex_expr,
|
|
53
70
|
inline_mode=inline_mode,
|
|
54
71
|
)
|
|
55
72
|
elif context.latex_render == LaTeXRender.SVG:
|
|
@@ -64,31 +81,40 @@ def process_formula(
|
|
|
64
81
|
img_element = Element("img")
|
|
65
82
|
img_element.set("src", f"../assets/{file_name}")
|
|
66
83
|
img_element.set("alt", "formula")
|
|
84
|
+
content_element = img_element
|
|
67
85
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
else:
|
|
71
|
-
wrapper = Element("div", attrib={"class": "alt-wrapper"})
|
|
86
|
+
if content_element is None:
|
|
87
|
+
return None
|
|
72
88
|
|
|
73
|
-
|
|
74
|
-
|
|
89
|
+
return _wrap_asset_content(
|
|
90
|
+
context=context,
|
|
91
|
+
asset=formula,
|
|
92
|
+
content_element=content_element,
|
|
93
|
+
inline_mode=inline_mode,
|
|
94
|
+
)
|
|
75
95
|
|
|
76
|
-
return None
|
|
77
96
|
|
|
78
|
-
def
|
|
97
|
+
def _process_image(context: Context, image: Image) -> Element:
|
|
79
98
|
file_ext = image.path.suffix or ".png"
|
|
80
99
|
file_name = context.use_asset(
|
|
81
|
-
source_path=image.path,
|
|
82
|
-
media_type=_MEDIA_TYPE_MAP.get(file_ext.lower(), "image/png"),
|
|
100
|
+
source_path=image.path,
|
|
101
|
+
media_type=_MEDIA_TYPE_MAP.get(file_ext.lower(), "image/png"),
|
|
83
102
|
file_ext=file_ext,
|
|
84
103
|
)
|
|
85
104
|
img_element = Element("img")
|
|
86
105
|
img_element.set("src", f"../assets/{file_name}")
|
|
87
|
-
img_element.set("alt",
|
|
106
|
+
img_element.set("alt", "") # Empty alt text, use caption instead
|
|
88
107
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
108
|
+
return _wrap_asset_content(
|
|
109
|
+
context=context,
|
|
110
|
+
asset=image,
|
|
111
|
+
content_element=img_element,
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
def _normalize_expression(expression: str) -> str:
|
|
115
|
+
expression = expression.replace("\n", "")
|
|
116
|
+
expression = expression.strip()
|
|
117
|
+
return expression
|
|
92
118
|
|
|
93
119
|
|
|
94
120
|
_ESCAPE_UNICODE_PATTERN = re.compile(r"&#x([0-9A-Fa-f]{5});")
|
|
@@ -148,9 +174,35 @@ def _latex_formula2svg(latex: str, font_size: int = 12):
|
|
|
148
174
|
return output.getvalue()
|
|
149
175
|
except Exception:
|
|
150
176
|
return None
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _wrap_asset_content(
|
|
180
|
+
context: Context,
|
|
181
|
+
asset: BasicAsset,
|
|
182
|
+
content_element: Element,
|
|
183
|
+
inline_mode: bool = False,
|
|
184
|
+
) -> Element:
|
|
185
|
+
|
|
186
|
+
if inline_mode:
|
|
187
|
+
wrapper = Element("span", attrib={"class": "formula-inline"})
|
|
188
|
+
else:
|
|
189
|
+
wrapper = Element("div", attrib={"class": "alt-wrapper"})
|
|
151
190
|
|
|
191
|
+
wrapper.append(content_element)
|
|
152
192
|
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
193
|
+
if not asset.title and not asset.caption:
|
|
194
|
+
return wrapper
|
|
195
|
+
|
|
196
|
+
container = Element("div", attrib={"class": "asset"})
|
|
197
|
+
if asset.title:
|
|
198
|
+
title_div = Element("div", attrib={"class": "asset-title"})
|
|
199
|
+
render_inline_content(context, title_div, asset.title)
|
|
200
|
+
container.append(title_div)
|
|
201
|
+
|
|
202
|
+
container.append(wrapper)
|
|
203
|
+
if asset.caption:
|
|
204
|
+
caption_div = Element("div", attrib={"class": "asset-caption"})
|
|
205
|
+
render_inline_content(context, caption_div, asset.caption)
|
|
206
|
+
container.append(caption_div)
|
|
207
|
+
|
|
208
|
+
return container
|
|
@@ -7,14 +7,13 @@ from ..types import (
|
|
|
7
7
|
Chapter,
|
|
8
8
|
ContentBlock,
|
|
9
9
|
Formula,
|
|
10
|
-
HTMLTag,
|
|
11
10
|
Image,
|
|
12
|
-
Mark,
|
|
13
11
|
Table,
|
|
14
12
|
TextBlock,
|
|
15
13
|
TextKind,
|
|
16
14
|
)
|
|
17
|
-
from .gen_asset import
|
|
15
|
+
from .gen_asset import render_asset_block
|
|
16
|
+
from .gen_content import render_inline_content
|
|
18
17
|
from .xml_utils import serialize_element, set_epub_type
|
|
19
18
|
|
|
20
19
|
_MAX_HEADING_LEVEL = 6 # HTML standard defines heading levels from h1 to h6
|
|
@@ -91,7 +90,10 @@ def _render_footnotes(
|
|
|
91
90
|
|
|
92
91
|
|
|
93
92
|
def _render_content_block(context: Context, block: ContentBlock) -> Element | None:
|
|
94
|
-
if isinstance(block,
|
|
93
|
+
if isinstance(block, Table | Formula | Image):
|
|
94
|
+
return render_asset_block(context, block)
|
|
95
|
+
|
|
96
|
+
elif isinstance(block, TextBlock):
|
|
95
97
|
if block.kind == TextKind.HEADLINE:
|
|
96
98
|
heading_level = min(block.level + 1, _MAX_HEADING_LEVEL)
|
|
97
99
|
container = Element(f"h{heading_level}")
|
|
@@ -102,7 +104,7 @@ def _render_content_block(context: Context, block: ContentBlock) -> Element | No
|
|
|
102
104
|
else:
|
|
103
105
|
raise ValueError(f"Unknown TextKind: {block.kind}")
|
|
104
106
|
|
|
105
|
-
|
|
107
|
+
render_inline_content(
|
|
106
108
|
context=context,
|
|
107
109
|
parent=container,
|
|
108
110
|
content=block.content,
|
|
@@ -113,68 +115,6 @@ def _render_content_block(context: Context, block: ContentBlock) -> Element | No
|
|
|
113
115
|
return blockquote
|
|
114
116
|
|
|
115
117
|
return container
|
|
116
|
-
|
|
117
|
-
elif isinstance(block, Table):
|
|
118
|
-
return process_table(context, block)
|
|
119
|
-
|
|
120
|
-
elif isinstance(block, Formula):
|
|
121
|
-
return process_formula(context, block, inline_mode=False)
|
|
122
|
-
|
|
123
|
-
elif isinstance(block, Image):
|
|
124
|
-
return process_image(context, block)
|
|
125
|
-
|
|
118
|
+
|
|
126
119
|
else:
|
|
127
120
|
return None
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def _render_text_content(context: Context, parent: Element, content: list[str | Mark | Formula | HTMLTag]) -> None:
|
|
131
|
-
"""Render text content with inline citation marks."""
|
|
132
|
-
current_element = parent
|
|
133
|
-
for item in content:
|
|
134
|
-
if isinstance(item, str):
|
|
135
|
-
if current_element is parent:
|
|
136
|
-
if parent.text is None:
|
|
137
|
-
parent.text = item
|
|
138
|
-
else:
|
|
139
|
-
parent.text += item
|
|
140
|
-
else:
|
|
141
|
-
if current_element.tail is None:
|
|
142
|
-
current_element.tail = item
|
|
143
|
-
else:
|
|
144
|
-
current_element.tail += item
|
|
145
|
-
|
|
146
|
-
elif isinstance(item, HTMLTag):
|
|
147
|
-
tag_element = Element(item.name)
|
|
148
|
-
for attr, value in item.attributes:
|
|
149
|
-
tag_element.set(attr, value)
|
|
150
|
-
_render_text_content(
|
|
151
|
-
context=context,
|
|
152
|
-
parent=tag_element,
|
|
153
|
-
content=item.content,
|
|
154
|
-
)
|
|
155
|
-
parent.append(tag_element)
|
|
156
|
-
current_element = tag_element
|
|
157
|
-
|
|
158
|
-
elif isinstance(item, Formula):
|
|
159
|
-
formula_element = process_formula(
|
|
160
|
-
context=context,
|
|
161
|
-
formula=item,
|
|
162
|
-
inline_mode=True,
|
|
163
|
-
)
|
|
164
|
-
if formula_element is not None:
|
|
165
|
-
parent.append(formula_element)
|
|
166
|
-
current_element = formula_element
|
|
167
|
-
|
|
168
|
-
elif isinstance(item, Mark):
|
|
169
|
-
# EPUB 3.0 noteref with semantic attributes
|
|
170
|
-
anchor = Element("a")
|
|
171
|
-
anchor.attrib = {
|
|
172
|
-
"id": f"ref-{item.id}",
|
|
173
|
-
"href": f"#fn-{item.id}",
|
|
174
|
-
"class": "super",
|
|
175
|
-
}
|
|
176
|
-
# Set epub:type using utility function (avoids global namespace pollution)
|
|
177
|
-
set_epub_type(anchor, "noteref")
|
|
178
|
-
anchor.text = f"[{item.id}]"
|
|
179
|
-
parent.append(anchor)
|
|
180
|
-
current_element = anchor
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from xml.etree.ElementTree import Element
|
|
2
|
+
|
|
3
|
+
from ..context import Context
|
|
4
|
+
from ..types import Formula, HTMLTag, Mark
|
|
5
|
+
from .xml_utils import set_epub_type
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def render_inline_content(
|
|
9
|
+
context: Context,
|
|
10
|
+
parent: Element,
|
|
11
|
+
content: list[str | Mark | Formula | HTMLTag]
|
|
12
|
+
) -> None:
|
|
13
|
+
current_element = parent
|
|
14
|
+
for item in content:
|
|
15
|
+
if isinstance(item, str):
|
|
16
|
+
if current_element is parent:
|
|
17
|
+
if parent.text is None:
|
|
18
|
+
parent.text = item
|
|
19
|
+
else:
|
|
20
|
+
parent.text += item
|
|
21
|
+
else:
|
|
22
|
+
if current_element.tail is None:
|
|
23
|
+
current_element.tail = item
|
|
24
|
+
else:
|
|
25
|
+
current_element.tail += item
|
|
26
|
+
|
|
27
|
+
elif isinstance(item, HTMLTag):
|
|
28
|
+
tag_element = render_html_tag(context, item)
|
|
29
|
+
parent.append(tag_element)
|
|
30
|
+
current_element = tag_element
|
|
31
|
+
|
|
32
|
+
elif isinstance(item, Formula):
|
|
33
|
+
from .gen_asset import render_inline_formula # avoid circular import
|
|
34
|
+
formula_element = render_inline_formula(context, item)
|
|
35
|
+
if formula_element is not None:
|
|
36
|
+
parent.append(formula_element)
|
|
37
|
+
current_element = formula_element
|
|
38
|
+
|
|
39
|
+
elif isinstance(item, Mark):
|
|
40
|
+
# EPUB 3.0 noteref with semantic attributes
|
|
41
|
+
anchor = Element("a")
|
|
42
|
+
anchor.attrib = {
|
|
43
|
+
"id": f"ref-{item.id}",
|
|
44
|
+
"href": f"#fn-{item.id}",
|
|
45
|
+
"class": "super",
|
|
46
|
+
}
|
|
47
|
+
set_epub_type(anchor, "noteref")
|
|
48
|
+
anchor.text = f"[{item.id}]"
|
|
49
|
+
parent.append(anchor)
|
|
50
|
+
current_element = anchor
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def render_html_tag(context: Context, tag: HTMLTag) -> Element:
|
|
54
|
+
"""Convert HTMLTag to XML Element with full inline content support."""
|
|
55
|
+
element = Element(tag.name)
|
|
56
|
+
for attr, value in tag.attributes:
|
|
57
|
+
element.set(attr, value)
|
|
58
|
+
render_inline_content(context, element, tag.content)
|
|
59
|
+
return element
|
|
@@ -9,10 +9,11 @@ from ..context import Context, Template
|
|
|
9
9
|
from ..html_tag import search_content
|
|
10
10
|
from ..i18n import I18N
|
|
11
11
|
from ..options import LaTeXRender, TableRender
|
|
12
|
-
from ..types import Chapter, EpubData, Formula, TextBlock
|
|
12
|
+
from ..types import BasicAsset, Chapter, ContentBlock, EpubData, Formula, TextBlock
|
|
13
|
+
from ..validate import validate_chapter, validate_epub_data
|
|
13
14
|
from .gen_chapter import generate_chapter
|
|
14
15
|
from .gen_nav import gen_nav
|
|
15
|
-
from .gen_toc import
|
|
16
|
+
from .gen_toc import TocPoint, gen_toc, iter_toc
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
def generate_epub(
|
|
@@ -23,13 +24,14 @@ def generate_epub(
|
|
|
23
24
|
latex_render: LaTeXRender = LaTeXRender.MATHML,
|
|
24
25
|
assert_not_aborted: Callable[[], None] = lambda: None,
|
|
25
26
|
) -> None:
|
|
27
|
+
# Validate epub_data for invalid Unicode characters before processing
|
|
28
|
+
validate_epub_data(epub_data)
|
|
29
|
+
|
|
26
30
|
i18n = I18N(lan)
|
|
27
31
|
template = Template()
|
|
28
32
|
epub_file_path = Path(epub_file_path)
|
|
29
|
-
|
|
30
|
-
# Generate navigation points from TOC structure
|
|
31
33
|
has_cover = epub_data.cover_image_path is not None
|
|
32
|
-
|
|
34
|
+
toc_points = gen_toc(epub_data=epub_data)
|
|
33
35
|
|
|
34
36
|
epub_file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
35
37
|
|
|
@@ -49,7 +51,7 @@ def generate_epub(
|
|
|
49
51
|
_write_chapters_from_data(
|
|
50
52
|
context=context,
|
|
51
53
|
i18n=i18n,
|
|
52
|
-
|
|
54
|
+
toc_points=toc_points,
|
|
53
55
|
epub_data=epub_data,
|
|
54
56
|
latex_render=latex_render,
|
|
55
57
|
assert_not_aborted=assert_not_aborted,
|
|
@@ -58,7 +60,7 @@ def generate_epub(
|
|
|
58
60
|
template=template,
|
|
59
61
|
i18n=i18n,
|
|
60
62
|
epub_data=epub_data,
|
|
61
|
-
|
|
63
|
+
toc_points=toc_points,
|
|
62
64
|
has_cover=has_cover,
|
|
63
65
|
)
|
|
64
66
|
file.writestr(
|
|
@@ -71,7 +73,7 @@ def generate_epub(
|
|
|
71
73
|
context=context,
|
|
72
74
|
i18n=i18n,
|
|
73
75
|
epub_data=epub_data,
|
|
74
|
-
|
|
76
|
+
toc_points=toc_points,
|
|
75
77
|
)
|
|
76
78
|
assert_not_aborted()
|
|
77
79
|
|
|
@@ -81,6 +83,7 @@ def generate_epub(
|
|
|
81
83
|
epub_data=epub_data,
|
|
82
84
|
)
|
|
83
85
|
|
|
86
|
+
|
|
84
87
|
def _write_assets_from_data(
|
|
85
88
|
context: Context,
|
|
86
89
|
i18n: I18N,
|
|
@@ -104,62 +107,69 @@ def _write_assets_from_data(
|
|
|
104
107
|
arcname="OEBPS/assets/cover.png",
|
|
105
108
|
)
|
|
106
109
|
|
|
110
|
+
|
|
107
111
|
def _write_chapters_from_data(
|
|
108
112
|
context: Context,
|
|
109
113
|
i18n: I18N,
|
|
110
|
-
|
|
114
|
+
toc_points: list[TocPoint],
|
|
111
115
|
epub_data: EpubData,
|
|
112
116
|
latex_render: LaTeXRender,
|
|
113
117
|
assert_not_aborted: Callable[[], None],
|
|
114
118
|
):
|
|
115
|
-
|
|
116
|
-
chapter =
|
|
119
|
+
for file_name, get_chapter in _search_chapters(epub_data, toc_points):
|
|
120
|
+
chapter = get_chapter()
|
|
121
|
+
# Validate chapter content for invalid Unicode characters
|
|
122
|
+
validate_chapter(chapter, context=f"Chapter '{file_name}'")
|
|
117
123
|
data = generate_chapter(context, chapter, i18n)
|
|
118
124
|
context.file.writestr(
|
|
119
|
-
zinfo_or_arcname="OEBPS/Text/
|
|
125
|
+
zinfo_or_arcname="OEBPS/Text/" + file_name,
|
|
120
126
|
data=data.encode("utf-8"),
|
|
121
127
|
)
|
|
122
128
|
if latex_render == LaTeXRender.MATHML and _chapter_has_formula(chapter):
|
|
123
|
-
context.mark_chapter_has_mathml(
|
|
129
|
+
context.mark_chapter_has_mathml(file_name)
|
|
124
130
|
assert_not_aborted()
|
|
125
131
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
data=data.encode("utf-8"),
|
|
133
|
-
)
|
|
134
|
-
if latex_render == LaTeXRender.MATHML and _chapter_has_formula(chapter):
|
|
135
|
-
context.mark_chapter_has_mathml(nav_point.file_name)
|
|
136
|
-
assert_not_aborted()
|
|
132
|
+
|
|
133
|
+
def _search_chapters(epub_data: EpubData, toc_points: list[TocPoint]):
|
|
134
|
+
if epub_data.get_head is not None:
|
|
135
|
+
yield "head.xhtml", epub_data.get_head
|
|
136
|
+
for ref in iter_toc(toc_points):
|
|
137
|
+
yield ref.file_name, ref.get_chapter
|
|
137
138
|
|
|
138
139
|
|
|
139
140
|
def _chapter_has_formula(chapter: Chapter) -> bool:
|
|
140
|
-
"""Check if chapter contains any formulas (block-level or inline)."""
|
|
141
141
|
for element in chapter.elements:
|
|
142
|
-
if
|
|
142
|
+
if _content_block_has_formula(element):
|
|
143
143
|
return True
|
|
144
|
-
if isinstance(element, TextBlock):
|
|
145
|
-
for item in search_content(element.content):
|
|
146
|
-
if isinstance(item, Formula):
|
|
147
|
-
return True
|
|
148
144
|
for footnote in chapter.footnotes:
|
|
149
145
|
for content_block in footnote.contents:
|
|
150
|
-
if
|
|
146
|
+
if _content_block_has_formula(content_block):
|
|
151
147
|
return True
|
|
152
|
-
if isinstance(content_block, TextBlock):
|
|
153
|
-
for item in search_content(content_block.content):
|
|
154
|
-
if isinstance(item, Formula):
|
|
155
|
-
return True
|
|
156
148
|
return False
|
|
157
149
|
|
|
150
|
+
|
|
151
|
+
def _content_block_has_formula(content_block: ContentBlock) -> bool:
|
|
152
|
+
if isinstance(content_block, Formula):
|
|
153
|
+
return True
|
|
154
|
+
if isinstance(content_block, TextBlock):
|
|
155
|
+
for item in search_content(content_block.content):
|
|
156
|
+
if isinstance(item, Formula):
|
|
157
|
+
return True
|
|
158
|
+
if isinstance(content_block, BasicAsset):
|
|
159
|
+
for item in search_content(content_block.title):
|
|
160
|
+
if isinstance(item, Formula):
|
|
161
|
+
return True
|
|
162
|
+
for item in search_content(content_block.caption):
|
|
163
|
+
if isinstance(item, Formula):
|
|
164
|
+
return True
|
|
165
|
+
return False
|
|
166
|
+
|
|
167
|
+
|
|
158
168
|
def _write_basic_files(
|
|
159
169
|
context: Context,
|
|
160
170
|
i18n: I18N,
|
|
161
171
|
epub_data: EpubData,
|
|
162
|
-
|
|
172
|
+
toc_points: list[TocPoint],
|
|
163
173
|
):
|
|
164
174
|
meta = epub_data.meta
|
|
165
175
|
has_cover = epub_data.cover_image_path is not None
|
|
@@ -175,22 +185,18 @@ def _write_basic_files(
|
|
|
175
185
|
else:
|
|
176
186
|
modified_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
177
187
|
|
|
178
|
-
|
|
179
|
-
nav_point.file_name
|
|
180
|
-
for nav_point in nav_points
|
|
181
|
-
if context.chapter_has_mathml(nav_point.file_name)
|
|
182
|
-
}
|
|
188
|
+
toc_refs = list(iter_toc(toc_points))
|
|
183
189
|
content = context.template.render(
|
|
184
190
|
template="content.opf",
|
|
185
191
|
meta=meta,
|
|
186
192
|
i18n=i18n,
|
|
187
193
|
ISBN=isbn,
|
|
188
194
|
modified_timestamp=modified_timestamp,
|
|
189
|
-
nav_points=
|
|
195
|
+
nav_points=toc_refs,
|
|
190
196
|
has_head_chapter=has_head_chapter,
|
|
191
197
|
has_cover=has_cover,
|
|
192
198
|
asset_files=context.used_files,
|
|
193
|
-
chapters_with_mathml=chapters_with_mathml,
|
|
199
|
+
chapters_with_mathml=context.chapters_with_mathml,
|
|
194
200
|
)
|
|
195
201
|
context.file.writestr(
|
|
196
202
|
zinfo_or_arcname="OEBPS/content.opf",
|
|
@@ -1,26 +1,30 @@
|
|
|
1
|
-
from
|
|
1
|
+
from xml.etree.ElementTree import Element
|
|
2
2
|
|
|
3
3
|
from ..context import Template
|
|
4
4
|
from ..i18n import I18N
|
|
5
|
-
from ..types import BookMeta, EpubData
|
|
6
|
-
from .gen_toc import
|
|
5
|
+
from ..types import BookMeta, EpubData
|
|
6
|
+
from .gen_toc import TocPoint, iter_toc
|
|
7
|
+
from .xml_utils import indent, serialize_element
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
def gen_nav(
|
|
10
11
|
template: Template,
|
|
11
12
|
i18n: I18N,
|
|
12
13
|
epub_data: EpubData,
|
|
13
|
-
|
|
14
|
+
toc_points: list[TocPoint],
|
|
14
15
|
has_cover: bool = False,
|
|
15
16
|
) -> str:
|
|
16
17
|
meta: BookMeta | None = epub_data.meta
|
|
17
18
|
has_head_chapter = epub_data.get_head is not None
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
19
|
+
toc_body = "\n".join(_render_toc_item(toc_points))
|
|
20
|
+
first_ref = next(iter_toc(toc_points), None)
|
|
21
|
+
|
|
22
|
+
first_chapter_file: str = ""
|
|
23
|
+
head_chapter_title: str = ""
|
|
24
|
+
if first_ref:
|
|
25
|
+
first_chapter_file = first_ref.file_name
|
|
21
26
|
if has_head_chapter and epub_data.get_head:
|
|
22
|
-
|
|
23
|
-
head_chapter_title = "Preface" # Default title
|
|
27
|
+
head_chapter_title = i18n.preface
|
|
24
28
|
|
|
25
29
|
return template.render(
|
|
26
30
|
template="nav.xhtml",
|
|
@@ -29,64 +33,38 @@ def gen_nav(
|
|
|
29
33
|
has_cover=has_cover,
|
|
30
34
|
has_head_chapter=has_head_chapter,
|
|
31
35
|
head_chapter_title=head_chapter_title,
|
|
32
|
-
|
|
36
|
+
toc_body=toc_body,
|
|
33
37
|
first_chapter_file=first_chapter_file,
|
|
34
38
|
)
|
|
35
39
|
|
|
36
40
|
|
|
37
|
-
def
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
)
|
|
42
|
-
nav_point_index = 0
|
|
43
|
-
|
|
44
|
-
html_parts = []
|
|
45
|
-
for chapters_list in (prefaces, chapters):
|
|
46
|
-
for toc_item in chapters_list:
|
|
47
|
-
nav_point_index, item_html = _generate_toc_item(
|
|
48
|
-
toc_item, nav_points, nav_point_index
|
|
49
|
-
)
|
|
50
|
-
html_parts.append(item_html)
|
|
51
|
-
|
|
52
|
-
return "\n".join(html_parts)
|
|
53
|
-
|
|
41
|
+
def _render_toc_item(toc_points: list[TocPoint]):
|
|
42
|
+
for toc_point in toc_points:
|
|
43
|
+
element = _create_toc_li_element(toc_point)
|
|
44
|
+
element = indent(element)
|
|
45
|
+
yield serialize_element(element)
|
|
54
46
|
|
|
55
|
-
def _generate_toc_item(
|
|
56
|
-
toc_item: TocItem,
|
|
57
|
-
nav_points: list[NavPoint],
|
|
58
|
-
nav_point_index: int,
|
|
59
|
-
) -> tuple[int, str]:
|
|
60
|
-
title_escaped = escape(toc_item.title)
|
|
61
|
-
file_name = None
|
|
62
|
-
if toc_item.get_chapter is not None and nav_point_index < len(nav_points):
|
|
63
|
-
file_name = nav_points[nav_point_index].file_name
|
|
64
|
-
nav_point_index += 1
|
|
65
47
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
nav_point_index, child_html = _generate_toc_item(
|
|
69
|
-
child, nav_points, nav_point_index
|
|
70
|
-
)
|
|
71
|
-
children_html.append(child_html)
|
|
48
|
+
def _create_toc_li_element(toc_point: TocPoint) -> Element:
|
|
49
|
+
li = Element("li")
|
|
72
50
|
|
|
73
|
-
if
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
if file_name:
|
|
81
|
-
html_parts = [f' <li>\n <a href="Text/{file_name}">{title_escaped}</a>']
|
|
51
|
+
if toc_point.ref is not None:
|
|
52
|
+
file_name = toc_point.ref.file_name
|
|
53
|
+
link = Element("a")
|
|
54
|
+
link.set("href", f"Text/{file_name}")
|
|
55
|
+
link.text = toc_point.title
|
|
56
|
+
li.append(link)
|
|
82
57
|
else:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
58
|
+
span = Element("span")
|
|
59
|
+
span.text = toc_point.title
|
|
60
|
+
li.append(span)
|
|
61
|
+
|
|
62
|
+
# 递归处理子节点
|
|
63
|
+
if toc_point.children:
|
|
64
|
+
ol = Element("ol")
|
|
65
|
+
for child in toc_point.children:
|
|
66
|
+
child_li = _create_toc_li_element(child)
|
|
67
|
+
ol.append(child_li)
|
|
68
|
+
li.append(ol)
|
|
69
|
+
|
|
70
|
+
return li
|
|
@@ -1,36 +1,57 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import Any, Callable
|
|
2
|
+
from typing import Any, Callable, Generator
|
|
3
3
|
|
|
4
4
|
from ..types import EpubData, TocItem
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
@dataclass
|
|
8
|
-
class
|
|
9
|
-
|
|
10
|
-
file_name: str
|
|
8
|
+
class TocPoint:
|
|
9
|
+
title: str
|
|
11
10
|
order: int
|
|
12
|
-
|
|
11
|
+
ref: "TocPointRef | None"
|
|
12
|
+
children: list["TocPoint"]
|
|
13
|
+
|
|
14
|
+
@property
|
|
15
|
+
def is_placeholder(self) -> bool:
|
|
16
|
+
"""是否为占位节点(无对应文件)"""
|
|
17
|
+
return self.ref is None
|
|
18
|
+
|
|
19
|
+
@property
|
|
20
|
+
def has_file(self) -> bool:
|
|
21
|
+
"""是否有对应的 XHTML 文件"""
|
|
22
|
+
return self.ref is not None
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class TocPointRef:
|
|
26
|
+
part_id: str
|
|
27
|
+
file_name: str
|
|
28
|
+
get_chapter: Callable[[], Any]
|
|
13
29
|
|
|
14
30
|
|
|
15
|
-
def
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
31
|
+
def iter_toc(toc_points: list[TocPoint]) -> Generator[TocPointRef, None, None]:
|
|
32
|
+
for toc_point in toc_points:
|
|
33
|
+
if toc_point.ref:
|
|
34
|
+
yield toc_point.ref
|
|
35
|
+
yield from iter_toc(toc_point.children)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def gen_toc(epub_data: EpubData) -> list[TocPoint]:
|
|
19
39
|
prefaces = epub_data.prefaces
|
|
20
40
|
chapters = epub_data.chapters
|
|
21
41
|
|
|
22
|
-
|
|
23
|
-
has_cover=has_cover,
|
|
42
|
+
toc_point_generation = _TocPointGenerator(
|
|
24
43
|
chapters_count=(
|
|
25
44
|
_count_toc_items(prefaces) +
|
|
26
45
|
_count_toc_items(chapters)
|
|
27
46
|
),
|
|
28
47
|
)
|
|
48
|
+
toc_points: list[TocPoint] = []
|
|
29
49
|
for chapters_list in (prefaces, chapters):
|
|
30
50
|
for toc_item in chapters_list:
|
|
31
|
-
|
|
51
|
+
toc_point = toc_point_generation.generate(toc_item)
|
|
52
|
+
toc_points.append(toc_point)
|
|
32
53
|
|
|
33
|
-
return
|
|
54
|
+
return toc_points
|
|
34
55
|
|
|
35
56
|
|
|
36
57
|
def _count_toc_items(items: list[TocItem]) -> int:
|
|
@@ -50,39 +71,35 @@ def _max_depth_toc_items(items: list[TocItem]) -> int:
|
|
|
50
71
|
return max_depth
|
|
51
72
|
|
|
52
73
|
|
|
53
|
-
class
|
|
54
|
-
def __init__(self,
|
|
55
|
-
self.
|
|
56
|
-
self._next_order: int = 2 if has_cover else 1
|
|
74
|
+
class _TocPointGenerator:
|
|
75
|
+
def __init__(self, chapters_count: int):
|
|
76
|
+
self._next_order: int = 0
|
|
57
77
|
self._next_id: int = 1
|
|
58
78
|
self._digits = len(str(chapters_count))
|
|
59
79
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
return self._nav_points
|
|
63
|
-
|
|
64
|
-
def generate(self, toc_item: TocItem) -> None:
|
|
65
|
-
self._create_nav_point(toc_item)
|
|
80
|
+
def generate(self, toc_item: TocItem) -> TocPoint:
|
|
81
|
+
return self._create_toc_point(toc_item)
|
|
66
82
|
|
|
67
|
-
def
|
|
68
|
-
|
|
83
|
+
def _create_toc_point(self, toc_item: TocItem) -> TocPoint:
|
|
84
|
+
ref: TocPointRef | None = None
|
|
69
85
|
if toc_item.get_chapter is not None:
|
|
70
|
-
|
|
86
|
+
part_id = self._next_id
|
|
71
87
|
self._next_id += 1
|
|
72
|
-
part_id = str(
|
|
73
|
-
|
|
74
|
-
|
|
88
|
+
part_id = str(part_id).zfill(self._digits)
|
|
89
|
+
ref = TocPointRef(
|
|
90
|
+
part_id=part_id,
|
|
75
91
|
file_name=f"part{part_id}.xhtml",
|
|
76
|
-
order=self._next_order,
|
|
77
92
|
get_chapter=toc_item.get_chapter,
|
|
78
93
|
)
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
94
|
+
order = self._next_order # 确保 order 以中序遍历为顺序
|
|
95
|
+
self._next_order += 1
|
|
96
|
+
|
|
97
|
+
return TocPoint(
|
|
98
|
+
title=toc_item.title,
|
|
99
|
+
order=order,
|
|
100
|
+
ref=ref,
|
|
101
|
+
children=[
|
|
102
|
+
self._create_toc_point(child)
|
|
103
|
+
for child in toc_item.children
|
|
104
|
+
],
|
|
105
|
+
)
|
|
@@ -29,3 +29,18 @@ def serialize_element(element: Element) -> str:
|
|
|
29
29
|
xml_string = xml_string.replace(f"{ns_prefix}:", f"{prefix}:")
|
|
30
30
|
|
|
31
31
|
return xml_string
|
|
32
|
+
|
|
33
|
+
def indent(elem: Element, level: int = 0) -> Element:
|
|
34
|
+
indent_str = " " * level
|
|
35
|
+
next_indent_str = " " * (level + 1)
|
|
36
|
+
if len(elem):
|
|
37
|
+
if not elem.text or not elem.text.strip():
|
|
38
|
+
elem.text = "\n" + next_indent_str
|
|
39
|
+
for i, child in enumerate(elem):
|
|
40
|
+
indent(child, level + 1)
|
|
41
|
+
if not child.tail or not child.tail.strip():
|
|
42
|
+
if i == len(elem) - 1:
|
|
43
|
+
child.tail = "\n" + indent_str
|
|
44
|
+
else:
|
|
45
|
+
child.tail = "\n" + next_indent_str
|
|
46
|
+
return elem
|
epub_generator/i18n.py
CHANGED
|
@@ -9,9 +9,11 @@ class I18N:
|
|
|
9
9
|
self.table_of_contents: str = "目录"
|
|
10
10
|
self.landmarks: str = "路标"
|
|
11
11
|
self.start_of_content: str = "正文开始"
|
|
12
|
+
self.preface: str = "前言"
|
|
12
13
|
elif lan == "en":
|
|
13
14
|
self.unnamed: str = "Unnamed"
|
|
14
15
|
self.cover: str = "Cover"
|
|
15
16
|
self.table_of_contents: str = "Table of Contents"
|
|
16
17
|
self.landmarks: str = "Landmarks"
|
|
17
18
|
self.start_of_content: str = "Start of Content"
|
|
19
|
+
self.preface: str = "Preface"
|
epub_generator/types.py
CHANGED
|
@@ -84,30 +84,41 @@ class Mark:
|
|
|
84
84
|
"""Citation ID, matches Footnote.id"""
|
|
85
85
|
|
|
86
86
|
@dataclass
|
|
87
|
-
class
|
|
88
|
-
"""
|
|
89
|
-
html_content: str
|
|
90
|
-
"""HTML table markup"""
|
|
87
|
+
class BasicAsset:
|
|
88
|
+
"""Asset as a base class for other assets."""
|
|
91
89
|
|
|
90
|
+
title: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
|
|
91
|
+
"""Asset title (before content)"""
|
|
92
|
+
caption: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
|
|
93
|
+
"""Asset caption (after content)"""
|
|
92
94
|
|
|
93
95
|
@dataclass
|
|
94
|
-
class
|
|
96
|
+
class Table(BasicAsset):
|
|
97
|
+
"""Table representation."""
|
|
98
|
+
|
|
99
|
+
html_content: "HTMLTag"
|
|
100
|
+
"""HTML content of the table"""
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
@dataclass
|
|
104
|
+
class Formula(BasicAsset):
|
|
95
105
|
"""Mathematical formula."""
|
|
106
|
+
|
|
96
107
|
latex_expression: str
|
|
97
108
|
"""LaTeX expression"""
|
|
98
109
|
|
|
99
110
|
|
|
100
111
|
@dataclass
|
|
101
|
-
class Image:
|
|
112
|
+
class Image(BasicAsset):
|
|
102
113
|
"""Image reference."""
|
|
114
|
+
|
|
103
115
|
path: Path
|
|
104
116
|
"""Absolute path to the image file"""
|
|
105
117
|
|
|
106
|
-
alt_text: str = "image"
|
|
107
|
-
"""Alt text (defaults to "image")"""
|
|
108
|
-
|
|
109
118
|
@dataclass
|
|
110
119
|
class TextBlock:
|
|
120
|
+
"""Text block representation."""
|
|
121
|
+
|
|
111
122
|
kind: TextKind
|
|
112
123
|
"""Kind of text block."""
|
|
113
124
|
level: int
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
from .types import (
|
|
2
|
+
BasicAsset,
|
|
3
|
+
Chapter,
|
|
4
|
+
ContentBlock,
|
|
5
|
+
EpubData,
|
|
6
|
+
Footnote,
|
|
7
|
+
Formula,
|
|
8
|
+
HTMLTag,
|
|
9
|
+
Image,
|
|
10
|
+
Mark,
|
|
11
|
+
Table,
|
|
12
|
+
TextBlock,
|
|
13
|
+
TocItem,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class InvalidUnicodeError(Exception):
|
|
18
|
+
"""Raised when invalid Unicode characters (surrogates) are detected in EPUB data."""
|
|
19
|
+
|
|
20
|
+
def __init__(self, field_path: str, invalid_char_info: str):
|
|
21
|
+
"""Initialize with field path and character information.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
field_path: Dot-separated path to the field containing invalid characters
|
|
25
|
+
invalid_char_info: Information about the invalid character(s)
|
|
26
|
+
"""
|
|
27
|
+
self.field_path = field_path
|
|
28
|
+
self.invalid_char_info = invalid_char_info
|
|
29
|
+
super().__init__(
|
|
30
|
+
f"Invalid Unicode character detected in {field_path}: {invalid_char_info}"
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def validate_epub_data(epub_data: EpubData) -> None:
|
|
35
|
+
"""Validate an EpubData object for invalid Unicode characters.
|
|
36
|
+
|
|
37
|
+
This function checks all string fields in the EPUB data structure including:
|
|
38
|
+
- Book metadata (title, description, authors, etc.)
|
|
39
|
+
- Table of contents titles (recursively)
|
|
40
|
+
- Chapter content is NOT validated here (use validate_chapter separately)
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
epub_data: EPUB data to validate
|
|
44
|
+
|
|
45
|
+
Raises:
|
|
46
|
+
InvalidUnicodeError: If surrogate characters are detected in any string field
|
|
47
|
+
"""
|
|
48
|
+
# Check metadata
|
|
49
|
+
if epub_data.meta:
|
|
50
|
+
meta = epub_data.meta
|
|
51
|
+
_check_string(meta.title, "EpubData.meta.title")
|
|
52
|
+
_check_string(meta.description, "EpubData.meta.description")
|
|
53
|
+
_check_string(meta.publisher, "EpubData.meta.publisher")
|
|
54
|
+
_check_string(meta.isbn, "EpubData.meta.isbn")
|
|
55
|
+
|
|
56
|
+
for i, author in enumerate(meta.authors):
|
|
57
|
+
_check_string(author, f"EpubData.meta.authors[{i}]")
|
|
58
|
+
|
|
59
|
+
for i, editor in enumerate(meta.editors):
|
|
60
|
+
_check_string(editor, f"EpubData.meta.editors[{i}]")
|
|
61
|
+
|
|
62
|
+
for i, translator in enumerate(meta.translators):
|
|
63
|
+
_check_string(translator, f"EpubData.meta.translators[{i}]")
|
|
64
|
+
|
|
65
|
+
# Check prefaces TOC
|
|
66
|
+
for i, preface in enumerate(epub_data.prefaces):
|
|
67
|
+
_check_toc_item(preface, f"EpubData.prefaces[{i}]")
|
|
68
|
+
|
|
69
|
+
# Check chapters TOC
|
|
70
|
+
for i, chapter_toc in enumerate(epub_data.chapters):
|
|
71
|
+
_check_toc_item(chapter_toc, f"EpubData.chapters[{i}]")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def validate_chapter(chapter: Chapter, context: str = "Chapter") -> None:
|
|
75
|
+
"""Validate a Chapter object for invalid Unicode characters.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
chapter: Chapter to validate
|
|
79
|
+
context: Context string for error reporting (e.g., "Chapter", "chapters[0]")
|
|
80
|
+
|
|
81
|
+
Raises:
|
|
82
|
+
InvalidUnicodeError: If surrogate characters are detected in any string field
|
|
83
|
+
"""
|
|
84
|
+
# Check main content elements
|
|
85
|
+
for i, element in enumerate(chapter.elements):
|
|
86
|
+
_check_content_block(element, f"{context}.elements[{i}]")
|
|
87
|
+
|
|
88
|
+
# Check footnotes
|
|
89
|
+
for i, footnote in enumerate(chapter.footnotes):
|
|
90
|
+
_check_footnote(footnote, f"{context}.footnotes[{i}]")
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _check_string(value: str | None, field_path: str) -> None:
|
|
94
|
+
"""Check if a string contains surrogate characters.
|
|
95
|
+
|
|
96
|
+
Args:
|
|
97
|
+
value: String to check
|
|
98
|
+
field_path: Path to the field for error reporting
|
|
99
|
+
|
|
100
|
+
Raises:
|
|
101
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
102
|
+
"""
|
|
103
|
+
if value is None:
|
|
104
|
+
return
|
|
105
|
+
|
|
106
|
+
for i, char in enumerate(value):
|
|
107
|
+
code_point = ord(char)
|
|
108
|
+
# Check for surrogate pair range (U+D800 to U+DFFF)
|
|
109
|
+
if 0xD800 <= code_point <= 0xDFFF:
|
|
110
|
+
raise InvalidUnicodeError(
|
|
111
|
+
field_path=field_path,
|
|
112
|
+
invalid_char_info=f"surrogate character U+{code_point:04X} at position {i}",
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _check_string_list(values: list[str | Mark | Formula | HTMLTag], field_path: str) -> None:
|
|
117
|
+
"""Recursively check a list that may contain strings, marks, formulas, or HTML tags.
|
|
118
|
+
|
|
119
|
+
Args:
|
|
120
|
+
values: List to check
|
|
121
|
+
field_path: Path to the field for error reporting
|
|
122
|
+
|
|
123
|
+
Raises:
|
|
124
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
125
|
+
"""
|
|
126
|
+
for i, item in enumerate(values):
|
|
127
|
+
item_path = f"{field_path}[{i}]"
|
|
128
|
+
if isinstance(item, str):
|
|
129
|
+
_check_string(item, item_path)
|
|
130
|
+
elif isinstance(item, Mark):
|
|
131
|
+
pass # Mark only contains int ID
|
|
132
|
+
elif isinstance(item, Formula):
|
|
133
|
+
_check_string(item.latex_expression, f"{item_path}.latex_expression")
|
|
134
|
+
_check_string_list(item.title, f"{item_path}.title")
|
|
135
|
+
_check_string_list(item.caption, f"{item_path}.caption")
|
|
136
|
+
elif isinstance(item, HTMLTag):
|
|
137
|
+
_check_html_tag(item, item_path)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _check_html_tag(tag: HTMLTag, field_path: str) -> None:
|
|
141
|
+
"""Check an HTML tag for invalid characters.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
tag: HTML tag to check
|
|
145
|
+
field_path: Path to the field for error reporting
|
|
146
|
+
|
|
147
|
+
Raises:
|
|
148
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
149
|
+
"""
|
|
150
|
+
_check_string(tag.name, f"{field_path}.name")
|
|
151
|
+
|
|
152
|
+
for i, (attr_name, attr_value) in enumerate(tag.attributes):
|
|
153
|
+
_check_string(attr_name, f"{field_path}.attributes[{i}][0]")
|
|
154
|
+
_check_string(attr_value, f"{field_path}.attributes[{i}][1]")
|
|
155
|
+
|
|
156
|
+
_check_string_list(tag.content, f"{field_path}.content")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _check_basic_asset(asset: BasicAsset, field_path: str) -> None:
|
|
160
|
+
"""Check BasicAsset (and subclasses) for invalid characters.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
asset: Asset to check
|
|
164
|
+
field_path: Path to the field for error reporting
|
|
165
|
+
|
|
166
|
+
Raises:
|
|
167
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
168
|
+
"""
|
|
169
|
+
_check_string_list(asset.title, f"{field_path}.title")
|
|
170
|
+
_check_string_list(asset.caption, f"{field_path}.caption")
|
|
171
|
+
|
|
172
|
+
if isinstance(asset, Formula):
|
|
173
|
+
_check_string(asset.latex_expression, f"{field_path}.latex_expression")
|
|
174
|
+
elif isinstance(asset, Table):
|
|
175
|
+
_check_html_tag(asset.html_content, f"{field_path}.html_content")
|
|
176
|
+
elif isinstance(asset, Image):
|
|
177
|
+
pass # Image only contains Path, no string content to check
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _check_content_block(block: ContentBlock, field_path: str) -> None:
|
|
181
|
+
"""Check a content block for invalid characters.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
block: Content block to check
|
|
185
|
+
field_path: Path to the field for error reporting
|
|
186
|
+
|
|
187
|
+
Raises:
|
|
188
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
189
|
+
"""
|
|
190
|
+
if isinstance(block, TextBlock):
|
|
191
|
+
_check_string_list(block.content, f"{field_path}.content")
|
|
192
|
+
elif isinstance(block, (Table, Formula, Image)):
|
|
193
|
+
_check_basic_asset(block, field_path)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _check_footnote(footnote: Footnote, field_path: str) -> None:
|
|
197
|
+
"""Check a footnote for invalid characters.
|
|
198
|
+
|
|
199
|
+
Args:
|
|
200
|
+
footnote: Footnote to check
|
|
201
|
+
field_path: Path to the field for error reporting
|
|
202
|
+
|
|
203
|
+
Raises:
|
|
204
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
205
|
+
"""
|
|
206
|
+
for i, content_block in enumerate(footnote.contents):
|
|
207
|
+
_check_content_block(content_block, f"{field_path}.contents[{i}]")
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _check_toc_item(item: TocItem, field_path: str) -> None:
|
|
211
|
+
"""Recursively check a TOC item for invalid characters.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
item: TOC item to check
|
|
215
|
+
field_path: Path to the field for error reporting
|
|
216
|
+
|
|
217
|
+
Raises:
|
|
218
|
+
InvalidUnicodeError: If surrogate characters are detected
|
|
219
|
+
"""
|
|
220
|
+
_check_string(item.title, f"{field_path}.title")
|
|
221
|
+
|
|
222
|
+
# Check nested children recursively
|
|
223
|
+
for i, child in enumerate(item.children):
|
|
224
|
+
_check_toc_item(child, f"{field_path}.children[{i}]")
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
epub_generator/__init__.py,sha256=G1P_GAUym94iv56PPK31641vlYrukUoMJZgWtmKscog,768
|
|
2
|
+
epub_generator/context.py,sha256=9jHRpnQsNooRUSBoY_tiQ7aQ_AMZmyKUO22gPoO8Koc,4324
|
|
3
|
+
epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
|
|
4
|
+
epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
|
|
5
|
+
epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
|
|
6
|
+
epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
|
|
7
|
+
epub_generator/data/nav.xhtml.jinja,sha256=zk5hf-MYoKxd4pcshZV5VliVrtDIgfH7n9f3-1L1cY0,1132
|
|
8
|
+
epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
|
|
9
|
+
epub_generator/data/style.css.jinja,sha256=n_DE-z97ikGzD3qufSwX_1iqkQcE_5kXiCIhyoXNjRA,1400
|
|
10
|
+
epub_generator/generation/__init__.py,sha256=UIscwHa8ocr2D1mk1KaP-zi3P1x9eYJzxTo0RJ2dnks,35
|
|
11
|
+
epub_generator/generation/gen_asset.py,sha256=WYwfGUvHM_CrwTuIIH7dYm-SL-vdhkTnvaZDymZxXzg,5978
|
|
12
|
+
epub_generator/generation/gen_chapter.py,sha256=P6kmB8hdQnJB6SCheHzu5cOmZrC5H0LqNV-uuuigX1M,3425
|
|
13
|
+
epub_generator/generation/gen_content.py,sha256=2ojjTgalveRnk1MXQaKsY53hPCgb7NHTwbMpLOXVrss,2018
|
|
14
|
+
epub_generator/generation/gen_epub.py,sha256=rxHBp4nP5OFi9SJBfiCrncV1fmhb0j3WKfUqofxJykc,6487
|
|
15
|
+
epub_generator/generation/gen_nav.py,sha256=_cjOP18C1CoTn_DELIB06pyMPZZ0CPbkk4oPEvICdKs,1955
|
|
16
|
+
epub_generator/generation/gen_toc.py,sha256=MK2iTYBpF8VUtPHpwz5JB_H6nWsKRKpVuLzRPYGy0nw,2864
|
|
17
|
+
epub_generator/generation/xml_utils.py,sha256=AVnU3AN6lmqWrdgaZTV7v77L9LonI7DX59BxkMZlef8,1822
|
|
18
|
+
epub_generator/html_tag.py,sha256=P_Y0uRStCEEh7cCtpvK4t432NEcY9OLntAznvdxUF5k,343
|
|
19
|
+
epub_generator/i18n.py,sha256=-L6J6hsy796_IQ4nLpNtAeXIkRM6oFSWSHDlRZXW8aA,705
|
|
20
|
+
epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
|
|
21
|
+
epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
|
|
22
|
+
epub_generator/types.py,sha256=gBrdi1KYOVEnI0qEp1slLsyUw_Sd7v09uHvN8_Hf9Z8,4440
|
|
23
|
+
epub_generator/validate.py,sha256=KBgvBsBuVnWTc4N-29cr2P92X0w_tGR4pMemk_KHy78,7544
|
|
24
|
+
epub_generator-0.1.6.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
|
|
25
|
+
epub_generator-0.1.6.dist-info/METADATA,sha256=JziMt9LukPRKo8rPy10qf9sIiiv98CgSxKoi7juHcYE,16555
|
|
26
|
+
epub_generator-0.1.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
27
|
+
epub_generator-0.1.6.dist-info/RECORD,,
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
epub_generator/__init__.py,sha256=CRtP7zjqNPxOA_m4S8Jgavuw_KgaKpoBW5kdgqUivLQ,648
|
|
2
|
-
epub_generator/context.py,sha256=AggXhRiOg70WZTLXF78LHA4coo3fa77OvKVe5wtgP6A,4495
|
|
3
|
-
epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
|
|
4
|
-
epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
|
|
5
|
-
epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
|
|
6
|
-
epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
|
|
7
|
-
epub_generator/data/nav.xhtml.jinja,sha256=FGunTu_cDJmSBxV8cfaIDjHVUNsyjogWg1jL4VK8ihU,1132
|
|
8
|
-
epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
|
|
9
|
-
epub_generator/data/style.css.jinja,sha256=HyGWoevaZD9xPDJeMQY_1xmM0f6aK0prmqoW3mhTGp0,1072
|
|
10
|
-
epub_generator/generation/__init__.py,sha256=UIscwHa8ocr2D1mk1KaP-zi3P1x9eYJzxTo0RJ2dnks,35
|
|
11
|
-
epub_generator/generation/gen_asset.py,sha256=0muwCvAohODC76F9G9_UBibRPQSpmMJkgQxlCsM7QcQ,4480
|
|
12
|
-
epub_generator/generation/gen_chapter.py,sha256=Irb0uJjK8Q5PnHoK4PFP7CIKKzbfhIK_4thvin6hg6g,5505
|
|
13
|
-
epub_generator/generation/gen_epub.py,sha256=zkG0U5_g3FY-D6zkYGqp844IgWYJhbAqf6CnX2Do71Y,6412
|
|
14
|
-
epub_generator/generation/gen_nav.py,sha256=D-ZNsbm26AEAovbXtx1wSwTfH4Q8H2WYfoYeQ1Sb9bk,2813
|
|
15
|
-
epub_generator/generation/gen_toc.py,sha256=yt7GYu8Rfz9aw_GPZFUl9H3BKd1za1hSm2hhp8wyI68,2488
|
|
16
|
-
epub_generator/generation/xml_utils.py,sha256=xMcNZl8CaV21XYx2yeykkHhvnq5N7yRHfIFu5KRlRHc,1261
|
|
17
|
-
epub_generator/html_tag.py,sha256=P_Y0uRStCEEh7cCtpvK4t432NEcY9OLntAznvdxUF5k,343
|
|
18
|
-
epub_generator/i18n.py,sha256=GQjpHO7t8_0rXNuoYmO-G7_9nCF7S5kluBG0ip_2jIA,622
|
|
19
|
-
epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
|
|
20
|
-
epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
|
|
21
|
-
epub_generator/types.py,sha256=Raz6MT-aIkMp6Yw9hTu4HP_ySg4kMC-YJ_o0cjYzu_A,4059
|
|
22
|
-
epub_generator-0.1.4.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
|
|
23
|
-
epub_generator-0.1.4.dist-info/METADATA,sha256=cpydosW4bVyknIbCxKP4DAhl-M8NSct7-9jX9M4BISw,16555
|
|
24
|
-
epub_generator-0.1.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
25
|
-
epub_generator-0.1.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|