epub-generator 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,10 +17,13 @@ from .types import (
17
17
  TextKind,
18
18
  TocItem,
19
19
  )
20
+ from .validate import InvalidUnicodeError
20
21
 
21
22
  __all__ = [
22
23
  # Main API function
23
24
  "generate_epub",
25
+ # Validation
26
+ "InvalidUnicodeError",
24
27
  # Options
25
28
  "TableRender",
26
29
  "LaTeXRender",
epub_generator/context.py CHANGED
@@ -18,6 +18,7 @@ class _AssetNode:
18
18
  media_type: str
19
19
  content_hash: str
20
20
 
21
+
21
22
  class Context:
22
23
  def __init__(
23
24
  self,
@@ -55,7 +56,7 @@ class Context:
55
56
  nodes = list(self._hash_to_node.values())
56
57
  nodes.sort(key=lambda node: node.file_name)
57
58
  return [(node.file_name, node.media_type) for node in nodes]
58
-
59
+
59
60
  @property
60
61
  def chapters_with_mathml(self) -> set[str]:
61
62
  return self._chapters_with_mathml
@@ -117,6 +118,7 @@ class Context:
117
118
  )
118
119
  return file_name
119
120
 
121
+
120
122
  class Template:
121
123
  def __init__(self):
122
124
  templates_path = cast(Path, files("epub_generator")) / "data"
@@ -134,7 +136,8 @@ class Template:
134
136
  self._templates[name] = template
135
137
  return template
136
138
 
139
+
137
140
  def _sha256_hash(data: bytes) -> str:
138
141
  hash256 = sha256()
139
142
  hash256.update(data)
140
- return hash256.hexdigest()
143
+ return hash256.hexdigest()
@@ -1 +1 @@
1
- from .gen_epub import generate_epub
1
+ from .gen_epub import generate_epub
@@ -22,13 +22,15 @@ _MEDIA_TYPE_MAP = {
22
22
 
23
23
  def render_inline_formula(context: Context, formula: Formula) -> Element | None:
24
24
  return _render_formula(
25
- context=context,
26
- formula=formula,
25
+ context=context,
26
+ formula=formula,
27
27
  inline_mode=True,
28
28
  )
29
29
 
30
30
 
31
- def render_asset_block(context: Context, block: Table | Formula | Image) -> Element | None:
31
+ def render_asset_block(
32
+ context: Context, block: Table | Formula | Image
33
+ ) -> Element | None:
32
34
  element: Element | None = None
33
35
  if isinstance(block, Table):
34
36
  element = _render_table(context, block)
@@ -44,17 +46,17 @@ def _render_table(context: Context, table: Table) -> Element | None:
44
46
  return None
45
47
 
46
48
  return _wrap_asset_content(
47
- context=context,
48
- asset=table,
49
+ context=context,
50
+ asset=table,
49
51
  content_element=render_html_tag(context, table.html_content),
50
52
  )
51
53
 
52
54
 
53
55
  def _render_formula(
54
- context: Context,
55
- formula: Formula,
56
- inline_mode: bool,
57
- ) -> Element | None:
56
+ context: Context,
57
+ formula: Formula,
58
+ inline_mode: bool,
59
+ ) -> Element | None:
58
60
 
59
61
  if context.latex_render == LaTeXRender.CLIPPING:
60
62
  return None
@@ -88,7 +90,7 @@ def _render_formula(
88
90
 
89
91
  return _wrap_asset_content(
90
92
  context=context,
91
- asset=formula,
93
+ asset=formula,
92
94
  content_element=content_element,
93
95
  inline_mode=inline_mode,
94
96
  )
@@ -106,11 +108,12 @@ def _process_image(context: Context, image: Image) -> Element:
106
108
  img_element.set("alt", "") # Empty alt text, use caption instead
107
109
 
108
110
  return _wrap_asset_content(
109
- context=context,
110
- asset=image,
111
+ context=context,
112
+ asset=image,
111
113
  content_element=img_element,
112
114
  )
113
115
 
116
+
114
117
  def _normalize_expression(expression: str) -> str:
115
118
  expression = expression.replace("\n", "")
116
119
  expression = expression.strip()
@@ -159,7 +162,9 @@ def _latex_formula2svg(latex: str, font_size: int = 12):
159
162
  plt.rc("text", usetex=True)
160
163
  plt.rc("font", size=font_size)
161
164
  fig, ax = plt.subplots()
162
- txt = ax.text(0.5, 0.5, f"${latex}$", ha="center", va="center", transform=ax.transAxes)
165
+ txt = ax.text(
166
+ 0.5, 0.5, f"${latex}$", ha="center", va="center", transform=ax.transAxes
167
+ )
163
168
  ax.axis("off")
164
169
  fig.canvas.draw()
165
170
  bbox = txt.get_window_extent(cast(Any, fig.canvas).get_renderer())
@@ -174,7 +179,7 @@ def _latex_formula2svg(latex: str, font_size: int = 12):
174
179
  return output.getvalue()
175
180
  except Exception:
176
181
  return None
177
-
182
+
178
183
 
179
184
  def _wrap_asset_content(
180
185
  context: Context,
@@ -182,7 +187,7 @@ def _wrap_asset_content(
182
187
  content_element: Element,
183
188
  inline_mode: bool = False,
184
189
  ) -> Element:
185
-
190
+
186
191
  if inline_mode:
187
192
  wrapper = Element("span", attrib={"class": "formula-inline"})
188
193
  else:
@@ -16,7 +16,7 @@ from .gen_asset import render_asset_block
16
16
  from .gen_content import render_inline_content
17
17
  from .xml_utils import serialize_element, set_epub_type
18
18
 
19
- _MAX_HEADING_LEVEL = 6 # HTML standard defines heading levels from h1 to h6
19
+ _MAX_HEADING_LEVEL = 6 # HTML standard defines heading levels from h1 to h6
20
20
 
21
21
 
22
22
  def generate_chapter(
@@ -28,15 +28,14 @@ def generate_chapter(
28
28
  template="part.xhtml",
29
29
  i18n=i18n,
30
30
  content=[
31
- serialize_element(child)
32
- for child in _render_contents(context, chapter)
31
+ serialize_element(child) for child in _render_contents(context, chapter)
33
32
  ],
34
33
  citations=[
35
- serialize_element(child)
36
- for child in _render_footnotes(context, chapter)
34
+ serialize_element(child) for child in _render_footnotes(context, chapter)
37
35
  ],
38
36
  )
39
37
 
38
+
40
39
  def _render_contents(
41
40
  context: Context,
42
41
  chapter: Chapter,
@@ -46,6 +45,7 @@ def _render_contents(
46
45
  if layout is not None:
47
46
  yield layout
48
47
 
48
+
49
49
  def _render_footnotes(
50
50
  context: Context,
51
51
  chapter: Chapter,
@@ -115,6 +115,6 @@ def _render_content_block(context: Context, block: ContentBlock) -> Element | No
115
115
  return blockquote
116
116
 
117
117
  return container
118
-
118
+
119
119
  else:
120
120
  return None
@@ -6,9 +6,7 @@ from .xml_utils import set_epub_type
6
6
 
7
7
 
8
8
  def render_inline_content(
9
- context: Context,
10
- parent: Element,
11
- content: list[str | Mark | Formula | HTMLTag]
9
+ context: Context, parent: Element, content: list[str | Mark | Formula | HTMLTag]
12
10
  ) -> None:
13
11
  current_element = parent
14
12
  for item in content:
@@ -31,6 +29,7 @@ def render_inline_content(
31
29
 
32
30
  elif isinstance(item, Formula):
33
31
  from .gen_asset import render_inline_formula # avoid circular import
32
+
34
33
  formula_element = render_inline_formula(context, item)
35
34
  if formula_element is not None:
36
35
  parent.append(formula_element)
@@ -56,4 +55,4 @@ def render_html_tag(context: Context, tag: HTMLTag) -> Element:
56
55
  for attr, value in tag.attributes:
57
56
  element.set(attr, value)
58
57
  render_inline_content(context, element, tag.content)
59
- return element
58
+ return element
@@ -6,13 +6,14 @@ from uuid import uuid4
6
6
  from zipfile import ZipFile
7
7
 
8
8
  from ..context import Context, Template
9
- from ..html_tag import search_content
10
9
  from ..i18n import I18N
11
10
  from ..options import LaTeXRender, TableRender
12
- from ..types import BasicAsset, Chapter, ContentBlock, EpubData, Formula, TextBlock
11
+ from ..types import EpubData
12
+ from ..validate import validate_chapter, validate_epub_data
13
13
  from .gen_chapter import generate_chapter
14
14
  from .gen_nav import gen_nav
15
15
  from .gen_toc import TocPoint, gen_toc, iter_toc
16
+ from .xml_utils import MATHML_NS
16
17
 
17
18
 
18
19
  def generate_epub(
@@ -23,6 +24,9 @@ def generate_epub(
23
24
  latex_render: LaTeXRender = LaTeXRender.MATHML,
24
25
  assert_not_aborted: Callable[[], None] = lambda: None,
25
26
  ) -> None:
27
+ # Validate epub_data for invalid Unicode characters before processing
28
+ validate_epub_data(epub_data)
29
+
26
30
  i18n = I18N(lan)
27
31
  template = Template()
28
32
  epub_file_path = Path(epub_file_path)
@@ -114,12 +118,14 @@ def _write_chapters_from_data(
114
118
  ):
115
119
  for file_name, get_chapter in _search_chapters(epub_data, toc_points):
116
120
  chapter = get_chapter()
121
+ # Validate chapter content for invalid Unicode characters
122
+ validate_chapter(chapter, context=f"Chapter '{file_name}'")
117
123
  data = generate_chapter(context, chapter, i18n)
118
124
  context.file.writestr(
119
125
  zinfo_or_arcname="OEBPS/Text/" + file_name,
120
126
  data=data.encode("utf-8"),
121
127
  )
122
- if latex_render == LaTeXRender.MATHML and _chapter_has_formula(chapter):
128
+ if latex_render == LaTeXRender.MATHML and MATHML_NS in data:
123
129
  context.mark_chapter_has_mathml(file_name)
124
130
  assert_not_aborted()
125
131
 
@@ -131,34 +137,6 @@ def _search_chapters(epub_data: EpubData, toc_points: list[TocPoint]):
131
137
  yield ref.file_name, ref.get_chapter
132
138
 
133
139
 
134
- def _chapter_has_formula(chapter: Chapter) -> bool:
135
- for element in chapter.elements:
136
- if _content_block_has_formula(element):
137
- return True
138
- for footnote in chapter.footnotes:
139
- for content_block in footnote.contents:
140
- if _content_block_has_formula(content_block):
141
- return True
142
- return False
143
-
144
-
145
- def _content_block_has_formula(content_block: ContentBlock) -> bool:
146
- if isinstance(content_block, Formula):
147
- return True
148
- if isinstance(content_block, TextBlock):
149
- for item in search_content(content_block.content):
150
- if isinstance(item, Formula):
151
- return True
152
- if isinstance(content_block, BasicAsset):
153
- for item in search_content(content_block.title):
154
- if isinstance(item, Formula):
155
- return True
156
- for item in search_content(content_block.caption):
157
- if isinstance(item, Formula):
158
- return True
159
- return False
160
-
161
-
162
140
  def _write_basic_files(
163
141
  context: Context,
164
142
  i18n: I18N,
@@ -21,6 +21,7 @@ class TocPoint:
21
21
  """是否有对应的 XHTML 文件"""
22
22
  return self.ref is not None
23
23
 
24
+
24
25
  @dataclass
25
26
  class TocPointRef:
26
27
  part_id: str
@@ -40,10 +41,7 @@ def gen_toc(epub_data: EpubData) -> list[TocPoint]:
40
41
  chapters = epub_data.chapters
41
42
 
42
43
  toc_point_generation = _TocPointGenerator(
43
- chapters_count=(
44
- _count_toc_items(prefaces) +
45
- _count_toc_items(chapters)
46
- ),
44
+ chapters_count=(_count_toc_items(prefaces) + _count_toc_items(chapters)),
47
45
  )
48
46
  toc_points: list[TocPoint] = []
49
47
  for chapters_list in (prefaces, chapters):
@@ -91,15 +89,12 @@ class _TocPointGenerator:
91
89
  file_name=f"part{part_id}.xhtml",
92
90
  get_chapter=toc_item.get_chapter,
93
91
  )
94
- order = self._next_order # 确保 order 以中序遍历为顺序
92
+ order = self._next_order # 确保 order 以中序遍历为顺序
95
93
  self._next_order += 1
96
94
 
97
95
  return TocPoint(
98
- title=toc_item.title,
96
+ title=toc_item.title,
99
97
  order=order,
100
- ref=ref,
101
- children=[
102
- self._create_toc_point(child)
103
- for child in toc_item.children
104
- ],
98
+ ref=ref,
99
+ children=[self._create_toc_point(child) for child in toc_item.children],
105
100
  )
@@ -1,19 +1,19 @@
1
1
  import re
2
- from typing import Container
3
2
  from xml.etree.ElementTree import Element, tostring
4
3
 
4
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML"
5
5
  _EPUB_NS = "http://www.idpf.org/2007/ops"
6
- _MATHML_NS = "http://www.w3.org/1998/Math/MathML"
7
6
 
8
7
 
9
8
  def set_epub_type(element: Element, epub_type: str) -> None:
10
9
  element.set(f"{{{_EPUB_NS}}}type", epub_type)
11
10
 
11
+
12
12
  def serialize_element(element: Element) -> str:
13
13
  xml_string = tostring(element, encoding="unicode")
14
14
  for prefix, namespace_uri, keep_xmlns in (
15
15
  ("epub", _EPUB_NS, False), # EPUB namespace: remove xmlns (declared at root)
16
- ("m", _MATHML_NS, True), # MathML namespace: keep xmlns with clean prefix
16
+ ("m", MATHML_NS, True), # MathML namespace: keep xmlns with clean prefix
17
17
  ):
18
18
  xml_string = xml_string.replace(f"{{{namespace_uri}}}", f"{prefix}:")
19
19
  pattern = r"xmlns:(ns\d+)=\"" + re.escape(namespace_uri) + r"\""
@@ -22,33 +22,29 @@ def serialize_element(element: Element) -> str:
22
22
  for ns_prefix in matches:
23
23
  if keep_xmlns:
24
24
  xml_string = xml_string.replace(
25
- f" xmlns:{ns_prefix}=\"{namespace_uri}\"",
26
- f" xmlns:{prefix}=\"{namespace_uri}\""
25
+ f' xmlns:{ns_prefix}="{namespace_uri}"',
26
+ f' xmlns:{prefix}="{namespace_uri}"',
27
27
  )
28
28
  else:
29
- xml_string = xml_string.replace(f" xmlns:{ns_prefix}=\"{namespace_uri}\"", "")
29
+ xml_string = xml_string.replace(
30
+ f' xmlns:{ns_prefix}="{namespace_uri}"', ""
31
+ )
30
32
  xml_string = xml_string.replace(f"{ns_prefix}:", f"{prefix}:")
31
33
 
32
34
  return xml_string
33
35
 
34
- def indent(elem: Element, level: int = 0, skip_tags: Container[str] = ()) -> Element:
36
+
37
+ def indent(elem: Element, level: int = 0) -> Element:
35
38
  indent_str = " " * level
36
39
  next_indent_str = " " * (level + 1)
37
-
38
- if elem.tag in skip_tags:
39
- if level > 0 and (not elem.tail or not elem.tail.strip()):
40
- elem.tail = "\n" + indent_str
41
- return elem
42
-
43
40
  if len(elem):
44
41
  if not elem.text or not elem.text.strip():
45
42
  elem.text = "\n" + next_indent_str
46
43
  for i, child in enumerate(elem):
47
- indent(child, level + 1, skip_tags)
48
- if i < len(elem) - 1:
49
- child.tail = "\n" + next_indent_str
50
- else:
51
- child.tail = "\n" + indent_str
52
- elif level > 0 and (not elem.tail or not elem.tail.strip()):
53
- elem.tail = "\n" + indent_str
44
+ indent(child, level + 1)
45
+ if not child.tail or not child.tail.strip():
46
+ if i == len(elem) - 1:
47
+ child.tail = "\n" + indent_str
48
+ else:
49
+ child.tail = "\n" + next_indent_str
54
50
  return elem
@@ -3,9 +3,11 @@ from typing import Generator
3
3
  from .types import Formula, HTMLTag, Mark
4
4
 
5
5
 
6
- def search_content(content: list[str | Mark | Formula | HTMLTag]) -> Generator[str | Mark | Formula, None, None]:
6
+ def search_content(
7
+ content: list[str | Mark | Formula | HTMLTag],
8
+ ) -> Generator[str | Mark | Formula, None, None]:
7
9
  for child in content:
8
10
  if isinstance(child, HTMLTag):
9
11
  yield from search_content(child.content)
10
12
  else:
11
- yield child
13
+ yield child
epub_generator/types.py CHANGED
@@ -24,6 +24,7 @@ class EpubData:
24
24
  cover_image_path: Path | None = None
25
25
  """Cover image file path (optional, absolute path)"""
26
26
 
27
+
27
28
  @dataclass
28
29
  class BookMeta:
29
30
  """Book metadata information."""
@@ -57,9 +58,11 @@ class BookMeta:
57
58
  # Table of Contents structure
58
59
  # ============================================================================
59
60
 
61
+
60
62
  @dataclass
61
63
  class TocItem:
62
64
  """Table of contents item with title, content, and optional nested children."""
65
+
63
66
  title: str
64
67
  """Chapter title displayed in table of contents"""
65
68
 
@@ -69,6 +72,7 @@ class TocItem:
69
72
  children: "list[TocItem]" = field(default_factory=list)
70
73
  """Nested sub-chapters (recursive, optional)"""
71
74
 
75
+
72
76
  class TextKind(Enum):
73
77
  BODY = "body"
74
78
  """Regular paragraph."""
@@ -77,21 +81,29 @@ class TextKind(Enum):
77
81
  QUOTE = "quote"
78
82
  """Quoted text."""
79
83
 
84
+
80
85
  @dataclass
81
86
  class Mark:
82
87
  """Citation reference marker."""
88
+
83
89
  id: int
84
90
  """Citation ID, matches Footnote.id"""
85
91
 
92
+
86
93
  @dataclass
87
94
  class BasicAsset:
88
95
  """Asset as a base class for other assets."""
89
96
 
90
- title: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
97
+ title: list["str | Mark | Formula | HTMLTag"] = field(
98
+ default_factory=list, kw_only=True
99
+ )
91
100
  """Asset title (before content)"""
92
- caption: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
101
+ caption: list["str | Mark | Formula | HTMLTag"] = field(
102
+ default_factory=list, kw_only=True
103
+ )
93
104
  """Asset caption (after content)"""
94
105
 
106
+
95
107
  @dataclass
96
108
  class Table(BasicAsset):
97
109
  """Table representation."""
@@ -115,6 +127,7 @@ class Image(BasicAsset):
115
127
  path: Path
116
128
  """Absolute path to the image file"""
117
129
 
130
+
118
131
  @dataclass
119
132
  class TextBlock:
120
133
  """Text block representation."""
@@ -126,9 +139,11 @@ class TextBlock:
126
139
  content: list["str | Mark | Formula | HTMLTag"]
127
140
  """Text content with optional citation marks."""
128
141
 
142
+
129
143
  @dataclass
130
144
  class Footnote:
131
145
  """Footnote/citation section."""
146
+
132
147
  id: int
133
148
  """Footnote ID"""
134
149
 
@@ -142,17 +157,21 @@ class Footnote:
142
157
  ContentBlock = TextBlock | Table | Formula | Image
143
158
  """Union of all content blocks that appear in main chapter content."""
144
159
 
160
+
145
161
  @dataclass
146
162
  class Chapter:
147
163
  """Complete content of a single chapter."""
164
+
148
165
  elements: list[ContentBlock] = field(default_factory=list)
149
166
  """Main content blocks"""
150
167
 
151
168
  footnotes: list[Footnote] = field(default_factory=list)
152
169
  """Footnotes"""
153
170
 
171
+
154
172
  ChapterGetter = Callable[[], Chapter]
155
173
 
174
+
156
175
  @dataclass
157
176
  class HTMLTag:
158
177
  """Generic HTML tag representation."""
@@ -164,4 +183,4 @@ class HTMLTag:
164
183
  """List of (attribute, value) pairs"""
165
184
 
166
185
  content: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list)
167
- """Inner HTML content"""
186
+ """Inner HTML content"""
@@ -0,0 +1,226 @@
1
+ from .types import (
2
+ BasicAsset,
3
+ Chapter,
4
+ ContentBlock,
5
+ EpubData,
6
+ Footnote,
7
+ Formula,
8
+ HTMLTag,
9
+ Image,
10
+ Mark,
11
+ Table,
12
+ TextBlock,
13
+ TocItem,
14
+ )
15
+
16
+
17
+ class InvalidUnicodeError(Exception):
18
+ """Raised when invalid Unicode characters (surrogates) are detected in EPUB data."""
19
+
20
+ def __init__(self, field_path: str, invalid_char_info: str):
21
+ """Initialize with field path and character information.
22
+
23
+ Args:
24
+ field_path: Dot-separated path to the field containing invalid characters
25
+ invalid_char_info: Information about the invalid character(s)
26
+ """
27
+ self.field_path = field_path
28
+ self.invalid_char_info = invalid_char_info
29
+ super().__init__(
30
+ f"Invalid Unicode character detected in {field_path}: {invalid_char_info}"
31
+ )
32
+
33
+
34
+ def validate_epub_data(epub_data: EpubData) -> None:
35
+ """Validate an EpubData object for invalid Unicode characters.
36
+
37
+ This function checks all string fields in the EPUB data structure including:
38
+ - Book metadata (title, description, authors, etc.)
39
+ - Table of contents titles (recursively)
40
+ - Chapter content is NOT validated here (use validate_chapter separately)
41
+
42
+ Args:
43
+ epub_data: EPUB data to validate
44
+
45
+ Raises:
46
+ InvalidUnicodeError: If surrogate characters are detected in any string field
47
+ """
48
+ # Check metadata
49
+ if epub_data.meta:
50
+ meta = epub_data.meta
51
+ _check_string(meta.title, "EpubData.meta.title")
52
+ _check_string(meta.description, "EpubData.meta.description")
53
+ _check_string(meta.publisher, "EpubData.meta.publisher")
54
+ _check_string(meta.isbn, "EpubData.meta.isbn")
55
+
56
+ for i, author in enumerate(meta.authors):
57
+ _check_string(author, f"EpubData.meta.authors[{i}]")
58
+
59
+ for i, editor in enumerate(meta.editors):
60
+ _check_string(editor, f"EpubData.meta.editors[{i}]")
61
+
62
+ for i, translator in enumerate(meta.translators):
63
+ _check_string(translator, f"EpubData.meta.translators[{i}]")
64
+
65
+ # Check prefaces TOC
66
+ for i, preface in enumerate(epub_data.prefaces):
67
+ _check_toc_item(preface, f"EpubData.prefaces[{i}]")
68
+
69
+ # Check chapters TOC
70
+ for i, chapter_toc in enumerate(epub_data.chapters):
71
+ _check_toc_item(chapter_toc, f"EpubData.chapters[{i}]")
72
+
73
+
74
+ def validate_chapter(chapter: Chapter, context: str = "Chapter") -> None:
75
+ """Validate a Chapter object for invalid Unicode characters.
76
+
77
+ Args:
78
+ chapter: Chapter to validate
79
+ context: Context string for error reporting (e.g., "Chapter", "chapters[0]")
80
+
81
+ Raises:
82
+ InvalidUnicodeError: If surrogate characters are detected in any string field
83
+ """
84
+ # Check main content elements
85
+ for i, element in enumerate(chapter.elements):
86
+ _check_content_block(element, f"{context}.elements[{i}]")
87
+
88
+ # Check footnotes
89
+ for i, footnote in enumerate(chapter.footnotes):
90
+ _check_footnote(footnote, f"{context}.footnotes[{i}]")
91
+
92
+
93
+ def _check_string(value: str | None, field_path: str) -> None:
94
+ """Check if a string contains surrogate characters.
95
+
96
+ Args:
97
+ value: String to check
98
+ field_path: Path to the field for error reporting
99
+
100
+ Raises:
101
+ InvalidUnicodeError: If surrogate characters are detected
102
+ """
103
+ if value is None:
104
+ return
105
+
106
+ for i, char in enumerate(value):
107
+ code_point = ord(char)
108
+ # Check for surrogate pair range (U+D800 to U+DFFF)
109
+ if 0xD800 <= code_point <= 0xDFFF:
110
+ raise InvalidUnicodeError(
111
+ field_path=field_path,
112
+ invalid_char_info=f"surrogate character U+{code_point:04X} at position {i}",
113
+ )
114
+
115
+
116
+ def _check_string_list(
117
+ values: list[str | Mark | Formula | HTMLTag], field_path: str
118
+ ) -> None:
119
+ """Recursively check a list that may contain strings, marks, formulas, or HTML tags.
120
+
121
+ Args:
122
+ values: List to check
123
+ field_path: Path to the field for error reporting
124
+
125
+ Raises:
126
+ InvalidUnicodeError: If surrogate characters are detected
127
+ """
128
+ for i, item in enumerate(values):
129
+ item_path = f"{field_path}[{i}]"
130
+ if isinstance(item, str):
131
+ _check_string(item, item_path)
132
+ elif isinstance(item, Mark):
133
+ pass # Mark only contains int ID
134
+ elif isinstance(item, Formula):
135
+ _check_string(item.latex_expression, f"{item_path}.latex_expression")
136
+ _check_string_list(item.title, f"{item_path}.title")
137
+ _check_string_list(item.caption, f"{item_path}.caption")
138
+ elif isinstance(item, HTMLTag):
139
+ _check_html_tag(item, item_path)
140
+
141
+
142
+ def _check_html_tag(tag: HTMLTag, field_path: str) -> None:
143
+ """Check an HTML tag for invalid characters.
144
+
145
+ Args:
146
+ tag: HTML tag to check
147
+ field_path: Path to the field for error reporting
148
+
149
+ Raises:
150
+ InvalidUnicodeError: If surrogate characters are detected
151
+ """
152
+ _check_string(tag.name, f"{field_path}.name")
153
+
154
+ for i, (attr_name, attr_value) in enumerate(tag.attributes):
155
+ _check_string(attr_name, f"{field_path}.attributes[{i}][0]")
156
+ _check_string(attr_value, f"{field_path}.attributes[{i}][1]")
157
+
158
+ _check_string_list(tag.content, f"{field_path}.content")
159
+
160
+
161
+ def _check_basic_asset(asset: BasicAsset, field_path: str) -> None:
162
+ """Check BasicAsset (and subclasses) for invalid characters.
163
+
164
+ Args:
165
+ asset: Asset to check
166
+ field_path: Path to the field for error reporting
167
+
168
+ Raises:
169
+ InvalidUnicodeError: If surrogate characters are detected
170
+ """
171
+ _check_string_list(asset.title, f"{field_path}.title")
172
+ _check_string_list(asset.caption, f"{field_path}.caption")
173
+
174
+ if isinstance(asset, Formula):
175
+ _check_string(asset.latex_expression, f"{field_path}.latex_expression")
176
+ elif isinstance(asset, Table):
177
+ _check_html_tag(asset.html_content, f"{field_path}.html_content")
178
+ elif isinstance(asset, Image):
179
+ pass # Image only contains Path, no string content to check
180
+
181
+
182
+ def _check_content_block(block: ContentBlock, field_path: str) -> None:
183
+ """Check a content block for invalid characters.
184
+
185
+ Args:
186
+ block: Content block to check
187
+ field_path: Path to the field for error reporting
188
+
189
+ Raises:
190
+ InvalidUnicodeError: If surrogate characters are detected
191
+ """
192
+ if isinstance(block, TextBlock):
193
+ _check_string_list(block.content, f"{field_path}.content")
194
+ elif isinstance(block, (Table, Formula, Image)):
195
+ _check_basic_asset(block, field_path)
196
+
197
+
198
+ def _check_footnote(footnote: Footnote, field_path: str) -> None:
199
+ """Check a footnote for invalid characters.
200
+
201
+ Args:
202
+ footnote: Footnote to check
203
+ field_path: Path to the field for error reporting
204
+
205
+ Raises:
206
+ InvalidUnicodeError: If surrogate characters are detected
207
+ """
208
+ for i, content_block in enumerate(footnote.contents):
209
+ _check_content_block(content_block, f"{field_path}.contents[{i}]")
210
+
211
+
212
+ def _check_toc_item(item: TocItem, field_path: str) -> None:
213
+ """Recursively check a TOC item for invalid characters.
214
+
215
+ Args:
216
+ item: TOC item to check
217
+ field_path: Path to the field for error reporting
218
+
219
+ Raises:
220
+ InvalidUnicodeError: If surrogate characters are detected
221
+ """
222
+ _check_string(item.title, f"{field_path}.title")
223
+
224
+ # Check nested children recursively
225
+ for i, child in enumerate(item.children):
226
+ _check_toc_item(child, f"{field_path}.children[{i}]")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: epub-generator
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A simple Python EPUB 3.0 generator with a single API call
5
5
  License: MIT
6
6
  Keywords: epub,epub3,ebook,generator,publishing
@@ -0,0 +1,27 @@
1
+ epub_generator/__init__.py,sha256=G1P_GAUym94iv56PPK31641vlYrukUoMJZgWtmKscog,768
2
+ epub_generator/context.py,sha256=oS39IdttvQ2MR_jJS2DTG62YMbZtYr2c7iQ1jpyc9Rg,4324
3
+ epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
4
+ epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
5
+ epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
6
+ epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
7
+ epub_generator/data/nav.xhtml.jinja,sha256=zk5hf-MYoKxd4pcshZV5VliVrtDIgfH7n9f3-1L1cY0,1132
8
+ epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
9
+ epub_generator/data/style.css.jinja,sha256=n_DE-z97ikGzD3qufSwX_1iqkQcE_5kXiCIhyoXNjRA,1400
10
+ epub_generator/generation/__init__.py,sha256=tqkUQbu27fU2JZWpg17THnZzoIfCTelE0i70gmaPT6Q,36
11
+ epub_generator/generation/gen_asset.py,sha256=xJOtJrdrh_y0g78AoqxyES_s5g2cD-DY2I0bgLWQWHk,5975
12
+ epub_generator/generation/gen_chapter.py,sha256=UrnzjEcPIACYNrjM84AkgyidAg128y_V2n5HrcD1Egs,3400
13
+ epub_generator/generation/gen_content.py,sha256=QqUhfGyXOJ1_lf4aLrYyoZxbew5wChz-qegcsmNHgJs,2012
14
+ epub_generator/generation/gen_epub.py,sha256=Sd8tC-3GhBN90EF2IOlHnhUT9-yfcA3KqhQ3BQ9V4jo,5435
15
+ epub_generator/generation/gen_nav.py,sha256=_cjOP18C1CoTn_DELIB06pyMPZZ0CPbkk4oPEvICdKs,1955
16
+ epub_generator/generation/gen_toc.py,sha256=8pe06atBNDbrMcn32nGtV5NVYqAksuW9Z5KK1C8j6Ys,2784
17
+ epub_generator/generation/xml_utils.py,sha256=Ugbdj_2HUoTZz5LgrYviYVq1MPTL9KWMkruRCQbR4Ys,1852
18
+ epub_generator/html_tag.py,sha256=NmBgeUOHXsS8a74Z5O47iYDTsErW61BJiwyEEOfdyWQ,351
19
+ epub_generator/i18n.py,sha256=-L6J6hsy796_IQ4nLpNtAeXIkRM6oFSWSHDlRZXW8aA,705
20
+ epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
21
+ epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
22
+ epub_generator/types.py,sha256=O1PX8pGq1fyEiLA8gQ8Jaq1ldG5w3mYV7sCgTZowaDI,4484
23
+ epub_generator/validate.py,sha256=iZkHH5Xl_U5hGHusytb4sIQTomqI0BMHjKZL06c9lrI,7550
24
+ epub_generator-0.1.7.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
25
+ epub_generator-0.1.7.dist-info/METADATA,sha256=_RPI7Q8ixvTjibSkNFvnJ-oIPo25iuVkAHisTsjo3mw,16555
26
+ epub_generator-0.1.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
27
+ epub_generator-0.1.7.dist-info/RECORD,,
@@ -1,26 +0,0 @@
1
- epub_generator/__init__.py,sha256=5fFpZdgB4-FfXgCpE5IshBfrfrMaxNQK4SRKaKV2RdI,682
2
- epub_generator/context.py,sha256=9jHRpnQsNooRUSBoY_tiQ7aQ_AMZmyKUO22gPoO8Koc,4324
3
- epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
4
- epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
5
- epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
6
- epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
7
- epub_generator/data/nav.xhtml.jinja,sha256=zk5hf-MYoKxd4pcshZV5VliVrtDIgfH7n9f3-1L1cY0,1132
8
- epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
9
- epub_generator/data/style.css.jinja,sha256=n_DE-z97ikGzD3qufSwX_1iqkQcE_5kXiCIhyoXNjRA,1400
10
- epub_generator/generation/__init__.py,sha256=UIscwHa8ocr2D1mk1KaP-zi3P1x9eYJzxTo0RJ2dnks,35
11
- epub_generator/generation/gen_asset.py,sha256=WYwfGUvHM_CrwTuIIH7dYm-SL-vdhkTnvaZDymZxXzg,5978
12
- epub_generator/generation/gen_chapter.py,sha256=P6kmB8hdQnJB6SCheHzu5cOmZrC5H0LqNV-uuuigX1M,3425
13
- epub_generator/generation/gen_content.py,sha256=2ojjTgalveRnk1MXQaKsY53hPCgb7NHTwbMpLOXVrss,2018
14
- epub_generator/generation/gen_epub.py,sha256=I7u8rrrslF9xoyDUsALarB2iWzY9zjKM9ZOR1wLMX1E,6184
15
- epub_generator/generation/gen_nav.py,sha256=_cjOP18C1CoTn_DELIB06pyMPZZ0CPbkk4oPEvICdKs,1955
16
- epub_generator/generation/gen_toc.py,sha256=MK2iTYBpF8VUtPHpwz5JB_H6nWsKRKpVuLzRPYGy0nw,2864
17
- epub_generator/generation/xml_utils.py,sha256=kyHBWUihT5se5n_425BcEvBpsIK6yC52W25t012QUn0,2084
18
- epub_generator/html_tag.py,sha256=P_Y0uRStCEEh7cCtpvK4t432NEcY9OLntAznvdxUF5k,343
19
- epub_generator/i18n.py,sha256=-L6J6hsy796_IQ4nLpNtAeXIkRM6oFSWSHDlRZXW8aA,705
20
- epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
21
- epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
22
- epub_generator/types.py,sha256=gBrdi1KYOVEnI0qEp1slLsyUw_Sd7v09uHvN8_Hf9Z8,4440
23
- epub_generator-0.1.5.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
24
- epub_generator-0.1.5.dist-info/METADATA,sha256=cwIGyOGFrt0hvtw_FHaaTjeoy-l-FP-SGZC4zP0MJyw,16555
25
- epub_generator-0.1.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
26
- epub_generator-0.1.5.dist-info/RECORD,,