PyPI - epub-generator - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl - Mend

epub-generator 0.1.4 (py3-none-any.whl) → 0.1.6 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

epub_generator/__init__.py +5 -0
epub_generator/context.py +4 -5
epub_generator/data/nav.xhtml.jinja +1 -1
epub_generator/data/style.css.jinja +22 -0
epub_generator/generation/gen_asset.py +87 -35
epub_generator/generation/gen_chapter.py +8 -68
epub_generator/generation/gen_content.py +59 -0
epub_generator/generation/gen_epub.py +49 -43
epub_generator/generation/gen_nav.py +40 -62
epub_generator/generation/gen_toc.py +57 -40
epub_generator/generation/xml_utils.py +15 -0
epub_generator/i18n.py +2 -0
epub_generator/types.py +20 -9
epub_generator/validate.py +224 -0
{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/METADATA +1 -1
epub_generator-0.1.6.dist-info/RECORD +27 -0
epub_generator-0.1.4.dist-info/RECORD +0 -25
{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/LICENSE +0 -0
{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/WHEEL +0 -0

epub_generator/__init__.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from .generation import generate_epub
 from .options import LaTeXRender, TableRender
 from .types import (
+    BasicAsset,
     BookMeta,
     Chapter,
     ChapterGetter,
@@ -16,10 +17,13 @@ from .types import (
     TextKind,
     TocItem,
 )
+from .validate import InvalidUnicodeError
 __all__ = [
     # Main API function
     "generate_epub",
+    # Validation
+    "InvalidUnicodeError",
     # Options
     "TableRender",
     "LaTeXRender",
@@ -35,6 +39,7 @@ __all__ = [
     "Table",
     "Formula",
     "HTMLTag",
+    "BasicAsset",
     "Image",
     "Footnote",
     "Mark",

epub_generator/context.py CHANGED Viewed

@@ -55,15 +55,14 @@ class Context:
         nodes = list(self._hash_to_node.values())
         nodes.sort(key=lambda node: node.file_name)
         return [(node.file_name, node.media_type) for node in nodes]
+    @property
+    def chapters_with_mathml(self) -> set[str]:
+        return self._chapters_with_mathml
     def mark_chapter_has_mathml(self, chapter_file_name: str) -> None:
-        """Mark a chapter as containing MathML content for EPUB 3.0 manifest properties."""
         self._chapters_with_mathml.add(chapter_file_name)
-    def chapter_has_mathml(self, chapter_file_name: str) -> bool:
-        """Check if a chapter contains MathML content."""
-        return chapter_file_name in self._chapters_with_mathml
     def use_asset(
         self,
         source_path: Path,

epub_generator/data/nav.xhtml.jinja CHANGED Viewed

@@ -20,7 +20,7 @@
         <a href="Text/head.xhtml">{{ head_chapter_title }}</a>
       </li>
 {% endif %}
-{{ toc_list|safe }}
+{{ toc_body|safe }}
     </ol>
   </nav>

epub_generator/data/style.css.jinja CHANGED Viewed

@@ -65,4 +65,26 @@ span.formula-inline img {
   vertical-align: middle;
   margin: 0 0.2em;
   max-height: 1.2em;
+}
+div.asset {
+  page-break-inside: avoid;
+  margin: 1em 0;
+}
+div.asset-title {
+  text-align: center;
+  font-weight: 600;
+  font-size: 0.95em;
+  color: #333;
+  margin-bottom: 0.5em;
+  font-style: italic;
+}
+div.asset-caption {
+  text-align: center;
+  font-size: 0.9em;
+  color: #666;
+  margin-top: 0.5em;
+  font-style: italic;
 }

epub_generator/generation/gen_asset.py CHANGED Viewed

@@ -8,7 +8,8 @@ from latex2mathml.converter import convert
 from ..context import Context
 from ..options import LaTeXRender, TableRender
-from ..types import Formula, Image, Table
+from ..types import BasicAsset, Formula, Image, Table
+from .gen_content import render_html_tag, render_inline_content
 _MEDIA_TYPE_MAP = {
     ".png": "image/png",
@@ -18,25 +19,40 @@ _MEDIA_TYPE_MAP = {
     ".svg": "image/svg+xml",
 }
-def process_table(context: Context, table: Table) -> Element | None:
-    if context.table_render == TableRender.CLIPPING:
-        return None
-    try:
-        wrapped_html = f"<div>{table.html_content}</div>"
-        parsed = fromstring(wrapped_html)
-        wrapper = Element("div", attrib={"class": "alt-wrapper"})
-        for child in parsed:
-            wrapper.append(child)
+def render_inline_formula(context: Context, formula: Formula) -> Element | None:
+    return _render_formula(
+        context=context,
+        formula=formula,
+        inline_mode=True,
+    )
-        return wrapper if len(wrapper) > 0 else None
-    except Exception:
+def render_asset_block(context: Context, block: Table | Formula | Image) -> Element | None:
+    element: Element | None = None
+    if isinstance(block, Table):
+        element = _render_table(context, block)
+    elif isinstance(block, Formula):
+        element = _render_formula(context, block, inline_mode=False)
+    elif isinstance(block, Image):
+        element = _process_image(context, block)
+    return element
+def _render_table(context: Context, table: Table) -> Element | None:
+    if context.table_render == TableRender.CLIPPING:
         return None
+    return _wrap_asset_content(
+        context=context,
+        asset=table,
+        content_element=render_html_tag(context, table.html_content),
+    )
-def process_formula(
-        context: Context,
-        formula: Formula,
+def _render_formula(
+        context: Context,
+        formula: Formula,
         inline_mode: bool,
     ) -> Element | None:
@@ -47,9 +63,10 @@ def process_formula(
     if not latex_expr:
         return None
+    content_element = None
     if context.latex_render == LaTeXRender.MATHML:
-        return _latex2mathml(
-            latex=latex_expr,
+        content_element = _latex2mathml(
+            latex=latex_expr,
             inline_mode=inline_mode,
         )
     elif context.latex_render == LaTeXRender.SVG:
@@ -64,31 +81,40 @@ def process_formula(
         img_element = Element("img")
         img_element.set("src", f"../assets/{file_name}")
         img_element.set("alt", "formula")
+        content_element = img_element
-        if inline_mode:
-            wrapper = Element("span", attrib={"class": "formula-inline"})
-        else:
-            wrapper = Element("div", attrib={"class": "alt-wrapper"})
+    if content_element is None:
+        return None
-        wrapper.append(img_element)
-        return wrapper
+    return _wrap_asset_content(
+        context=context,
+        asset=formula,
+        content_element=content_element,
+        inline_mode=inline_mode,
+    )
-    return None
-def process_image(context: Context, image: Image) -> Element | None:
+def _process_image(context: Context, image: Image) -> Element:
     file_ext = image.path.suffix or ".png"
     file_name = context.use_asset(
-        source_path=image.path,
-        media_type=_MEDIA_TYPE_MAP.get(file_ext.lower(), "image/png"),
+        source_path=image.path,
+        media_type=_MEDIA_TYPE_MAP.get(file_ext.lower(), "image/png"),
         file_ext=file_ext,
     )
     img_element = Element("img")
     img_element.set("src", f"../assets/{file_name}")
-    img_element.set("alt", image.alt_text)
+    img_element.set("alt", "")  # Empty alt text, use caption instead
-    wrapper = Element("div", attrib={"class": "alt-wrapper"})
-    wrapper.append(img_element)
-    return wrapper
+    return _wrap_asset_content(
+        context=context,
+        asset=image,
+        content_element=img_element,
+    )
+def _normalize_expression(expression: str) -> str:
+    expression = expression.replace("\n", "")
+    expression = expression.strip()
+    return expression
 _ESCAPE_UNICODE_PATTERN = re.compile(r"&#x([0-9A-Fa-f]{5});")
@@ -148,9 +174,35 @@ def _latex_formula2svg(latex: str, font_size: int = 12):
         return output.getvalue()
     except Exception:
         return None
+def _wrap_asset_content(
+    context: Context,
+    asset: BasicAsset,
+    content_element: Element,
+    inline_mode: bool = False,
+) -> Element:
+    if inline_mode:
+        wrapper = Element("span", attrib={"class": "formula-inline"})
+    else:
+        wrapper = Element("div", attrib={"class": "alt-wrapper"})
+    wrapper.append(content_element)
-def _normalize_expression(expression: str) -> str:
-    expression = expression.replace("\n", "")
-    expression = expression.strip()
-    return expression
+    if not asset.title and not asset.caption:
+        return wrapper
+    container = Element("div", attrib={"class": "asset"})
+    if asset.title:
+        title_div = Element("div", attrib={"class": "asset-title"})
+        render_inline_content(context, title_div, asset.title)
+        container.append(title_div)
+    container.append(wrapper)
+    if asset.caption:
+        caption_div = Element("div", attrib={"class": "asset-caption"})
+        render_inline_content(context, caption_div, asset.caption)
+        container.append(caption_div)
+    return container

epub_generator/generation/gen_chapter.py CHANGED Viewed

@@ -7,14 +7,13 @@ from ..types import (
     Chapter,
     ContentBlock,
     Formula,
-    HTMLTag,
     Image,
-    Mark,
     Table,
     TextBlock,
     TextKind,
 )
-from .gen_asset import process_formula, process_image, process_table
+from .gen_asset import render_asset_block
+from .gen_content import render_inline_content
 from .xml_utils import serialize_element, set_epub_type
 _MAX_HEADING_LEVEL = 6 # HTML standard defines heading levels from h1 to h6
@@ -91,7 +90,10 @@ def _render_footnotes(
 def _render_content_block(context: Context, block: ContentBlock) -> Element | None:
-    if isinstance(block, TextBlock):
+    if isinstance(block, Table | Formula | Image):
+        return render_asset_block(context, block)
+    elif isinstance(block, TextBlock):
         if block.kind == TextKind.HEADLINE:
             heading_level = min(block.level + 1, _MAX_HEADING_LEVEL)
             container = Element(f"h{heading_level}")
@@ -102,7 +104,7 @@ def _render_content_block(context: Context, block: ContentBlock) -> Element | No
         else:
             raise ValueError(f"Unknown TextKind: {block.kind}")
-        _render_text_content(
+        render_inline_content(
             context=context,
             parent=container,
             content=block.content,
@@ -113,68 +115,6 @@ def _render_content_block(context: Context, block: ContentBlock) -> Element | No
             return blockquote
         return container
-    elif isinstance(block, Table):
-        return process_table(context, block)
-    elif isinstance(block, Formula):
-        return process_formula(context, block, inline_mode=False)
-    elif isinstance(block, Image):
-        return process_image(context, block)
     else:
         return None
-def _render_text_content(context: Context, parent: Element, content: list[str | Mark | Formula | HTMLTag]) -> None:
-    """Render text content with inline citation marks."""
-    current_element = parent
-    for item in content:
-        if isinstance(item, str):
-            if current_element is parent:
-                if parent.text is None:
-                    parent.text = item
-                else:
-                    parent.text += item
-            else:
-                if current_element.tail is None:
-                    current_element.tail = item
-                else:
-                    current_element.tail += item
-        elif isinstance(item, HTMLTag):
-            tag_element = Element(item.name)
-            for attr, value in item.attributes:
-                tag_element.set(attr, value)
-            _render_text_content(
-                context=context,
-                parent=tag_element,
-                content=item.content,
-            )
-            parent.append(tag_element)
-            current_element = tag_element
-        elif isinstance(item, Formula):
-            formula_element = process_formula(
-                context=context,
-                formula=item,
-                inline_mode=True,
-            )
-            if formula_element is not None:
-                parent.append(formula_element)
-                current_element = formula_element
-        elif isinstance(item, Mark):
-            # EPUB 3.0 noteref with semantic attributes
-            anchor = Element("a")
-            anchor.attrib = {
-                "id": f"ref-{item.id}",
-                "href": f"#fn-{item.id}",
-                "class": "super",
-            }
-            # Set epub:type using utility function (avoids global namespace pollution)
-            set_epub_type(anchor, "noteref")
-            anchor.text = f"[{item.id}]"
-            parent.append(anchor)
-            current_element = anchor

epub_generator/generation/gen_content.py ADDED Viewed

@@ -0,0 +1,59 @@
+from xml.etree.ElementTree import Element
+from ..context import Context
+from ..types import Formula, HTMLTag, Mark
+from .xml_utils import set_epub_type
+def render_inline_content(
+    context: Context,
+    parent: Element,
+    content: list[str | Mark | Formula | HTMLTag]
+) -> None:
+    current_element = parent
+    for item in content:
+        if isinstance(item, str):
+            if current_element is parent:
+                if parent.text is None:
+                    parent.text = item
+                else:
+                    parent.text += item
+            else:
+                if current_element.tail is None:
+                    current_element.tail = item
+                else:
+                    current_element.tail += item
+        elif isinstance(item, HTMLTag):
+            tag_element = render_html_tag(context, item)
+            parent.append(tag_element)
+            current_element = tag_element
+        elif isinstance(item, Formula):
+            from .gen_asset import render_inline_formula  # avoid circular import
+            formula_element = render_inline_formula(context, item)
+            if formula_element is not None:
+                parent.append(formula_element)
+                current_element = formula_element
+        elif isinstance(item, Mark):
+            # EPUB 3.0 noteref with semantic attributes
+            anchor = Element("a")
+            anchor.attrib = {
+                "id": f"ref-{item.id}",
+                "href": f"#fn-{item.id}",
+                "class": "super",
+            }
+            set_epub_type(anchor, "noteref")
+            anchor.text = f"[{item.id}]"
+            parent.append(anchor)
+            current_element = anchor
+def render_html_tag(context: Context, tag: HTMLTag) -> Element:
+    """Convert HTMLTag to XML Element with full inline content support."""
+    element = Element(tag.name)
+    for attr, value in tag.attributes:
+        element.set(attr, value)
+    render_inline_content(context, element, tag.content)
+    return element

epub_generator/generation/gen_epub.py CHANGED Viewed

@@ -9,10 +9,11 @@ from ..context import Context, Template
 from ..html_tag import search_content
 from ..i18n import I18N
 from ..options import LaTeXRender, TableRender
-from ..types import Chapter, EpubData, Formula, TextBlock
+from ..types import BasicAsset, Chapter, ContentBlock, EpubData, Formula, TextBlock
+from ..validate import validate_chapter, validate_epub_data
 from .gen_chapter import generate_chapter
 from .gen_nav import gen_nav
-from .gen_toc import NavPoint, gen_toc
+from .gen_toc import TocPoint, gen_toc, iter_toc
 def generate_epub(
@@ -23,13 +24,14 @@ def generate_epub(
     latex_render: LaTeXRender = LaTeXRender.MATHML,
     assert_not_aborted: Callable[[], None] = lambda: None,
 ) -> None:
+    # Validate epub_data for invalid Unicode characters before processing
+    validate_epub_data(epub_data)
     i18n = I18N(lan)
     template = Template()
     epub_file_path = Path(epub_file_path)
-    # Generate navigation points from TOC structure
     has_cover = epub_data.cover_image_path is not None
-    nav_points = gen_toc(epub_data=epub_data, has_cover=has_cover)
+    toc_points = gen_toc(epub_data=epub_data)
     epub_file_path.parent.mkdir(parents=True, exist_ok=True)
@@ -49,7 +51,7 @@ def generate_epub(
         _write_chapters_from_data(
             context=context,
             i18n=i18n,
-            nav_points=nav_points,
+            toc_points=toc_points,
             epub_data=epub_data,
             latex_render=latex_render,
             assert_not_aborted=assert_not_aborted,
@@ -58,7 +60,7 @@ def generate_epub(
             template=template,
             i18n=i18n,
             epub_data=epub_data,
-            nav_points=nav_points,
+            toc_points=toc_points,
             has_cover=has_cover,
         )
         file.writestr(
@@ -71,7 +73,7 @@ def generate_epub(
             context=context,
             i18n=i18n,
             epub_data=epub_data,
-            nav_points=nav_points,
+            toc_points=toc_points,
         )
         assert_not_aborted()
@@ -81,6 +83,7 @@ def generate_epub(
             epub_data=epub_data,
         )
 def _write_assets_from_data(
     context: Context,
     i18n: I18N,
@@ -104,62 +107,69 @@ def _write_assets_from_data(
                 arcname="OEBPS/assets/cover.png",
             )
 def _write_chapters_from_data(
     context: Context,
     i18n: I18N,
-    nav_points: list[NavPoint],
+    toc_points: list[TocPoint],
     epub_data: EpubData,
     latex_render: LaTeXRender,
     assert_not_aborted: Callable[[], None],
 ):
-    if epub_data.get_head is not None:
-        chapter = epub_data.get_head()
+    for file_name, get_chapter in _search_chapters(epub_data, toc_points):
+        chapter = get_chapter()
+        # Validate chapter content for invalid Unicode characters
+        validate_chapter(chapter, context=f"Chapter '{file_name}'")
         data = generate_chapter(context, chapter, i18n)
         context.file.writestr(
-            zinfo_or_arcname="OEBPS/Text/head.xhtml",
+            zinfo_or_arcname="OEBPS/Text/" + file_name,
             data=data.encode("utf-8"),
         )
         if latex_render == LaTeXRender.MATHML and _chapter_has_formula(chapter):
-            context.mark_chapter_has_mathml("head.xhtml")
+            context.mark_chapter_has_mathml(file_name)
         assert_not_aborted()
-    for nav_point in nav_points:
-        if nav_point.get_chapter is not None:
-            chapter = nav_point.get_chapter()
-            data = generate_chapter(context, chapter, i18n)
-            context.file.writestr(
-                zinfo_or_arcname="OEBPS/Text/" + nav_point.file_name,
-                data=data.encode("utf-8"),
-            )
-            if latex_render == LaTeXRender.MATHML and _chapter_has_formula(chapter):
-                context.mark_chapter_has_mathml(nav_point.file_name)
-            assert_not_aborted()
+def _search_chapters(epub_data: EpubData, toc_points: list[TocPoint]):
+    if epub_data.get_head is not None:
+        yield "head.xhtml", epub_data.get_head
+    for ref in iter_toc(toc_points):
+        yield ref.file_name, ref.get_chapter
 def _chapter_has_formula(chapter: Chapter) -> bool:
-    """Check if chapter contains any formulas (block-level or inline)."""
     for element in chapter.elements:
-        if isinstance(element, Formula):
+        if _content_block_has_formula(element):
             return True
-        if isinstance(element, TextBlock):
-            for item in search_content(element.content):
-                if isinstance(item, Formula):
-                    return True
     for footnote in chapter.footnotes:
         for content_block in footnote.contents:
-            if isinstance(content_block, Formula):
+            if _content_block_has_formula(content_block):
                 return True
-            if isinstance(content_block, TextBlock):
-                for item in search_content(content_block.content):
-                    if isinstance(item, Formula):
-                        return True
     return False
+def _content_block_has_formula(content_block: ContentBlock) -> bool:
+    if isinstance(content_block, Formula):
+        return True
+    if isinstance(content_block, TextBlock):
+        for item in search_content(content_block.content):
+            if isinstance(item, Formula):
+                return True
+    if isinstance(content_block, BasicAsset):
+        for item in search_content(content_block.title):
+            if isinstance(item, Formula):
+                return True
+        for item in search_content(content_block.caption):
+            if isinstance(item, Formula):
+                return True
+    return False
 def _write_basic_files(
     context: Context,
     i18n: I18N,
     epub_data: EpubData,
-    nav_points: list[NavPoint],
+    toc_points: list[TocPoint],
 ):
     meta = epub_data.meta
     has_cover = epub_data.cover_image_path is not None
@@ -175,22 +185,18 @@ def _write_basic_files(
     else:
         modified_timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    chapters_with_mathml = {
-        nav_point.file_name
-        for nav_point in nav_points
-        if context.chapter_has_mathml(nav_point.file_name)
-    }
+    toc_refs = list(iter_toc(toc_points))
     content = context.template.render(
         template="content.opf",
         meta=meta,
         i18n=i18n,
         ISBN=isbn,
         modified_timestamp=modified_timestamp,
-        nav_points=nav_points,
+        nav_points=toc_refs,
         has_head_chapter=has_head_chapter,
         has_cover=has_cover,
         asset_files=context.used_files,
-        chapters_with_mathml=chapters_with_mathml,
+        chapters_with_mathml=context.chapters_with_mathml,
     )
     context.file.writestr(
         zinfo_or_arcname="OEBPS/content.opf",

epub_generator/generation/gen_nav.py CHANGED Viewed

@@ -1,26 +1,30 @@
-from html import escape
+from xml.etree.ElementTree import Element
 from ..context import Template
 from ..i18n import I18N
-from ..types import BookMeta, EpubData, TocItem
-from .gen_toc import NavPoint
+from ..types import BookMeta, EpubData
+from .gen_toc import TocPoint, iter_toc
+from .xml_utils import indent, serialize_element
 def gen_nav(
     template: Template,
     i18n: I18N,
     epub_data: EpubData,
-    nav_points: list[NavPoint],
+    toc_points: list[TocPoint],
     has_cover: bool = False,
 ) -> str:
     meta: BookMeta | None = epub_data.meta
     has_head_chapter = epub_data.get_head is not None
-    toc_list = _generate_toc_list(epub_data.prefaces, epub_data.chapters, nav_points)
-    first_chapter_file = nav_points[0].file_name if nav_points else None
-    head_chapter_title = ""
+    toc_body = "\n".join(_render_toc_item(toc_points))
+    first_ref = next(iter_toc(toc_points), None)
+    first_chapter_file: str = ""
+    head_chapter_title: str = ""
+    if first_ref:
+        first_chapter_file = first_ref.file_name
     if has_head_chapter and epub_data.get_head:
-        # Try to extract title from first heading if available
-        head_chapter_title = "Preface"  # Default title
+        head_chapter_title = i18n.preface
     return template.render(
         template="nav.xhtml",
@@ -29,64 +33,38 @@ def gen_nav(
         has_cover=has_cover,
         has_head_chapter=has_head_chapter,
         head_chapter_title=head_chapter_title,
-        toc_list=toc_list,
+        toc_body=toc_body,
         first_chapter_file=first_chapter_file,
     )
-def _generate_toc_list(
-    prefaces: list[TocItem],
-    chapters: list[TocItem],
-    nav_points: list[NavPoint],
-) -> str:
-    nav_point_index = 0
-    html_parts = []
-    for chapters_list in (prefaces, chapters):
-        for toc_item in chapters_list:
-            nav_point_index, item_html = _generate_toc_item(
-                toc_item, nav_points, nav_point_index
-            )
-            html_parts.append(item_html)
-    return "\n".join(html_parts)
+def _render_toc_item(toc_points: list[TocPoint]):
+    for toc_point in toc_points:
+        element = _create_toc_li_element(toc_point)
+        element = indent(element)
+        yield serialize_element(element)
-def _generate_toc_item(
-    toc_item: TocItem,
-    nav_points: list[NavPoint],
-    nav_point_index: int,
-) -> tuple[int, str]:
-    title_escaped = escape(toc_item.title)
-    file_name = None
-    if toc_item.get_chapter is not None and nav_point_index < len(nav_points):
-        file_name = nav_points[nav_point_index].file_name
-        nav_point_index += 1
-    children_html = []
-    for child in toc_item.children:
-        nav_point_index, child_html = _generate_toc_item(
-            child, nav_points, nav_point_index
-        )
-        children_html.append(child_html)
+def _create_toc_li_element(toc_point: TocPoint) -> Element:
+    li = Element("li")
-    if file_name is None and children_html:
-        if nav_point_index > 0:
-            for i in range(nav_point_index - len(toc_item.children), nav_point_index):
-                if i < len(nav_points):
-                    file_name = nav_points[i].file_name
-                    break
-    if file_name:
-        html_parts = [f'      <li>\n        <a href="Text/{file_name}">{title_escaped}</a>']
+    if toc_point.ref is not None:
+        file_name = toc_point.ref.file_name
+        link = Element("a")
+        link.set("href", f"Text/{file_name}")
+        link.text = toc_point.title
+        li.append(link)
     else:
-        html_parts = [f'      <li>\n        <span>{title_escaped}</span>']
-    if children_html:
-        html_parts.append('        <ol>')
-        html_parts.extend(children_html)
-        html_parts.append('        </ol>')
-    html_parts.append('      </li>')
-    return nav_point_index, "\n".join(html_parts)
+        span = Element("span")
+        span.text = toc_point.title
+        li.append(span)
+    # 递归处理子节点
+    if toc_point.children:
+        ol = Element("ol")
+        for child in toc_point.children:
+            child_li = _create_toc_li_element(child)
+            ol.append(child_li)
+        li.append(ol)
+    return li

epub_generator/generation/gen_toc.py CHANGED Viewed

@@ -1,36 +1,57 @@
 from dataclasses import dataclass
-from typing import Any, Callable
+from typing import Any, Callable, Generator
 from ..types import EpubData, TocItem
 @dataclass
-class NavPoint:
-    toc_id: int
-    file_name: str
+class TocPoint:
+    title: str
     order: int
-    get_chapter: Callable[[], Any] | None = None
+    ref: "TocPointRef | None"
+    children: list["TocPoint"]
+    @property
+    def is_placeholder(self) -> bool:
+        """是否为占位节点（无对应文件）"""
+        return self.ref is None
+    @property
+    def has_file(self) -> bool:
+        """是否有对应的 XHTML 文件"""
+        return self.ref is not None
+@dataclass
+class TocPointRef:
+    part_id: str
+    file_name: str
+    get_chapter: Callable[[], Any]
-def gen_toc(
-    epub_data: EpubData,
-    has_cover: bool = False,
-) -> list[NavPoint]:
+def iter_toc(toc_points: list[TocPoint]) -> Generator[TocPointRef, None, None]:
+    for toc_point in toc_points:
+        if toc_point.ref:
+            yield toc_point.ref
+        yield from iter_toc(toc_point.children)
+def gen_toc(epub_data: EpubData) -> list[TocPoint]:
     prefaces = epub_data.prefaces
     chapters = epub_data.chapters
-    nav_point_generation = _NavPointGenerator(
-        has_cover=has_cover,
+    toc_point_generation = _TocPointGenerator(
         chapters_count=(
             _count_toc_items(prefaces) +
             _count_toc_items(chapters)
         ),
     )
+    toc_points: list[TocPoint] = []
     for chapters_list in (prefaces, chapters):
         for toc_item in chapters_list:
-            nav_point_generation.generate(toc_item)
+            toc_point = toc_point_generation.generate(toc_item)
+            toc_points.append(toc_point)
-    return nav_point_generation.nav_points
+    return toc_points
 def _count_toc_items(items: list[TocItem]) -> int:
@@ -50,39 +71,35 @@ def _max_depth_toc_items(items: list[TocItem]) -> int:
     return max_depth
-class _NavPointGenerator:
-    def __init__(self, has_cover: bool, chapters_count: int):
-        self._nav_points: list[NavPoint] = []
-        self._next_order: int = 2 if has_cover else 1
+class _TocPointGenerator:
+    def __init__(self, chapters_count: int):
+        self._next_order: int = 0
         self._next_id: int = 1
         self._digits = len(str(chapters_count))
-    @property
-    def nav_points(self) -> list[NavPoint]:
-        return self._nav_points
-    def generate(self, toc_item: TocItem) -> None:
-        self._create_nav_point(toc_item)
+    def generate(self, toc_item: TocItem) -> TocPoint:
+        return self._create_toc_point(toc_item)
-    def _create_nav_point(self, toc_item: TocItem) -> NavPoint:
-        nav_point: NavPoint | None = None
+    def _create_toc_point(self, toc_item: TocItem) -> TocPoint:
+        ref: TocPointRef | None = None
         if toc_item.get_chapter is not None:
-            toc_id = self._next_id
+            part_id = self._next_id
             self._next_id += 1
-            part_id = str(toc_id).zfill(self._digits)
-            nav_point = NavPoint(
-                toc_id=toc_id,
+            part_id = str(part_id).zfill(self._digits)
+            ref = TocPointRef(
+                part_id=part_id,
                 file_name=f"part{part_id}.xhtml",
-                order=self._next_order,
                 get_chapter=toc_item.get_chapter,
             )
-            self._nav_points.append(nav_point)
-            self._next_order += 1
-        for child in toc_item.children:
-            child_nav_point = self._create_nav_point(child)
-            if nav_point is None:
-                nav_point = child_nav_point
-        assert nav_point is not None, "TocItem has no chapter and no valid children"
-        return nav_point
+        order = self._next_order # 确保 order 以中序遍历为顺序
+        self._next_order += 1
+        return TocPoint(
+            title=toc_item.title,
+            order=order,
+            ref=ref,
+            children=[
+                self._create_toc_point(child)
+                for child in toc_item.children
+            ],
+        )

epub_generator/generation/xml_utils.py CHANGED Viewed

@@ -29,3 +29,18 @@ def serialize_element(element: Element) -> str:
             xml_string = xml_string.replace(f"{ns_prefix}:", f"{prefix}:")
     return xml_string
+def indent(elem: Element, level: int = 0) -> Element:
+    indent_str = "  " * level
+    next_indent_str = "  " * (level + 1)
+    if len(elem):
+        if not elem.text or not elem.text.strip():
+            elem.text = "\n" + next_indent_str
+        for i, child in enumerate(elem):
+            indent(child, level + 1)
+            if not child.tail or not child.tail.strip():
+                if i == len(elem) - 1:
+                    child.tail = "\n" + indent_str
+                else:
+                    child.tail = "\n" + next_indent_str
+    return elem

epub_generator/i18n.py CHANGED Viewed

@@ -9,9 +9,11 @@ class I18N:
             self.table_of_contents: str = "目录"
             self.landmarks: str = "路标"
             self.start_of_content: str = "正文开始"
+            self.preface: str = "前言"
         elif lan == "en":
             self.unnamed: str = "Unnamed"
             self.cover: str = "Cover"
             self.table_of_contents: str = "Table of Contents"
             self.landmarks: str = "Landmarks"
             self.start_of_content: str = "Start of Content"
+            self.preface: str = "Preface"

epub_generator/types.py CHANGED Viewed

@@ -84,30 +84,41 @@ class Mark:
     """Citation ID, matches Footnote.id"""
 @dataclass
-class Table:
-    """HTML table."""
-    html_content: str
-    """HTML table markup"""
+class BasicAsset:
+    """Base class for assets, providing an optional title and caption."""
+    title: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
+    """Asset title (before content)"""
+    caption: list["str | Mark | Formula | HTMLTag"] = field(default_factory=list, kw_only=True)
+    """Asset caption (after content)"""
 @dataclass
-class Formula:
+class Table(BasicAsset):
+    """Table representation."""
+    html_content: "HTMLTag"
+    """HTML content of the table"""
+@dataclass
+class Formula(BasicAsset):
     """Mathematical formula."""
     latex_expression: str
     """LaTeX expression"""
 @dataclass
-class Image:
+class Image(BasicAsset):
     """Image reference."""
     path: Path
     """Absolute path to the image file"""
-    alt_text: str = "image"
-    """Alt text (defaults to "image")"""
 @dataclass
 class TextBlock:
+    """Text block representation."""
     kind: TextKind
     """Kind of text block."""
     level: int

epub_generator/validate.py ADDED Viewed

@@ -0,0 +1,224 @@
+from .types import (
+    BasicAsset,
+    Chapter,
+    ContentBlock,
+    EpubData,
+    Footnote,
+    Formula,
+    HTMLTag,
+    Image,
+    Mark,
+    Table,
+    TextBlock,
+    TocItem,
+)
+class InvalidUnicodeError(Exception):
+    """Raised when invalid Unicode characters (surrogates) are detected in EPUB data."""
+    def __init__(self, field_path: str, invalid_char_info: str):
+        """Initialize with field path and character information.
+        Args:
+            field_path: Dot-separated path to the field containing invalid characters
+            invalid_char_info: Information about the invalid character(s)
+        """
+        self.field_path = field_path
+        self.invalid_char_info = invalid_char_info
+        super().__init__(
+            f"Invalid Unicode character detected in {field_path}: {invalid_char_info}"
+        )
+def validate_epub_data(epub_data: EpubData) -> None:
+    """Validate an EpubData object for invalid Unicode characters.
+    This function checks the following string fields of the EPUB data structure:
+    - Book metadata (title, description, publisher, ISBN, authors, editors, translators)
+    - Table of contents titles for prefaces and chapters (recursively)
+    Note: chapter content is NOT validated here (use validate_chapter separately).
+    Args:
+        epub_data: EPUB data to validate
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected in any string field
+    """
+    # Check metadata
+    if epub_data.meta:
+        meta = epub_data.meta
+        _check_string(meta.title, "EpubData.meta.title")
+        _check_string(meta.description, "EpubData.meta.description")
+        _check_string(meta.publisher, "EpubData.meta.publisher")
+        _check_string(meta.isbn, "EpubData.meta.isbn")
+        for i, author in enumerate(meta.authors):
+            _check_string(author, f"EpubData.meta.authors[{i}]")
+        for i, editor in enumerate(meta.editors):
+            _check_string(editor, f"EpubData.meta.editors[{i}]")
+        for i, translator in enumerate(meta.translators):
+            _check_string(translator, f"EpubData.meta.translators[{i}]")
+    # Check prefaces TOC
+    for i, preface in enumerate(epub_data.prefaces):
+        _check_toc_item(preface, f"EpubData.prefaces[{i}]")
+    # Check chapters TOC
+    for i, chapter_toc in enumerate(epub_data.chapters):
+        _check_toc_item(chapter_toc, f"EpubData.chapters[{i}]")
+def validate_chapter(chapter: Chapter, context: str = "Chapter") -> None:
+    """Validate a Chapter object for invalid Unicode characters.
+    Args:
+        chapter: Chapter to validate
+        context: Context string for error reporting (e.g., "Chapter", "chapters[0]")
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected in any string field
+    """
+    # Check main content elements
+    for i, element in enumerate(chapter.elements):
+        _check_content_block(element, f"{context}.elements[{i}]")
+    # Check footnotes
+    for i, footnote in enumerate(chapter.footnotes):
+        _check_footnote(footnote, f"{context}.footnotes[{i}]")
+def _check_string(value: str | None, field_path: str) -> None:
+    """Check if a string contains surrogate characters.
+    Args:
+        value: String to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    if value is None:
+        return
+    for i, char in enumerate(value):
+        code_point = ord(char)
+        # Check for code points in the surrogate range (U+D800 to U+DFFF)
+        if 0xD800 <= code_point <= 0xDFFF:
+            raise InvalidUnicodeError(
+                field_path=field_path,
+                invalid_char_info=f"surrogate character U+{code_point:04X} at position {i}",
+            )
+def _check_string_list(values: list[str | Mark | Formula | HTMLTag], field_path: str) -> None:
+    """Recursively check a list that may contain strings, marks, formulas, or HTML tags.
+    Args:
+        values: List to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    for i, item in enumerate(values):
+        item_path = f"{field_path}[{i}]"
+        if isinstance(item, str):
+            _check_string(item, item_path)
+        elif isinstance(item, Mark):
+            pass  # Mark only contains int ID
+        elif isinstance(item, Formula):
+            _check_string(item.latex_expression, f"{item_path}.latex_expression")
+            _check_string_list(item.title, f"{item_path}.title")
+            _check_string_list(item.caption, f"{item_path}.caption")
+        elif isinstance(item, HTMLTag):
+            _check_html_tag(item, item_path)
+def _check_html_tag(tag: HTMLTag, field_path: str) -> None:
+    """Check an HTML tag for invalid characters.
+    Args:
+        tag: HTML tag to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    _check_string(tag.name, f"{field_path}.name")
+    for i, (attr_name, attr_value) in enumerate(tag.attributes):
+        _check_string(attr_name, f"{field_path}.attributes[{i}][0]")
+        _check_string(attr_value, f"{field_path}.attributes[{i}][1]")
+    _check_string_list(tag.content, f"{field_path}.content")
+def _check_basic_asset(asset: BasicAsset, field_path: str) -> None:
+    """Check BasicAsset (and subclasses) for invalid characters.
+    Args:
+        asset: Asset to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    _check_string_list(asset.title, f"{field_path}.title")
+    _check_string_list(asset.caption, f"{field_path}.caption")
+    if isinstance(asset, Formula):
+        _check_string(asset.latex_expression, f"{field_path}.latex_expression")
+    elif isinstance(asset, Table):
+        _check_html_tag(asset.html_content, f"{field_path}.html_content")
+    elif isinstance(asset, Image):
+        pass  # Image only contains Path, no string content to check
+def _check_content_block(block: ContentBlock, field_path: str) -> None:
+    """Check a content block for invalid characters.
+    Args:
+        block: Content block to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    if isinstance(block, TextBlock):
+        _check_string_list(block.content, f"{field_path}.content")
+    elif isinstance(block, (Table, Formula, Image)):
+        _check_basic_asset(block, field_path)
+def _check_footnote(footnote: Footnote, field_path: str) -> None:
+    """Check a footnote for invalid characters.
+    Args:
+        footnote: Footnote to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    for i, content_block in enumerate(footnote.contents):
+        _check_content_block(content_block, f"{field_path}.contents[{i}]")
+def _check_toc_item(item: TocItem, field_path: str) -> None:
+    """Recursively check a TOC item for invalid characters.
+    Args:
+        item: TOC item to check
+        field_path: Path to the field for error reporting
+    Raises:
+        InvalidUnicodeError: If surrogate characters are detected
+    """
+    _check_string(item.title, f"{field_path}.title")
+    # Check nested children recursively
+    for i, child in enumerate(item.children):
+        _check_toc_item(child, f"{field_path}.children[{i}]")

{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: epub-generator
-Version: 0.1.4
+Version: 0.1.6
 Summary: A simple Python EPUB 3.0 generator with a single API call
 License: MIT
 Keywords: epub,epub3,ebook,generator,publishing

epub_generator-0.1.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,27 @@
+epub_generator/__init__.py,sha256=G1P_GAUym94iv56PPK31641vlYrukUoMJZgWtmKscog,768
+epub_generator/context.py,sha256=9jHRpnQsNooRUSBoY_tiQ7aQ_AMZmyKUO22gPoO8Koc,4324
+epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
+epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
+epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
+epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
+epub_generator/data/nav.xhtml.jinja,sha256=zk5hf-MYoKxd4pcshZV5VliVrtDIgfH7n9f3-1L1cY0,1132
+epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
+epub_generator/data/style.css.jinja,sha256=n_DE-z97ikGzD3qufSwX_1iqkQcE_5kXiCIhyoXNjRA,1400
+epub_generator/generation/__init__.py,sha256=UIscwHa8ocr2D1mk1KaP-zi3P1x9eYJzxTo0RJ2dnks,35
+epub_generator/generation/gen_asset.py,sha256=WYwfGUvHM_CrwTuIIH7dYm-SL-vdhkTnvaZDymZxXzg,5978
+epub_generator/generation/gen_chapter.py,sha256=P6kmB8hdQnJB6SCheHzu5cOmZrC5H0LqNV-uuuigX1M,3425
+epub_generator/generation/gen_content.py,sha256=2ojjTgalveRnk1MXQaKsY53hPCgb7NHTwbMpLOXVrss,2018
+epub_generator/generation/gen_epub.py,sha256=rxHBp4nP5OFi9SJBfiCrncV1fmhb0j3WKfUqofxJykc,6487
+epub_generator/generation/gen_nav.py,sha256=_cjOP18C1CoTn_DELIB06pyMPZZ0CPbkk4oPEvICdKs,1955
+epub_generator/generation/gen_toc.py,sha256=MK2iTYBpF8VUtPHpwz5JB_H6nWsKRKpVuLzRPYGy0nw,2864
+epub_generator/generation/xml_utils.py,sha256=AVnU3AN6lmqWrdgaZTV7v77L9LonI7DX59BxkMZlef8,1822
+epub_generator/html_tag.py,sha256=P_Y0uRStCEEh7cCtpvK4t432NEcY9OLntAznvdxUF5k,343
+epub_generator/i18n.py,sha256=-L6J6hsy796_IQ4nLpNtAeXIkRM6oFSWSHDlRZXW8aA,705
+epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
+epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
+epub_generator/types.py,sha256=gBrdi1KYOVEnI0qEp1slLsyUw_Sd7v09uHvN8_Hf9Z8,4440
+epub_generator/validate.py,sha256=KBgvBsBuVnWTc4N-29cr2P92X0w_tGR4pMemk_KHy78,7544
+epub_generator-0.1.6.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
+epub_generator-0.1.6.dist-info/METADATA,sha256=JziMt9LukPRKo8rPy10qf9sIiiv98CgSxKoi7juHcYE,16555
+epub_generator-0.1.6.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+epub_generator-0.1.6.dist-info/RECORD,,

epub_generator-0.1.4.dist-info/RECORD DELETED Viewed

@@ -1,25 +0,0 @@
-epub_generator/__init__.py,sha256=CRtP7zjqNPxOA_m4S8Jgavuw_KgaKpoBW5kdgqUivLQ,648
-epub_generator/context.py,sha256=AggXhRiOg70WZTLXF78LHA4coo3fa77OvKVe5wtgP6A,4495
-epub_generator/data/container.xml.jinja,sha256=SkACyZgsAVUS5lmiCEhq3SpbFspYdyCnRNjWnLztLt0,252
-epub_generator/data/content.opf.jinja,sha256=DDaR9GZnSBcpNk2BWUu56Uo_248TA91AxE4tKsBuKnQ,2839
-epub_generator/data/cover.xhtml.jinja,sha256=heounlnHfOd-RNFIeytZQtAQ11ByPOiM1aB1lVyY6V4,328
-epub_generator/data/mimetype.jinja,sha256=5GjjUNEUPrZI9gx7C9YDEQHsBUSjYcp07O8laskB9Is,20
-epub_generator/data/nav.xhtml.jinja,sha256=FGunTu_cDJmSBxV8cfaIDjHVUNsyjogWg1jL4VK8ihU,1132
-epub_generator/data/part.xhtml.jinja,sha256=FEQaUjHfCy7EJyyvYZj-6T-lkDcsmz1wvsk0b8LU3E0,558
-epub_generator/data/style.css.jinja,sha256=HyGWoevaZD9xPDJeMQY_1xmM0f6aK0prmqoW3mhTGp0,1072
-epub_generator/generation/__init__.py,sha256=UIscwHa8ocr2D1mk1KaP-zi3P1x9eYJzxTo0RJ2dnks,35
-epub_generator/generation/gen_asset.py,sha256=0muwCvAohODC76F9G9_UBibRPQSpmMJkgQxlCsM7QcQ,4480
-epub_generator/generation/gen_chapter.py,sha256=Irb0uJjK8Q5PnHoK4PFP7CIKKzbfhIK_4thvin6hg6g,5505
-epub_generator/generation/gen_epub.py,sha256=zkG0U5_g3FY-D6zkYGqp844IgWYJhbAqf6CnX2Do71Y,6412
-epub_generator/generation/gen_nav.py,sha256=D-ZNsbm26AEAovbXtx1wSwTfH4Q8H2WYfoYeQ1Sb9bk,2813
-epub_generator/generation/gen_toc.py,sha256=yt7GYu8Rfz9aw_GPZFUl9H3BKd1za1hSm2hhp8wyI68,2488
-epub_generator/generation/xml_utils.py,sha256=xMcNZl8CaV21XYx2yeykkHhvnq5N7yRHfIFu5KRlRHc,1261
-epub_generator/html_tag.py,sha256=P_Y0uRStCEEh7cCtpvK4t432NEcY9OLntAznvdxUF5k,343
-epub_generator/i18n.py,sha256=GQjpHO7t8_0rXNuoYmO-G7_9nCF7S5kluBG0ip_2jIA,622
-epub_generator/options.py,sha256=Er1dnaNvzDSnZRSRJGSqhkJsv1XtsCW2Ym_hUc8o_QI,181
-epub_generator/template.py,sha256=RdN2QRICIrYMzpxCU_x4m4V9WWZEP9VvT6QLp2YCm90,1556
-epub_generator/types.py,sha256=Raz6MT-aIkMp6Yw9hTu4HP_ySg4kMC-YJ_o0cjYzu_A,4059
-epub_generator-0.1.4.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
-epub_generator-0.1.4.dist-info/METADATA,sha256=cpydosW4bVyknIbCxKP4DAhl-M8NSct7-9jX9M4BISw,16555
-epub_generator-0.1.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-epub_generator-0.1.4.dist-info/RECORD,,

{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/LICENSE RENAMED Viewed

File without changes

{epub_generator-0.1.4.dist-info → epub_generator-0.1.6.dist-info}/WHEEL RENAMED Viewed

File without changes

epub-generator 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

epub-generator 0.1.4py3-none-any.whl → 0.1.6py3-none-any.whl