epub-generator 0.0.3__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,39 @@
1
- from .gen_epub import generate_epub_file
2
- from .types import TableRender, LaTeXRender
1
+ from .generation import generate_epub
2
+ from .options import LaTeXRender, TableRender
3
+ from .types import (
4
+ BookMeta,
5
+ Chapter,
6
+ ChapterGetter,
7
+ ContentBlock,
8
+ EpubData,
9
+ Footnote,
10
+ Formula,
11
+ Image,
12
+ Mark,
13
+ Table,
14
+ Text,
15
+ TextKind,
16
+ TocItem,
17
+ )
3
18
 
4
- __all__ = ["generate_epub_file", "template", "TableRender", "LaTeXRender"]
19
+ __all__ = [
20
+ # Main API function
21
+ "generate_epub",
22
+ # Options
23
+ "TableRender",
24
+ "LaTeXRender",
25
+ # Data types
26
+ "EpubData",
27
+ "BookMeta",
28
+ "TocItem",
29
+ "Chapter",
30
+ "ChapterGetter",
31
+ "ContentBlock",
32
+ "Text",
33
+ "TextKind",
34
+ "Table",
35
+ "Formula",
36
+ "Image",
37
+ "Footnote",
38
+ "Mark",
39
+ ]
epub_generator/context.py CHANGED
@@ -1,91 +1,141 @@
1
+ from dataclasses import dataclass
2
+ from hashlib import sha256
3
+ from importlib.resources import files
1
4
  from pathlib import Path
2
5
  from typing import cast
3
6
  from zipfile import ZipFile
4
- from importlib.resources import files
5
- from jinja2 import Environment, Template as JinjaTemplate
6
7
 
8
+ from jinja2 import Environment
9
+ from jinja2 import Template as JinjaTemplate
10
+
11
+ from .options import LaTeXRender, TableRender
7
12
  from .template import create_env
8
- from .types import TableRender, LaTeXRender
9
13
 
10
14
 
15
+ @dataclass
16
+ class _AssetNode:
17
+ file_name: str
18
+ media_type: str
19
+ content_hash: str
20
+
11
21
class Context:
    """Shared state used while writing one EPUB archive.

    Holds the output zip file, the template renderer, the table/LaTeX
    rendering options, the assets already written (de-duplicated by content
    hash) and the set of chapter files marked as containing MathML.
    """

    def __init__(
        self,
        file: ZipFile,
        template: "Template",
        table_render: TableRender,
        latex_render: LaTeXRender,
    ) -> None:
        self._file: ZipFile = file
        self._template: Template = template
        self._table_render: TableRender = table_render
        self._latex_render: LaTeXRender = latex_render
        # source path -> node, so a path that was already seen is not read again
        self._path_to_node: dict[Path, _AssetNode] = {}
        # content hash -> node, so identical bytes are stored only once
        self._hash_to_node: dict[str, _AssetNode] = {}
        # file names of chapters that contain MathML
        self._chapters_with_mathml: set[str] = set()

    @property
    def file(self) -> ZipFile:
        """The zip archive being written."""
        return self._file

    @property
    def template(self) -> "Template":
        """The template renderer."""
        return self._template

    @property
    def table_render(self) -> TableRender:
        """The configured table rendering option."""
        return self._table_render

    @property
    def latex_render(self) -> LaTeXRender:
        """The configured LaTeX rendering option."""
        return self._latex_render

    @property
    def used_files(self) -> list[tuple[str, str]]:
        """``(file_name, media_type)`` of every stored asset, sorted by file name."""
        ordered = sorted(
            self._hash_to_node.values(),
            key=lambda entry: entry.file_name,
        )
        return [(entry.file_name, entry.media_type) for entry in ordered]

    def mark_chapter_has_mathml(self, chapter_file_name: str) -> None:
        """Mark a chapter as containing MathML content for EPUB 3.0 manifest properties."""
        self._chapters_with_mathml.add(chapter_file_name)

    def chapter_has_mathml(self, chapter_file_name: str) -> bool:
        """Check if a chapter contains MathML content."""
        return chapter_file_name in self._chapters_with_mathml

    def use_asset(
        self,
        source_path: Path,
        media_type: str,
        file_ext: str,
    ) -> str:
        """Store the file at *source_path* as an asset and return its archive file name.

        A path that was passed before returns the name recorded for it. A file
        whose bytes hash to an already stored asset reuses that asset instead
        of being written again.

        Raises:
            FileNotFoundError: if *source_path* does not exist.
        """
        known = self._path_to_node.get(source_path)
        if known is not None:
            return known.file_name

        if not source_path.exists():
            raise FileNotFoundError(f"Asset file not found: {source_path}")

        with open(source_path, "rb") as stream:
            digest = _sha256_hash(stream.read())

        duplicate = self._hash_to_node.get(digest)
        if duplicate is not None:
            # Same bytes under a different path: remember the path, reuse the entry.
            self._path_to_node[source_path] = duplicate
            return duplicate.file_name

        entry = _AssetNode(
            file_name=f"{digest}{file_ext}",
            media_type=media_type,
            content_hash=digest,
        )
        self._path_to_node[source_path] = entry
        self._hash_to_node[digest] = entry
        self._file.write(
            filename=source_path,
            arcname="OEBPS/assets/" + entry.file_name,
        )
        return entry.file_name

    def add_asset(self, data: bytes, media_type: str, file_ext: str) -> str:
        """Store in-memory *data* as an asset and return its archive file name.

        Data whose hash matches an already stored asset is not written again;
        the existing file name is returned.
        """
        digest = _sha256_hash(data)
        duplicate = self._hash_to_node.get(digest)
        if duplicate is not None:
            return duplicate.file_name

        entry = _AssetNode(
            file_name=f"{digest}{file_ext}",
            media_type=media_type,
            content_hash=digest,
        )
        self._hash_to_node[digest] = entry
        self._file.writestr(
            zinfo_or_arcname="OEBPS/assets/" + entry.file_name,
            data=data,
        )
        return entry.file_name
75
120
 
76
121
class Template:
    """Renders Jinja templates from the package's ``data`` directory, caching each loaded template."""

    def __init__(self):
        package_root = cast(Path, files("epub_generator"))
        self._env: Environment = create_env(package_root / "data")
        # template name -> loaded template
        self._templates: dict[str, JinjaTemplate] = {}

    def render(self, template: str, **params) -> str:
        """Render the template named *template* with *params* and return the text."""
        return self._template(template).render(**params)

    def _template(self, name: str) -> JinjaTemplate:
        """Return the template called *name*, loading it through the environment on first use."""
        try:
            return self._templates[name]
        except KeyError:
            pass
        loaded = self._env.get_template(name)
        self._templates[name] = loaded
        return loaded
137
+
138
+ def _sha256_hash(data: bytes) -> str:
139
+ hash256 = sha256()
140
+ hash256.update(data)
141
+ return hash256.hexdigest()
@@ -1,58 +1,60 @@
1
1
  <?xml version="1.0" encoding="utf-8"?>
2
- <package version="2.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid">
3
- <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
4
- <dc:language>zh</dc:language>
2
+ <package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid" xml:lang="zh">
3
+ <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
5
4
  <dc:identifier id="uid">{{ ISBN }}</dc:identifier>
6
- {% if "title" in meta %}
7
- <dc:title>{{ meta["title"] }}</dc:title>
5
+ <dc:language>zh</dc:language>
6
+ {% if meta and meta.title %}
7
+ <dc:title>{{ meta.title }}</dc:title>
8
8
  {% else %}
9
9
  <dc:title>{{ i18n.unnamed }}</dc:title>
10
10
  {% endif %}
11
- {% if "description" in meta %}
12
- <dc:description>{{ meta["description"] }}</dc:description>
11
+ {% if meta and meta.description %}
12
+ <dc:description>{{ meta.description }}</dc:description>
13
13
  {% endif %}
14
- {% if "publisher" in meta %}
15
- <dc:publisher>{{ meta["publisher"] }}</dc:publisher>
14
+ {% if meta and meta.publisher %}
15
+ <dc:publisher>{{ meta.publisher }}</dc:publisher>
16
16
  {% endif %}
17
- {% if "authors" in meta %}
18
- {% for author in meta["authors"] %}
19
- <dc:creator opf:role="aut">{{ author }}</dc:creator>
17
+ {% if meta and meta.authors %}
18
+ {% for author in meta.authors %}
19
+ <dc:creator id="creator{{ loop.index }}">{{ author }}</dc:creator>
20
+ <meta refines="#creator{{ loop.index }}" property="role" scheme="marc:relators">aut</meta>
20
21
  {% endfor %}
21
22
  {% endif %}
22
- {% if "editors" in meta %}
23
- {% for editor in meta["editors"] %}
24
- <dc:creator opf:role="edt">{{ editor }}</dc:creator>
23
+ {% if meta and meta.editors %}
24
+ {% for editor in meta.editors %}
25
+ <dc:creator id="editor{{ loop.index }}">{{ editor }}</dc:creator>
26
+ <meta refines="#editor{{ loop.index }}" property="role" scheme="marc:relators">edt</meta>
25
27
  {% endfor %}
26
28
  {% endif %}
27
- {% if "translators" in meta %}
28
- {% for translator in meta["translators"] %}
29
- <dc:creator opf:role="trl">{{ translator }}</dc:creator>
29
+ {% if meta and meta.translators %}
30
+ {% for translator in meta.translators %}
31
+ <dc:creator id="translator{{ loop.index }}">{{ translator }}</dc:creator>
32
+ <meta refines="#translator{{ loop.index }}" property="role" scheme="marc:relators">trl</meta>
30
33
  {% endfor %}
31
34
  {% endif %}
35
+ <meta property="dcterms:modified">{{ modified_timestamp }}</meta>
32
36
  {% if has_cover %}
33
- <meta name="cover" content="a_cover" />
37
+ <meta name="cover" content="cover-image" />
34
38
  {% endif %}
35
- <meta name="output encoding" content="utf-8" />
36
- <meta name="primary-writing-mode" content="horizontal-lr" />
37
39
  </metadata>
38
40
  <manifest>
41
+ <item id="nav" properties="nav" media-type="application/xhtml+xml" href="nav.xhtml" />
39
42
  <item id="a_css" media-type="text/css" href="styles/style.css" />
40
43
  {% for asset_file, media_type in asset_files %}
41
44
  <item id="a_{{ asset_file|safe }}" media-type="{{ media_type|safe }}" href="assets/{{ asset_file|safe }}" />
42
- {% endfor%}
45
+ {% endfor %}
43
46
  {% if has_cover %}
44
- <item id="a_cover" media-type="image/png" href="assets/cover.png" />
47
+ <item id="cover-image" properties="cover-image" media-type="image/png" href="assets/cover.png" />
45
48
  <item id="x_cover.xhtml" media-type="application/xhtml+xml" href="Text/cover.xhtml" />
46
49
  {% endif %}
47
50
  {% if has_head_chapter %}
48
51
  <item id="x_head.xhtml" media-type="application/xhtml+xml" href="Text/head.xhtml" />
49
52
  {% endif %}
50
53
  {% for nav_point in nav_points %}
51
- <item id="x_{{ nav_point.file_name|safe }}" media-type="application/xhtml+xml" href="Text/{{ nav_point.file_name|safe }}" />
54
+ <item id="x_{{ nav_point.file_name|safe }}" media-type="application/xhtml+xml" href="Text/{{ nav_point.file_name|safe }}"{% if nav_point.file_name in chapters_with_mathml %} properties="mathml"{% endif %} />
52
55
  {% endfor %}
53
- <item id="ncx" media-type="application/x-dtbncx+xml" href="toc.ncx" />
54
56
  </manifest>
55
- <spine toc="ncx">
57
+ <spine>
56
58
  {% if has_cover %}
57
59
  <itemref idref="x_cover.xhtml" linear="no" />
58
60
  {% endif %}
@@ -1,8 +1,9 @@
1
1
  <?xml version="1.0" encoding="utf-8"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
3
- <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh">
2
+ <!DOCTYPE html>
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
4
4
 
5
5
  <head>
6
+ <meta charset="utf-8"/>
6
7
  <title>{{ i18n.cover }}</title>
7
8
  </head>
8
9
 
@@ -0,0 +1,43 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE html>
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
4
+ <head>
5
+ <meta charset="utf-8"/>
6
+ <title>{{ i18n.table_of_contents }}</title>
7
+ <link href="styles/style.css" rel="stylesheet" type="text/css"/>
8
+ </head>
9
+ <body>
10
+ <nav epub:type="toc" id="toc" role="doc-toc">
11
+ <h1>{{ i18n.table_of_contents }}</h1>
12
+ <ol>
13
+ {% if has_cover %}
14
+ <li>
15
+ <a href="Text/cover.xhtml">{{ i18n.cover }}</a>
16
+ </li>
17
+ {% endif %}
18
+ {% if has_head_chapter %}
19
+ <li>
20
+ <a href="Text/head.xhtml">{{ head_chapter_title }}</a>
21
+ </li>
22
+ {% endif %}
23
+ {{ toc_list|safe }}
24
+ </ol>
25
+ </nav>
26
+
27
+ <nav epub:type="landmarks" id="landmarks" hidden="hidden">
28
+ <h2>{{ i18n.landmarks }}</h2>
29
+ <ol>
30
+ {% if has_cover %}
31
+ <li>
32
+ <a epub:type="cover" href="Text/cover.xhtml">{{ i18n.cover }}</a>
33
+ </li>
34
+ {% endif %}
35
+ {% if first_chapter_file %}
36
+ <li>
37
+ <a epub:type="bodymatter" href="Text/{{ first_chapter_file }}">{{ i18n.start_of_content }}</a>
38
+ </li>
39
+ {% endif %}
40
+ </ol>
41
+ </nav>
42
+ </body>
43
+ </html>
@@ -1,9 +1,10 @@
1
- <?xml version="1.0" encoding="utf-8" standalone="no"?>
2
- <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
3
- <html xmlns="http://www.w3.org/1999/xhtml">
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html>
3
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
4
4
 
5
5
  <head>
6
- <title></title>
6
+ <meta charset="utf-8"/>
7
+ <title>Chapter</title>
7
8
  <link href="../styles/style.css" rel="stylesheet" type="text/css"/>
8
9
  </head>
9
10
 
@@ -12,10 +13,11 @@
12
13
  {{ item|safe }}
13
14
  {% endfor %}
14
15
  {% if citations %}
15
- <h2>{{ i18n.references }}</h2>
16
- {% for item in citations %}
17
- {{ item|safe }}
18
- {% endfor %}
16
+ <section epub:type="footnotes" role="doc-endnotes">
17
+ {% for item in citations %}
18
+ {{ item|safe }}
19
+ {% endfor %}
20
+ </section>
19
21
  {% endif %}
20
22
  </body>
21
23
 
@@ -0,0 +1 @@
1
+ from .gen_epub import generate_epub
@@ -0,0 +1,142 @@
1
+ import io
2
+ import re
3
+ from typing import Any, cast
4
+ from xml.etree.ElementTree import Element, fromstring
5
+
6
+ import matplotlib.pyplot as plt
7
+ from latex2mathml.converter import convert
8
+
9
+ from ..context import Context
10
+ from ..options import LaTeXRender, TableRender
11
+ from ..types import Formula, Image, Table
12
+
13
+ _MEDIA_TYPE_MAP = {
14
+ ".png": "image/png",
15
+ ".jpg": "image/jpeg",
16
+ ".jpeg": "image/jpeg",
17
+ ".gif": "image/gif",
18
+ ".svg": "image/svg+xml",
19
+ }
20
+
21
def process_table(context: Context, table: Table) -> Element | None:
    """Build a ``<div class="alt-wrapper">`` holding the table's parsed HTML.

    Returns None when table rendering is set to ``TableRender.CLIPPING``,
    when the HTML cannot be parsed, or when it yields no child elements.
    """
    if context.table_render == TableRender.CLIPPING:
        return None

    try:
        # An extra <div> lets a fragment with several top-level elements
        # parse as a single XML document.
        root = fromstring(f"<div>{table.html_content}</div>")
        container = Element("div", attrib={"class": "alt-wrapper"})
        container.extend(root)
    except Exception:
        # Best effort: a table that cannot be processed is omitted.
        return None

    if len(container) > 0:
        return container
    return None
35
+
36
+
37
def process_formula(context: Context, formula: Formula) -> Element | None:
    """Render a formula according to the context's LaTeX rendering option.

    ``MATHML`` returns the element produced by ``_latex2mathml``. ``SVG``
    stores the rendered image as an asset and returns a
    ``<div class="alt-wrapper">`` containing an ``<img>`` that points to it.
    ``CLIPPING``, an empty expression, a failed SVG render or any other
    option yields None.
    """
    render_mode = context.latex_render
    if render_mode == LaTeXRender.CLIPPING:
        return None

    expression = _normalize_expression(formula.latex_expression)
    if not expression:
        return None

    if render_mode == LaTeXRender.MATHML:
        return _latex2mathml(expression)
    if render_mode != LaTeXRender.SVG:
        return None

    svg_bytes = _latex_formula2svg(expression)
    if svg_bytes is None:
        return None

    asset_name = context.add_asset(
        data=svg_bytes,
        media_type="image/svg+xml",
        file_ext=".svg",
    )
    picture = Element("img", {"src": f"../assets/{asset_name}", "alt": "formula"})
    container = Element("div", attrib={"class": "alt-wrapper"})
    container.append(picture)
    return container
66
+
67
def process_image(context: Context, image: Image) -> Element | None:
    """Register the image file as an asset and return its ``<img>`` wrapped in a div.

    The media type is looked up from the lower-cased file suffix; a missing
    suffix is treated as ``.png`` and an unknown suffix as ``image/png``.
    """
    extension = image.path.suffix or ".png"
    media_type = _MEDIA_TYPE_MAP.get(extension.lower(), "image/png")
    asset_name = context.use_asset(
        source_path=image.path,
        media_type=media_type,
        file_ext=extension,
    )

    picture = Element("img", {"src": f"../assets/{asset_name}", "alt": image.alt_text})
    container = Element("div", attrib={"class": "alt-wrapper"})
    container.append(picture)
    return container
81
+
82
+
83
+ _ESCAPE_UNICODE_PATTERN = re.compile(r"&#x([0-9A-Fa-f]{5});")
84
+
85
+
86
def _latex2mathml(latex: str) -> None | Element:
    """Convert a LaTeX expression into a parsed MathML element.

    Returns None when the conversion raises or when the resulting markup
    cannot be parsed as XML.
    """
    try:
        html_latex = convert(latex)
    except Exception:
        return None

    # The escaping done by latex2mathml carries an odd `&` prefix that is
    # redundant here, so the escaped code points are turned back into literal
    # characters with a regular expression as a workaround.
    # Characters that are markup in XML must stay escaped, otherwise the
    # string handed to the parser is malformed. That includes `&` itself:
    # left bare, it makes parsing fail and the whole formula is dropped.
    xml_escapes = {"<": "&lt;", ">": "&gt;", "&": "&amp;"}

    def repl(match):
        char = chr(int(match.group(1), 16))
        return xml_escapes.get(char, char)

    mathml = re.sub(
        pattern=_ESCAPE_UNICODE_PATTERN,
        repl=repl,
        string=html_latex,
    )
    try:
        return fromstring(mathml)
    except Exception:
        return None
113
+
114
+
115
def _latex_formula2svg(latex: str, font_size: int = 12) -> bytes | None:
    """Render a LaTeX formula to SVG bytes with matplotlib.

    Args:
        latex: The formula source, without the surrounding ``$`` delimiters.
        font_size: Font size used for rendering.

    Returns:
        The SVG document as bytes, or None if rendering fails for any reason.
    """
    # from https://www.cnblogs.com/qizhou/p/18170083
    fig = None
    try:
        output = io.BytesIO()
        # Scope the rc overrides to this render so that `usetex` and the font
        # size do not stay switched on for every other matplotlib user in the
        # process.
        with plt.rc_context({"text.usetex": True, "font.size": font_size}):
            fig, ax = plt.subplots()
            txt = ax.text(0.5, 0.5, f"${latex}$", ha="center", va="center", transform=ax.transAxes)
            ax.axis("off")
            fig.canvas.draw()
            # Shrink the figure to the extent of the rendered text.
            bbox = txt.get_window_extent(cast(Any, fig.canvas).get_renderer())
            fig.set_size_inches(bbox.width / fig.dpi, bbox.height / fig.dpi)
            fig.savefig(
                output,
                format="svg",
                transparent=True,
                bbox_inches="tight",
                pad_inches=0,
            )
        return output.getvalue()
    except Exception:
        return None
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each rendered formula leaks one figure.
        if fig is not None:
            plt.close(fig)
137
+
138
+
139
+ def _normalize_expression(expression: str) -> str:
140
+ expression = expression.replace("\n", "")
141
+ expression = expression.strip()
142
+ return expression