epub-generator 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_generator/__init__.py +4 -0
- epub_generator/context.py +90 -0
- epub_generator/gen_asset.py +133 -0
- epub_generator/gen_epub.py +178 -0
- epub_generator/gen_index.py +156 -0
- epub_generator/gen_part.py +126 -0
- epub_generator/hash.py +7 -0
- epub_generator/i18n.py +13 -0
- epub_generator/template.py +50 -0
- epub_generator/types.py +11 -0
- epub_generator-0.0.1.dist-info/LICENSE +21 -0
- epub_generator-0.0.1.dist-info/METADATA +19 -0
- epub_generator-0.0.1.dist-info/RECORD +14 -0
- epub_generator-0.0.1.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from zipfile import ZipFile
|
|
3
|
+
from importlib.resources import files
|
|
4
|
+
from jinja2 import Environment, Template as JinjaTemplate
|
|
5
|
+
|
|
6
|
+
from .template import create_env
|
|
7
|
+
from .types import TableRender, LaTeXRender
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Context:
    """State shared while one EPUB archive is being written.

    Holds the output zip archive, the template renderer, the table/LaTeX
    render modes, and a registry of the asset files referenced so far.
    """

    def __init__(
        self,
        file: ZipFile,
        template: "Template",
        assets_path: Path | None,
        table_render: TableRender,
        latex_render: LaTeXRender,
    ) -> None:
        self._file: ZipFile = file
        self._template: Template = template
        self._assets_path: Path | None = assets_path
        self._table_render: TableRender = table_render
        self._latex_render: LaTeXRender = latex_render
        # asset file name -> media type, for every asset registered so far
        self._used_file_names: dict[str, str] = {}

    @property
    def file(self) -> ZipFile:
        """The zip archive being written."""
        return self._file

    @property
    def template(self) -> "Template":
        """The template renderer."""
        return self._template

    @property
    def table_render(self) -> TableRender:
        """The configured table render mode."""
        return self._table_render

    @property
    def latex_render(self) -> LaTeXRender:
        """The configured LaTeX render mode."""
        return self._latex_render

    def use_asset(self, file_name: str, media_type: str) -> None:
        """Register an asset name and media type without writing any data."""
        self._used_file_names[file_name] = media_type

    def add_asset(self, file_name: str, media_type: str, data: bytes) -> None:
        """Register an asset and write its bytes, unless the name is already registered."""
        if file_name not in self._used_file_names:
            self._used_file_names[file_name] = media_type
            self._file.writestr(
                zinfo_or_arcname=f"OEBPS/assets/{file_name}",
                data=data,
            )

    @property
    def used_files(self) -> list[tuple[str, str]]:
        """Registered ``(file_name, media_type)`` pairs, sorted by file name."""
        # File names are unique dict keys, so sorting the pairs orders by name.
        return sorted(self._used_file_names.items())

    def add_used_asset_files(self) -> None:
        """Copy every registered file found in the assets directory into the archive."""
        assets_dir = self._assets_path
        if assets_dir is None:
            return
        for asset in sorted(assets_dir.iterdir()):
            if asset.name in self._used_file_names:
                self._file.write(
                    filename=asset,
                    arcname=f"OEBPS/assets/{asset.name}",
                )
|
|
74
|
+
|
|
75
|
+
class Template:
    """Renders Jinja templates by name, caching each loaded template."""

    def __init__(self):
        # NOTE(review): templates are resolved inside the "pdf_craft" package,
        # not this package — confirm this is intended.
        templates_path = files("pdf_craft") / "data" / "templates"
        self._env: Environment = create_env(templates_path)
        # template name -> loaded Jinja template
        self._templates: dict[str, JinjaTemplate] = {}

    def render(self, template: str, **params) -> str:
        """Render the template named ``template`` with ``params`` as its variables."""
        return self._template(template).render(**params)

    def _template(self, name: str) -> JinjaTemplate:
        """Return the template for ``name``, loading and caching it on first use."""
        cached = self._templates.get(name, None)
        if cached is not None:
            return cached
        loaded = self._env.get_template(name)
        self._templates[name] = loaded
        return loaded
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import re
|
|
3
|
+
import matplotlib.pyplot as plt
|
|
4
|
+
|
|
5
|
+
from xml.etree.ElementTree import fromstring, Element
|
|
6
|
+
from latex2mathml.converter import convert
|
|
7
|
+
from .hash import sha256_hash
|
|
8
|
+
from .types import LaTeXRender
|
|
9
|
+
from .context import Context
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def try_gen_table(context: Context, element: Element) -> list[Element] | None:
    """Collect the elements used to render a table inline.

    Returns ``None`` when the clipping check below matches; otherwise the
    children of the element's ``html`` child, or an empty list when there is
    no such child.
    """
    # NOTE(review): this compares the *table* render mode with a LaTeXRender
    # member; presumably the table enum's own member was intended — confirm.
    if context.table_render == LaTeXRender.CLIPPING:
        return None

    table_html = _find_child(element, ("html",))
    if table_html is None:
        return []
    return list(table_html)
|
|
23
|
+
|
|
24
|
+
def try_gen_formula(context: Context, element: Element) -> Element | None:
    """Render the LaTeX text of ``element`` according to the context's mode.

    Returns a MathML element in MATHML mode, an ``img`` element pointing at a
    generated SVG asset in SVG mode, and ``None`` in CLIPPING mode, for empty
    text, or when rendering fails.
    """
    mode = context.latex_render
    if mode == LaTeXRender.CLIPPING:
        return None

    source = (element.text or "").strip()
    if not source:
        return None

    expression = _normalize_expression(source)
    if mode == LaTeXRender.MATHML:
        return _latex2mathml(expression)

    if mode == LaTeXRender.SVG:
        svg_bytes = _latex_formula2svg(expression)
        if svg_bytes is None:
            return None

        # The asset is named after the hash of its content.
        file_name = f"{sha256_hash(svg_bytes)}.svg"
        image = _create_image_element(file_name, element)
        context.add_asset(file_name, "image/svg+xml", svg_bytes)
        return image

    return None
|
|
46
|
+
|
|
47
|
+
def try_gen_asset(context: Context, element: Element) -> Element | None:
    """Build an ``img`` element for the PNG asset named by the ``hash`` attribute.

    Registers ``<hash>.png`` as a used asset on the context. Returns ``None``
    when the element has no ``hash`` attribute.
    """
    asset_hash = element.get("hash", None)
    if asset_hash is None:
        return None

    png_name = f"{asset_hash}.png"
    context.use_asset(png_name, "image/png")
    return _create_image_element(png_name, element)
|
|
56
|
+
|
|
57
|
+
# Matches an escape of the form `&#x` + exactly five hex digits + `;`.
_ESCAPE_UNICODE_PATTERN = re.compile(r"&#x([0-9A-Fa-f]{5});")

def _latex2mathml(latex: str) -> None | Element:
    """Convert a LaTeX expression into a parsed MathML element.

    Returns ``None`` when the LaTeX conversion raises or when the resulting
    markup cannot be parsed as XML.
    """
    try:
        html_latex = convert(latex)
    except Exception:
        return None

    # latex2mathml's escaping adds a strange `&` prefix, which is clearly redundant.
    # As a last resort, a regular expression is used here to correct this error.
    def repl(match):
        # Replace the matched escape with the character its hex code denotes.
        hex_code = match.group(1)
        char = chr(int(hex_code, 16))
        # NOTE(review): as written, both branches below return the character
        # unchanged, exactly like the final branch. Presumably they were meant
        # to return XML entity escapes — confirm against the original file.
        if char == "<":
            return "<"
        elif char == ">":
            return ">"
        else:
            return char

    mathml = re.sub(
        pattern=_ESCAPE_UNICODE_PATTERN,
        repl=repl,
        string=html_latex,
    )
    try:
        return fromstring(mathml)
    except Exception:
        return None
|
|
86
|
+
|
|
87
|
+
def _latex_formula2svg(latex: str, font_size: int=12):
    """Render a LaTeX formula to SVG bytes with matplotlib.

    Args:
        latex: The LaTeX expression, without surrounding ``$`` delimiters.
        font_size: Font size passed to matplotlib's ``font`` rc group.

    Returns:
        The SVG document as bytes, or ``None`` if rendering fails for any
        reason.
    """
    # from https://www.cnblogs.com/qizhou/p/18170083
    fig = None
    try:
        output = io.BytesIO()
        # These rc settings are applied globally, as in the original code.
        plt.rc("text", usetex = True)
        plt.rc("font", size = font_size)
        fig, ax = plt.subplots()
        txt = ax.text(0.5, 0.5, f"${latex}$", ha="center", va="center", transform=ax.transAxes)
        ax.axis("off")
        # Draw once so the text extent is known, then shrink the figure to it.
        fig.canvas.draw()
        bbox = txt.get_window_extent(renderer=fig.canvas.get_renderer())
        fig.set_size_inches(bbox.width / fig.dpi, bbox.height / fig.dpi)
        fig.savefig(
            output,
            format="svg",
            transparent=True,
            bbox_inches="tight",
            pad_inches=0,
        )
        return output.getvalue()
    except Exception:
        return None
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # each rendered formula would leak one figure.
        if fig is not None:
            plt.close(fig)
|
|
109
|
+
|
|
110
|
+
def _create_image_element(file_name: str, origin: Element):
    """Create an ``img`` element that points at ``../assets/<file_name>``.

    The ``alt`` text is the text of ``origin`` when it is non-empty, and the
    literal ``image`` otherwise.
    """
    image = Element("img")
    image.set("src", f"../assets/{file_name}")
    image.set("alt", origin.text if origin.text else "image")
    return image
|
|
123
|
+
|
|
124
|
+
def _find_child(parent: Element, tags: tuple[str, ...]) -> Element | None:
|
|
125
|
+
for child in parent:
|
|
126
|
+
if child.tag in tags:
|
|
127
|
+
return child
|
|
128
|
+
return None
|
|
129
|
+
|
|
130
|
+
def _normalize_expression(expression: str) -> str:
    """Remove every newline from ``expression`` and strip surrounding whitespace."""
    return expression.replace("\n", "").strip()
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from os import PathLike
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Literal
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
from zipfile import ZipFile
|
|
8
|
+
from xml.etree.ElementTree import fromstring, Element
|
|
9
|
+
|
|
10
|
+
from .types import TableRender, LaTeXRender
|
|
11
|
+
from .gen_part import generate_part
|
|
12
|
+
from .gen_index import gen_index, NavPoint
|
|
13
|
+
from .i18n import I18N
|
|
14
|
+
from .context import Context, Template
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def generate_epub_file(
    from_dir_path: PathLike,
    epub_file_path: PathLike,
    lan: Literal["zh", "en"] = "zh",
    table_render: TableRender = TableRender.HTML,
    latex_render: LaTeXRender = LaTeXRender.MATHML,
) -> None:
    """Build an EPUB archive from a prepared source directory.

    The source directory is read for ``index.json``, ``meta.json``,
    ``cover.png``, an ``assets`` directory and a ``chapters`` directory; each
    of them is optional. Parent directories of ``epub_file_path`` are created
    when missing.

    Args:
        from_dir_path: Directory holding the source material.
        epub_file_path: Path of the EPUB file to write.
        lan: Language of the built-in labels.
        table_render: How tables are rendered.
        latex_render: How LaTeX formulas are rendered.
    """
    i18n = I18N(lan)
    template = Template()
    source_dir = Path(from_dir_path)
    target_file = Path(epub_file_path)

    chapters_path: Path = source_dir / "chapters"
    head_chapter_path = chapters_path / "chapter.xml"
    has_head_chapter: bool = head_chapter_path.exists()
    has_cover: bool = (source_dir / "cover.png").exists()

    meta: dict = {}
    meta_path = source_dir / "meta.json"
    if meta_path.exists():
        with open(meta_path, "r", encoding="utf-8") as f:
            meta = json.load(f)

    assets_path: Path | None = source_dir / "assets"
    if not assets_path.exists():
        assets_path = None

    def chapter_exists(chapter_id) -> bool:
        return (chapters_path / f"chapter_{chapter_id}.xml").exists()

    toc_ncx, nav_points = gen_index(
        template=template,
        i18n=i18n,
        meta=meta,
        index_file_path=source_dir / "index.json",
        has_cover=has_cover,
        check_chapter_exits=chapter_exists,
    )
    target_file.parent.mkdir(parents=True, exist_ok=True)

    with ZipFile(target_file, "w") as file:
        context = Context(
            file=file,
            template=template,
            assets_path=assets_path,
            table_render=table_render,
            latex_render=latex_render,
        )
        file.writestr(
            zinfo_or_arcname="mimetype",
            data=template.render("mimetype").encode("utf-8"),
        )
        file.writestr(
            zinfo_or_arcname="OEBPS/toc.ncx",
            data=toc_ncx.encode("utf-8"),
        )
        # Chapters are written first: rendering them registers the assets
        # that the manifest written by _write_basic_files lists.
        _write_chapters(
            context=context,
            i18n=i18n,
            nav_points=nav_points,
            chapters_path=chapters_path,
            has_head_chapter=has_head_chapter,
            head_chapter_path=head_chapter_path,
        )
        _write_basic_files(
            context=context,
            i18n=i18n,
            meta=meta,
            nav_points=nav_points,
            has_cover=has_cover,
            has_head_chapter=has_head_chapter,
        )
        _write_assets(
            context=context,
            i18n=i18n,
            from_dir_path=source_dir,
            has_cover=has_cover,
        )
|
|
97
|
+
|
|
98
|
+
def _write_assets(
    context: Context,
    i18n: I18N,
    from_dir_path: Path,
    has_cover: bool,
):
    """Write the stylesheet, the optional cover page and image, and the used assets."""
    archive = context.file
    archive.writestr(
        zinfo_or_arcname="OEBPS/styles/style.css",
        data=context.template.render("style.css").encode("utf-8"),
    )
    if has_cover:
        cover_page = context.template.render(
            template="cover.xhtml",
            i18n=i18n,
        )
        archive.writestr(
            zinfo_or_arcname="OEBPS/Text/cover.xhtml",
            data=cover_page.encode("utf-8"),
        )
        archive.write(
            filename=from_dir_path / "cover.png",
            arcname="OEBPS/assets/cover.png",
        )
    context.add_used_asset_files()
|
|
122
|
+
|
|
123
|
+
def _write_chapters(
    context: Context,
    i18n: I18N,
    nav_points: list[NavPoint],
    chapters_path: Path,
    has_head_chapter: bool,
    head_chapter_path: Path,
):
    """Render the head chapter (if any) and each existing chapter file into ``OEBPS/Text/``."""

    def pending():
        # Yields (source XML path, file name inside OEBPS/Text/) lazily, so
        # each existence check still happens right before its chapter is read.
        if has_head_chapter:
            yield head_chapter_path, "head.xhtml"
        for nav_point in nav_points:
            source = chapters_path / f"chapter_{nav_point.index_id}.xml"
            if source.exists():
                yield source, nav_point.file_name

    for source, file_name in pending():
        xhtml = generate_part(context, _read_xml(source), i18n)
        context.file.writestr(
            zinfo_or_arcname="OEBPS/Text/" + file_name,
            data=xhtml.encode("utf-8"),
        )
|
|
148
|
+
|
|
149
|
+
def _write_basic_files(
    context: Context,
    i18n: I18N,
    meta: dict,
    nav_points: list[NavPoint],
    has_cover: bool,
    has_head_chapter: bool,
):
    """Write ``META-INF/container.xml`` and the ``OEBPS/content.opf`` package file."""
    archive = context.file
    render = context.template.render

    container_xml = render("container.xml")
    archive.writestr(
        zinfo_or_arcname="META-INF/container.xml",
        data=container_xml.encode("utf-8"),
    )

    # A random UUID stands in when the metadata has no ISBN entry.
    isbn = meta.get("ISBN", str(uuid4()))
    package = render(
        template="content.opf",
        meta=meta,
        i18n=i18n,
        ISBN=isbn,
        nav_points=nav_points,
        has_head_chapter=has_head_chapter,
        has_cover=has_cover,
        asset_files=context.used_files,
    )
    archive.writestr(
        zinfo_or_arcname="OEBPS/content.opf",
        data=package.encode("utf-8"),
    )
|
|
175
|
+
|
|
176
|
+
def _read_xml(path: Path) -> Element:
    """Read the UTF-8 file at ``path`` and return its parsed XML root element."""
    return fromstring(path.read_text(encoding="utf-8"))
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from json import loads
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Callable
|
|
6
|
+
from xml.etree.ElementTree import tostring, Element
|
|
7
|
+
from .i18n import I18N
|
|
8
|
+
from .context import Template
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class NavPoint:
    """One generated navigation entry for a chapter."""
    # Id of the chapter in the index; selects the `chapter_<id>.xml` source file.
    index_id: int
    # Value written to the entry's `playOrder` attribute.
    order: int
    # Name of the chapter's XHTML file (`part<id>.xhtml`).
    file_name: str
|
|
16
|
+
|
|
17
|
+
def gen_index(
    template: Template,
    i18n: I18N,
    meta: dict,
    index_file_path: Path,
    has_cover: bool,
    check_chapter_exits: Callable[[int], bool],
) -> tuple[str, list[NavPoint]]:
    """Render ``toc.ncx`` and collect the navigation points.

    When the index file does not exist, the table of contents is rendered
    with no entries and depth 0.

    Returns:
        The rendered ``toc.ncx`` text and the generated navigation points.
    """
    nav_elements: list[Element] = []
    nav_points: list[NavPoint] = []
    depth: int = 0

    if index_file_path.exists():
        prefaces, chapters = _parse_index(index_file_path)
        generation = _NavPointGeneration(
            has_cover=has_cover,
            check_chapter_exits=check_chapter_exits,
            chapters_count=_count_chapters(prefaces) + _count_chapters(chapters),
        )
        # Prefaces come first, then the regular chapters.
        for chapter in (*prefaces, *chapters):
            element = generation.generate(chapter)
            if element is not None:
                nav_elements.append(element)

        depth = max(_max_depth(prefaces), _max_depth(chapters))
        nav_points = generation.nav_points

    toc_ncx = template.render(
        template="toc.ncx",
        depth=depth,
        i18n=i18n,
        meta=meta,
        has_cover=has_cover,
        nav_points=[tostring(p, encoding="unicode") for p in nav_elements],
    )
    return toc_ncx, nav_points
|
|
67
|
+
|
|
68
|
+
def _count_chapters(chapters: list[_Chapter]) -> int:
    """Count the given chapters together with all of their nested children."""
    return sum(1 + _count_chapters(chapter.children) for chapter in chapters)
|
|
73
|
+
|
|
74
|
+
def _max_depth(chapters: list[_Chapter]) -> int:
    """Return the deepest nesting level among ``chapters`` (0 for an empty list)."""
    return max(
        (_max_depth(chapter.children) + 1 for chapter in chapters),
        default=0,
    )
|
|
82
|
+
|
|
83
|
+
class _NavPointGeneration:
    """Builds ``navPoint`` elements and records a NavPoint for each one."""

    def __init__(self, has_cover: bool, chapters_count: int, check_chapter_exits: Callable[[int], bool]):
        self._nav_points: list[NavPoint] = []
        # Play order starts at 2 when there is a cover, otherwise at 1.
        self._next_order: int = 2 if has_cover else 1
        # Chapter ids are zero-padded to the digit count of the chapter total.
        self._digits = len(str(chapters_count))
        self._check_chapter_exits: Callable[[int], bool] = check_chapter_exits

    @property
    def nav_points(self) -> list[NavPoint]:
        """Navigation points recorded so far, in generation order."""
        return self._nav_points

    def generate(self, chapter: _Chapter) -> Element | None:
        """Build the ``navPoint`` element for ``chapter`` and, recursively, its children.

        Returns ``None`` when the existence check fails for the chapter; its
        children are not visited in that case.
        """
        if not self._check_chapter_exits(chapter.id):
            return None

        file_name = f"part{str(chapter.id).zfill(self._digits)}.xhtml"
        order = self._next_order
        # Reserve the order before recursing so children get later numbers.
        self._next_order += 1

        nav_point_xml = Element("navPoint", attrib={
            "id": f"np_{chapter.id}",
            "playOrder": str(order),
        })
        label_text_xml = Element("text")
        label_text_xml.text = chapter.headline
        label_xml = Element("navLabel")
        label_xml.append(label_text_xml)
        nav_point_xml.append(label_xml)
        nav_point_xml.append(Element("content", attrib={"src": f"Text/{file_name}"}))

        self._nav_points.append(NavPoint(
            index_id=chapter.id,
            order=order,
            file_name=file_name,
        ))
        for child in chapter.children:
            child_xml = self.generate(child)
            if child_xml is not None:
                nav_point_xml.append(child_xml)

        return nav_point_xml
|
|
129
|
+
|
|
130
|
+
@dataclass
class _Chapter:
    """One entry of the parsed index file, with its nested sub-chapters."""
    # Chapter id, converted with `int()` from the entry's "id" value.
    id: int
    # The entry's "headline" value; used as the navigation label text.
    headline: str
    # Nested chapters built from the entry's "children" list.
    children: list[_Chapter]
|
|
135
|
+
|
|
136
|
+
def _parse_index(file_path: Path) -> tuple[list[_Chapter], list[_Chapter]]:
    """Parse the index JSON file into ``(prefaces, chapters)``.

    A top-level list is treated as chapters with no prefaces. A top-level
    object must provide both a ``prefaces`` and a ``chapters`` list.

    Raises:
        TypeError: If the top-level JSON value is neither a list nor an
            object.
        KeyError: If a top-level object lacks ``prefaces`` or ``chapters``.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        data: dict | list = loads(file.read())

    if isinstance(data, list):
        return [], _transform_chapters(data)
    if isinstance(data, dict):
        return (
            _transform_chapters(data["prefaces"]),
            _transform_chapters(data["chapters"]),
        )
    # Falling through would return None and fail later with an opaque
    # unpacking error at the call site, so fail here with a clear message.
    raise TypeError(
        f"index file must contain a JSON list or an object, got {type(data).__name__}"
    )
|
|
147
|
+
|
|
148
|
+
def _transform_chapters(data_list: list) -> list[_Chapter]:
    """Recursively convert raw index entries into ``_Chapter`` objects.

    Each entry is read for its ``id``, ``headline`` and ``children`` keys.
    """
    return [
        _Chapter(
            id=int(entry["id"]),
            headline=entry["headline"],
            children=_transform_chapters(entry["children"]),
        )
        for entry in data_list
    ]
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from typing import Generator
|
|
2
|
+
from xml.etree.ElementTree import tostring, Element
|
|
3
|
+
|
|
4
|
+
from .i18n import I18N
|
|
5
|
+
from .context import Context
|
|
6
|
+
from .gen_asset import try_gen_table, try_gen_formula, try_gen_asset
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def generate_part(
    context: Context,
    chapter_xml: Element,
    i18n: I18N,
) -> str:
    """Render one chapter element into the ``part.xhtml`` template."""
    # Body content is rendered before the footnotes, as in the template call order.
    content = [
        tostring(node, encoding="unicode")
        for node in _render_contents(context, chapter_xml)
    ]
    citations = [
        tostring(node, encoding="unicode")
        for node in _render_footnotes(context, chapter_xml)
    ]
    return context.template.render(
        template="part.xhtml",
        i18n=i18n,
        content=content,
        citations=citations,
    )
|
|
27
|
+
|
|
28
|
+
# Maps a source XML tag to the HTML tag it is rendered as.
_XML2HTML_TAGS: dict[str, str] = {
    "headline": "h1",
    "quote": "p",
    "text": "p",
}
|
|
33
|
+
|
|
34
|
+
def _render_contents(context: Context, chapter_element: Element) -> Generator[Element, None, None]:
    """Yield the rendered layout of each child of the chapter, skipping those that render to nothing."""
    rendered = (_render_layout(context, child) for child in chapter_element)
    yield from (layout for layout in rendered if layout is not None)
|
|
39
|
+
|
|
40
|
+
def _render_footnotes(context: Context, chapter_element: Element):
    """Yield one ``div.citation`` element per usable ``footnote`` child.

    A footnote is skipped when it has no ``mark`` child or when none of its
    other children renders to anything. Each yielded block starts with a
    back-reference anchor labelled ``[<id>]``.
    """
    for footnote in chapter_element:
        if footnote.tag != "footnote":
            continue

        citation_div = Element("div", attrib={"class": "citation"})
        has_mark = False
        for part in footnote:
            if part.tag == "mark":
                has_mark = True
                continue
            rendered = _render_layout(context, part)
            if rendered is not None:
                citation_div.append(rendered)

        if not (has_mark and len(citation_div) > 0):
            continue

        footnote_id = int(footnote.get("id"))
        back_ref = Element("a", attrib={
            "id": f"mark-{footnote_id}",
            "href": f"#ref-{footnote_id}",
            "class": "citation",
        })
        back_ref.text = f"[{footnote_id}]"

        head = citation_div[0]
        if head.tag == "p":
            # Put the anchor in front of the paragraph's own leading text.
            back_ref.tail = head.text
            head.text = None
            head.insert(0, back_ref)
        else:
            # The first layout is not a paragraph: give the anchor its own.
            holder = Element("p")
            holder.append(back_ref)
            citation_div.insert(0, holder)

        yield citation_div
|
|
79
|
+
|
|
80
|
+
def _render_layout(context: Context, raw_layout: Element) -> Element | None:
    """Convert one source layout element into its HTML element.

    Footnotes render to nothing here. Tags listed in ``_XML2HTML_TAGS``
    become text elements whose ``mark`` children turn into reference
    anchors; a ``quote`` is additionally wrapped in a ``blockquote``. Any
    other tag is treated as an asset (table, formula or image) wrapped in
    ``div.alt-wrapper``.

    Returns:
        The rendered element, or ``None`` when nothing could be rendered.
    """
    tag = raw_layout.tag
    if tag == "footnote":
        return None

    if tag in _XML2HTML_TAGS:
        layout = Element(_XML2HTML_TAGS[tag])
        layout.text = raw_layout.text
        for mark in raw_layout:
            assert mark.tag == "mark"
            mark_id = int(mark.get("id"))
            anchor = Element("a")
            anchor.attrib = {
                "id": f"ref-{mark_id}",
                "href": f"#mark-{mark_id}",
                "class": "super",
            }
            layout.append(anchor)
            anchor.text = f"[{mark_id}]"
            # Text that followed the mark keeps its place after the anchor.
            anchor.tail = mark.tail

        if tag == "quote":
            blockquote = Element("blockquote")
            blockquote.append(layout)
            return blockquote
        return layout

    asset_wrapper = Element("div", attrib={
        "class": "alt-wrapper",
    })
    if tag == "table":
        # try_gen_table may return None; extending with None would raise
        # TypeError, so fall through to the image fallback instead.
        table_children = try_gen_table(context, raw_layout)
        if table_children is not None:
            asset_wrapper.extend(table_children)
    elif tag == "formula":
        formula = try_gen_formula(context, raw_layout)
        if formula is not None:
            asset_wrapper.append(formula)

    # Nothing rendered inline: fall back to the element's image asset.
    if len(asset_wrapper) == 0:
        asset = try_gen_asset(context, raw_layout)
        if asset is not None:
            asset_wrapper.append(asset)

    if len(asset_wrapper) > 0:
        return asset_wrapper

    return None
|
epub_generator/hash.py
ADDED
epub_generator/i18n.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from typing import Literal
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class I18N:
    """Built-in labels in the selected language."""

    def __init__(self, lan: Literal["zh", "en"]):
        """Select the label set for ``lan``.

        Raises:
            ValueError: If ``lan`` is neither ``"zh"`` nor ``"en"``.
        """
        if lan == "zh":
            self.unnamed: str = "未命名"
            self.cover: str = "封面"
            self.references: str = "引用"
        elif lan == "en":
            self.unnamed: str = "Unnamed"
            self.cover: str = "Cover"
            self.references: str = "References"
        else:
            # Without this, the instance would be created with no label
            # attributes at all and fail (or render blank) much later.
            raise ValueError(
                f"unsupported language: {lan!r} (expected 'zh' or 'en')"
            )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from typing import Tuple, Callable
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from jinja2 import select_autoescape, Environment, BaseLoader, TemplateNotFound
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def create_env(dir_path: Path) -> Environment:
    """Create the Jinja environment that loads templates from ``dir_path``."""
    loader = _DSLoader(dir_path)
    return Environment(
        loader=loader,
        autoescape=select_autoescape(),
        trim_blocks=True,
        keep_trailing_newline=True,
    )
|
|
15
|
+
|
|
16
|
+
# (source text, file name, up-to-date check) as returned by a Jinja loader.
_LoaderResult = Tuple[str, str | None, Callable[[], bool] | None]

class _DSLoader(BaseLoader):
    """Jinja loader that reads ``<name>.jinja`` files from one directory."""

    def __init__(self, dir_path: Path):
        super().__init__()
        self._dir_path: Path = dir_path

    def get_source(self, _: Environment, template: str) -> _LoaderResult:
        """Load the source of ``template`` from the loader's directory.

        Raises:
            TemplateNotFound: If the name is rejected or the file is missing.
        """
        template = self._norm_template(template)
        target_path = (self._dir_path / template).resolve()

        if not target_path.exists():
            raise TemplateNotFound(f"cannot find {template}")

        return self._get_source_with_path(target_path)

    def _norm_template(self, template: str) -> str:
        """Normalize a template name to ``<name>.jinja``.

        Names starting with one or more dots followed by ``/`` are rejected;
        one leading ``/`` and an existing ``.jinja`` suffix (any case) are
        dropped before the suffix is appended.
        """
        if bool(re.match(r"^\.+/", template)):
            raise TemplateNotFound(f"invalid path {template}")

        template = re.sub(r"^/", "", template)
        template = re.sub(r"\.jinja$", "", template, flags=re.IGNORECASE)
        template = f"{template}.jinja"

        return template

    def _get_source_with_path(self, path: Path) -> _LoaderResult:
        """Read the template at ``path`` and build the loader result for it."""
        mtime = path.stat().st_mtime
        with open(path, "r", encoding="utf-8") as f:
            source = f.read()

        def is_updated() -> bool:
            # True while the file's modification time is unchanged. A file
            # that can no longer be stat'ed counts as changed, not an error.
            try:
                return mtime == path.stat().st_mtime
            except OSError:
                return False

        # Return the file name as a string, matching the declared result type.
        return source, str(path), is_updated
|
epub_generator/types.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Moskize91
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: epub-generator
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary:
|
|
5
|
+
License: MIT
|
|
6
|
+
Author: Tao Zeyu
|
|
7
|
+
Author-email: i@taozeyu.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: jinja2 (>=3.1.5,<4.0.0)
|
|
15
|
+
Requires-Dist: latex2mathml (>=3.77.0,<4.0.0)
|
|
16
|
+
Requires-Dist: matplotlib (>=3.10.1,<3.11.0)
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# epub-generator
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
epub_generator/__init__.py,sha256=BTAVO_YqMdArhQQQN1aNsXIiPATpqZzb6OWVuAQyoNI,160
|
|
2
|
+
epub_generator/context.py,sha256=hWauz9eza6b_k0zgjZSA9hhSqYMiRAQ2cZ8H6BIA34k,2626
|
|
3
|
+
epub_generator/gen_asset.py,sha256=52K92RWzioRka2uzU94s1gnL0BqcaFmPrOsKgC77_Nc,3567
|
|
4
|
+
epub_generator/gen_epub.py,sha256=Dvi6AsnQbQ3uOU0fcXfazTM3g9U1mnccWM68VIuJq10,4834
|
|
5
|
+
epub_generator/gen_index.py,sha256=nwyE0pzeVHKKRT0xC7THXuSRsiaiS2mQA84LLjXpkgI,4141
|
|
6
|
+
epub_generator/gen_part.py,sha256=t9XZIqzJdtdxnFZqPGvJpVWqT68jTZNSaQ9T973NEFo,3319
|
|
7
|
+
epub_generator/hash.py,sha256=EUIrpST9vAvSy8-kfPeBqemUfYyNXbZXF90fS1I355E,138
|
|
8
|
+
epub_generator/i18n.py,sha256=zGpM4TpESqMPr57f-WsUlK1UDo3wTdAL6H5o9OhQ6xc,349
|
|
9
|
+
epub_generator/template.py,sha256=GdV3QnypProKFCMH1kBNfdt6wiShygP_-xGnE5EOUwU,1460
|
|
10
|
+
epub_generator/types.py,sha256=zSd6SOytrcjzuOGuRYa49TcnSvGRJmGXO3e7_Im_t9k,170
|
|
11
|
+
epub_generator-0.0.1.dist-info/LICENSE,sha256=9Zt_a4mrzkvR2rc0UbqTgbboIjWuumDFgeQyKos0H2E,1066
|
|
12
|
+
epub_generator-0.0.1.dist-info/METADATA,sha256=9ULGGqSBwFTkXDjWO1rDHXamwlh7-H18_UI4hQvUASU,596
|
|
13
|
+
epub_generator-0.0.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
14
|
+
epub_generator-0.0.1.dist-info/RECORD,,
|