epub-generator 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epub_generator/__init__.py +41 -0
- epub_generator/context.py +141 -0
- epub_generator/data/container.xml.jinja +6 -0
- epub_generator/data/content.opf.jinja +68 -0
- epub_generator/data/cover.xhtml.jinja +16 -0
- epub_generator/data/mimetype.jinja +1 -0
- epub_generator/data/nav.xhtml.jinja +43 -0
- epub_generator/data/part.xhtml.jinja +24 -0
- epub_generator/data/style.css.jinja +68 -0
- epub_generator/generation/__init__.py +1 -0
- epub_generator/generation/gen_asset.py +156 -0
- epub_generator/generation/gen_chapter.py +177 -0
- epub_generator/generation/gen_epub.py +198 -0
- epub_generator/generation/gen_nav.py +92 -0
- epub_generator/generation/gen_toc.py +88 -0
- epub_generator/generation/xml_utils.py +31 -0
- epub_generator/html_tag.py +11 -0
- epub_generator/i18n.py +17 -0
- epub_generator/options.py +12 -0
- epub_generator/template.py +52 -0
- epub_generator/types.py +154 -0
- epub_generator-0.1.3.dist-info/LICENSE +21 -0
- epub_generator-0.1.3.dist-info/METADATA +570 -0
- epub_generator-0.1.3.dist-info/RECORD +25 -0
- epub_generator-0.1.3.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from .generation import generate_epub
|
|
2
|
+
from .options import LaTeXRender, TableRender
|
|
3
|
+
from .types import (
|
|
4
|
+
BookMeta,
|
|
5
|
+
Chapter,
|
|
6
|
+
ChapterGetter,
|
|
7
|
+
ContentBlock,
|
|
8
|
+
EpubData,
|
|
9
|
+
Footnote,
|
|
10
|
+
Formula,
|
|
11
|
+
HTMLTag,
|
|
12
|
+
Image,
|
|
13
|
+
Mark,
|
|
14
|
+
Table,
|
|
15
|
+
TextBlock,
|
|
16
|
+
TextKind,
|
|
17
|
+
TocItem,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
# Main API function
|
|
22
|
+
"generate_epub",
|
|
23
|
+
# Options
|
|
24
|
+
"TableRender",
|
|
25
|
+
"LaTeXRender",
|
|
26
|
+
# Data types
|
|
27
|
+
"EpubData",
|
|
28
|
+
"BookMeta",
|
|
29
|
+
"TocItem",
|
|
30
|
+
"Chapter",
|
|
31
|
+
"ChapterGetter",
|
|
32
|
+
"ContentBlock",
|
|
33
|
+
"TextBlock",
|
|
34
|
+
"TextKind",
|
|
35
|
+
"Table",
|
|
36
|
+
"Formula",
|
|
37
|
+
"HTMLTag",
|
|
38
|
+
"Image",
|
|
39
|
+
"Footnote",
|
|
40
|
+
"Mark",
|
|
41
|
+
]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from hashlib import sha256
|
|
3
|
+
from importlib.resources import files
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import cast
|
|
6
|
+
from zipfile import ZipFile
|
|
7
|
+
|
|
8
|
+
from jinja2 import Environment
|
|
9
|
+
from jinja2 import Template as JinjaTemplate
|
|
10
|
+
|
|
11
|
+
from .options import LaTeXRender, TableRender
|
|
12
|
+
from .template import create_env
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class _AssetNode:
    """Registry record describing one asset stored in the output archive."""

    # Name of the stored asset file (content hash followed by the extension).
    file_name: str
    # Media type recorded for the asset.
    media_type: str
    # Hex digest of the asset content; the registry's de-duplication key.
    content_hash: str
|
|
20
|
+
|
|
21
|
+
class Context:
    """State shared by the generators while one EPUB archive is being written.

    Holds the output archive, the template renderer, the table/LaTeX render
    options, the set of chapters flagged as containing MathML, and a registry
    of the assets already written under ``OEBPS/assets/``. Assets are named
    after the SHA-256 hex digest of their content, so identical content is
    stored once no matter how many paths or callers supply it.
    """

    def __init__(
        self,
        file: ZipFile,
        template: "Template",
        table_render: TableRender,
        latex_render: LaTeXRender,
    ) -> None:
        self._file: ZipFile = file
        self._template: Template = template
        self._table_render: TableRender = table_render
        self._latex_render: LaTeXRender = latex_render
        self._path_to_node: dict[Path, _AssetNode] = {}  # source_path -> node
        self._hash_to_node: dict[str, _AssetNode] = {}  # content_hash -> node
        self._chapters_with_mathml: set[str] = set()  # Track chapters containing MathML

    @property
    def file(self) -> ZipFile:
        """The archive the EPUB is written into."""
        return self._file

    @property
    def template(self) -> "Template":
        """The template renderer supplied at construction."""
        return self._template

    @property
    def table_render(self) -> TableRender:
        """The table render option supplied at construction."""
        return self._table_render

    @property
    def latex_render(self) -> LaTeXRender:
        """The LaTeX render option supplied at construction."""
        return self._latex_render

    @property
    def used_files(self) -> list[tuple[str, str]]:
        """``(file_name, media_type)`` of every stored asset, sorted by file name."""
        nodes = sorted(self._hash_to_node.values(), key=lambda node: node.file_name)
        return [(node.file_name, node.media_type) for node in nodes]

    def mark_chapter_has_mathml(self, chapter_file_name: str) -> None:
        """Mark a chapter as containing MathML content for EPUB 3.0 manifest properties."""
        self._chapters_with_mathml.add(chapter_file_name)

    def chapter_has_mathml(self, chapter_file_name: str) -> bool:
        """Check if a chapter contains MathML content."""
        return chapter_file_name in self._chapters_with_mathml

    def use_asset(
        self,
        source_path: Path,
        media_type: str,
        file_ext: str,
    ) -> str:
        """Store the file at *source_path* as an asset and return its archive file name.

        A path that was already used returns its recorded name without touching
        the disk again. Content identical to an already stored asset reuses
        that asset's name (and keeps that asset's media type and extension).

        Raises:
            FileNotFoundError: if *source_path* does not exist.
        """
        known = self._path_to_node.get(source_path)
        if known is not None:
            return known.file_name

        if not source_path.exists():
            raise FileNotFoundError(f"Asset file not found: {source_path}")

        with open(source_path, "rb") as f:
            content = f.read()

        # The bytes that were hashed are the bytes that get archived; the file
        # is not read a second time, so name and content cannot diverge.
        node = self._store(content, media_type, file_ext)
        self._path_to_node[source_path] = node
        return node.file_name

    def add_asset(self, data: bytes, media_type: str, file_ext: str) -> str:
        """Store in-memory *data* as an asset and return its archive file name.

        Content identical to an already stored asset reuses that asset's name.
        """
        return self._store(data, media_type, file_ext).file_name

    def _store(self, data: bytes, media_type: str, file_ext: str) -> _AssetNode:
        """Return the node for *data*, writing it to the archive if it is new."""
        content_hash = _sha256_hash(data)
        node = self._hash_to_node.get(content_hash)
        if node is not None:
            return node

        file_name = f"{content_hash}{file_ext}"
        # Write first, register second: a failed write must not leave a
        # registry entry pointing at a file that is absent from the archive.
        self._file.writestr(
            zinfo_or_arcname="OEBPS/assets/" + file_name,
            data=data,
        )
        node = _AssetNode(
            file_name=file_name,
            media_type=media_type,
            content_hash=content_hash,
        )
        self._hash_to_node[content_hash] = node
        return node
|
|
120
|
+
|
|
121
|
+
class Template:
    """Renders the Jinja templates found in the package's ``data`` directory."""

    def __init__(self):
        package_root = cast(Path, files("epub_generator"))
        self._env: Environment = create_env(package_root / "data")
        # Templates already fetched from the environment, keyed by name.
        self._templates: dict[str, JinjaTemplate] = {}

    def render(self, template: str, **params) -> str:
        """Render the template named *template* with *params* as its variables."""
        return self._template(template).render(**params)

    def _template(self, name: str) -> JinjaTemplate:
        """Return the template *name*, fetching it on first use and caching it."""
        cached = self._templates.get(name)
        if cached is not None:
            return cached
        loaded = self._env.get_template(name)
        self._templates[name] = loaded
        return loaded
|
|
137
|
+
|
|
138
|
+
def _sha256_hash(data: bytes) -> str:
    """Return the SHA-256 digest of *data* as a hexadecimal string."""
    return sha256(data).hexdigest()
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<package version="3.0" xmlns="http://www.idpf.org/2007/opf" unique-identifier="uid" xml:lang="zh">
|
|
3
|
+
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
4
|
+
<dc:identifier id="uid">{{ ISBN }}</dc:identifier>
|
|
5
|
+
<dc:language>zh</dc:language>
|
|
6
|
+
{% if meta and meta.title %}
|
|
7
|
+
<dc:title>{{ meta.title }}</dc:title>
|
|
8
|
+
{% else %}
|
|
9
|
+
<dc:title>{{ i18n.unnamed }}</dc:title>
|
|
10
|
+
{% endif %}
|
|
11
|
+
{% if meta and meta.description %}
|
|
12
|
+
<dc:description>{{ meta.description }}</dc:description>
|
|
13
|
+
{% endif %}
|
|
14
|
+
{% if meta and meta.publisher %}
|
|
15
|
+
<dc:publisher>{{ meta.publisher }}</dc:publisher>
|
|
16
|
+
{% endif %}
|
|
17
|
+
{% if meta and meta.authors %}
|
|
18
|
+
{% for author in meta.authors %}
|
|
19
|
+
<dc:creator id="creator{{ loop.index }}">{{ author }}</dc:creator>
|
|
20
|
+
<meta refines="#creator{{ loop.index }}" property="role" scheme="marc:relators">aut</meta>
|
|
21
|
+
{% endfor %}
|
|
22
|
+
{% endif %}
|
|
23
|
+
{% if meta and meta.editors %}
|
|
24
|
+
{% for editor in meta.editors %}
|
|
25
|
+
<dc:creator id="editor{{ loop.index }}">{{ editor }}</dc:creator>
|
|
26
|
+
<meta refines="#editor{{ loop.index }}" property="role" scheme="marc:relators">edt</meta>
|
|
27
|
+
{% endfor %}
|
|
28
|
+
{% endif %}
|
|
29
|
+
{% if meta and meta.translators %}
|
|
30
|
+
{% for translator in meta.translators %}
|
|
31
|
+
<dc:creator id="translator{{ loop.index }}">{{ translator }}</dc:creator>
|
|
32
|
+
<meta refines="#translator{{ loop.index }}" property="role" scheme="marc:relators">trl</meta>
|
|
33
|
+
{% endfor %}
|
|
34
|
+
{% endif %}
|
|
35
|
+
<meta property="dcterms:modified">{{ modified_timestamp }}</meta>
|
|
36
|
+
{% if has_cover %}
|
|
37
|
+
<meta name="cover" content="cover-image" />
|
|
38
|
+
{% endif %}
|
|
39
|
+
</metadata>
|
|
40
|
+
<manifest>
|
|
41
|
+
<item id="nav" properties="nav" media-type="application/xhtml+xml" href="nav.xhtml" />
|
|
42
|
+
<item id="a_css" media-type="text/css" href="styles/style.css" />
|
|
43
|
+
{% for asset_file, media_type in asset_files %}
|
|
44
|
+
<item id="a_{{ asset_file|safe }}" media-type="{{ media_type|safe }}" href="assets/{{ asset_file|safe }}" />
|
|
45
|
+
{% endfor %}
|
|
46
|
+
{% if has_cover %}
|
|
47
|
+
<item id="cover-image" properties="cover-image" media-type="image/png" href="assets/cover.png" />
|
|
48
|
+
<item id="x_cover.xhtml" media-type="application/xhtml+xml" href="Text/cover.xhtml" />
|
|
49
|
+
{% endif %}
|
|
50
|
+
{% if has_head_chapter %}
|
|
51
|
+
<item id="x_head.xhtml" media-type="application/xhtml+xml" href="Text/head.xhtml" />
|
|
52
|
+
{% endif %}
|
|
53
|
+
{% for nav_point in nav_points %}
|
|
54
|
+
<item id="x_{{ nav_point.file_name|safe }}" media-type="application/xhtml+xml" href="Text/{{ nav_point.file_name|safe }}"{% if nav_point.file_name in chapters_with_mathml %} properties="mathml"{% endif %} />
|
|
55
|
+
{% endfor %}
|
|
56
|
+
</manifest>
|
|
57
|
+
<spine>
|
|
58
|
+
{% if has_cover %}
|
|
59
|
+
<itemref idref="x_cover.xhtml" linear="no" />
|
|
60
|
+
{% endif %}
|
|
61
|
+
{% if has_head_chapter %}
|
|
62
|
+
<itemref idref="x_head.xhtml" />
|
|
63
|
+
{% endif %}
|
|
64
|
+
{% for nav_point in nav_points %}
|
|
65
|
+
<itemref idref="x_{{ nav_point.file_name|safe }}" />
|
|
66
|
+
{% endfor %}
|
|
67
|
+
</spine>
|
|
68
|
+
</package>
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
|
|
4
|
+
|
|
5
|
+
<head>
|
|
6
|
+
<meta charset="utf-8"/>
|
|
7
|
+
<title>{{ i18n.cover }}</title>
|
|
8
|
+
</head>
|
|
9
|
+
|
|
10
|
+
<body>
|
|
11
|
+
<p>
|
|
12
|
+
<img src="../assets/cover.png" alt="Cover" />
|
|
13
|
+
</p>
|
|
14
|
+
</body>
|
|
15
|
+
|
|
16
|
+
</html>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
application/epub+zip
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
|
|
4
|
+
<head>
|
|
5
|
+
<meta charset="utf-8"/>
|
|
6
|
+
<title>{{ i18n.table_of_contents }}</title>
|
|
7
|
+
<link href="styles/style.css" rel="stylesheet" type="text/css"/>
|
|
8
|
+
</head>
|
|
9
|
+
<body>
|
|
10
|
+
<nav epub:type="toc" id="toc" role="doc-toc">
|
|
11
|
+
<h1>{{ i18n.table_of_contents }}</h1>
|
|
12
|
+
<ol>
|
|
13
|
+
{% if has_cover %}
|
|
14
|
+
<li>
|
|
15
|
+
<a href="Text/cover.xhtml">{{ i18n.cover }}</a>
|
|
16
|
+
</li>
|
|
17
|
+
{% endif %}
|
|
18
|
+
{% if has_head_chapter %}
|
|
19
|
+
<li>
|
|
20
|
+
<a href="Text/head.xhtml">{{ head_chapter_title }}</a>
|
|
21
|
+
</li>
|
|
22
|
+
{% endif %}
|
|
23
|
+
{{ toc_list|safe }}
|
|
24
|
+
</ol>
|
|
25
|
+
</nav>
|
|
26
|
+
|
|
27
|
+
<nav epub:type="landmarks" id="landmarks" hidden="hidden">
|
|
28
|
+
<h2>{{ i18n.landmarks }}</h2>
|
|
29
|
+
<ol>
|
|
30
|
+
{% if has_cover %}
|
|
31
|
+
<li>
|
|
32
|
+
<a epub:type="cover" href="Text/cover.xhtml">{{ i18n.cover }}</a>
|
|
33
|
+
</li>
|
|
34
|
+
{% endif %}
|
|
35
|
+
{% if first_chapter_file %}
|
|
36
|
+
<li>
|
|
37
|
+
<a epub:type="bodymatter" href="Text/{{ first_chapter_file }}">{{ i18n.start_of_content }}</a>
|
|
38
|
+
</li>
|
|
39
|
+
{% endif %}
|
|
40
|
+
</ol>
|
|
41
|
+
</nav>
|
|
42
|
+
</body>
|
|
43
|
+
</html>
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="zh" lang="zh">
|
|
4
|
+
|
|
5
|
+
<head>
|
|
6
|
+
<meta charset="utf-8"/>
|
|
7
|
+
<title>Chapter</title>
|
|
8
|
+
<link href="../styles/style.css" rel="stylesheet" type="text/css"/>
|
|
9
|
+
</head>
|
|
10
|
+
|
|
11
|
+
<body>
|
|
12
|
+
{% for item in content %}
|
|
13
|
+
{{ item|safe }}
|
|
14
|
+
{% endfor %}
|
|
15
|
+
{% if citations %}
|
|
16
|
+
<section epub:type="footnotes" role="doc-endnotes">
|
|
17
|
+
{% for item in citations %}
|
|
18
|
+
{{ item|safe }}
|
|
19
|
+
{% endfor %}
|
|
20
|
+
</section>
|
|
21
|
+
{% endif %}
|
|
22
|
+
</body>
|
|
23
|
+
|
|
24
|
+
</html>
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
blockquote {
|
|
2
|
+
border-left: 0.15em solid #c7c7c7;
|
|
3
|
+
margin: 0.75em;
|
|
4
|
+
padding: 1em;
|
|
5
|
+
text-indent:2em;
|
|
6
|
+
line-height:1.5em;
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
a.super {
|
|
10
|
+
font-size: 0.75em;
|
|
11
|
+
vertical-align: super;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
a.citation {
|
|
15
|
+
font-size: 0.75em;
|
|
16
|
+
font-weight: bold;
|
|
17
|
+
vertical-align: super;
|
|
18
|
+
padding-right: 0.55em;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
div.citation {
|
|
22
|
+
margin-top: 0.5em;
|
|
23
|
+
margin-left: 0.75em;
|
|
24
|
+
margin-bottom: 1.5em;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
div.alt-wrapper {
|
|
28
|
+
display: flex;
|
|
29
|
+
flex-direction: column;
|
|
30
|
+
align-items: center;
|
|
31
|
+
padding-top: 0.8em;
|
|
32
|
+
padding-bottom: 3em;
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
div.alt-wrapper table {
|
|
36
|
+
width: 100%;
|
|
37
|
+
table-layout: fixed;
|
|
38
|
+
border-collapse: collapse;
|
|
39
|
+
border-spacing: 0;
|
|
40
|
+
margin: 0;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
div.alt-wrapper th,
|
|
44
|
+
.alt-wrapper td {
|
|
45
|
+
padding: 8px 12px;
|
|
46
|
+
border: 1px solid #e1e4e8;
|
|
47
|
+
text-align: left;
|
|
48
|
+
font-size: 14px;
|
|
49
|
+
line-height: 1.5;
|
|
50
|
+
word-wrap: break-word;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
div.alt-wrapper th {
|
|
54
|
+
background-color: #f6f8fa;
|
|
55
|
+
font-weight: 600;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
span.formula-inline {
|
|
59
|
+
display: inline;
|
|
60
|
+
vertical-align: middle;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
span.formula-inline img {
|
|
64
|
+
display: inline-block;
|
|
65
|
+
vertical-align: middle;
|
|
66
|
+
margin: 0 0.2em;
|
|
67
|
+
max-height: 1.2em;
|
|
68
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .gen_epub import generate_epub
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
from xml.etree.ElementTree import Element, fromstring
|
|
5
|
+
|
|
6
|
+
import matplotlib.pyplot as plt
|
|
7
|
+
from latex2mathml.converter import convert
|
|
8
|
+
|
|
9
|
+
from ..context import Context
|
|
10
|
+
from ..options import LaTeXRender, TableRender
|
|
11
|
+
from ..types import Formula, Image, Table
|
|
12
|
+
|
|
13
|
+
# Image file extension (lower case, leading dot included) -> media type.
_MEDIA_TYPE_MAP = {
    ".gif": "image/gif",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    ".png": "image/png",
    ".svg": "image/svg+xml",
}
|
|
20
|
+
|
|
21
|
+
def process_table(context: Context, table: Table) -> Element | None:
    """Turn a table's HTML into a ``div`` element with class ``alt-wrapper``.

    Returns ``None`` when the context renders tables by clipping, when the
    HTML cannot be processed (any exception is treated as "no element"), or
    when the parsed HTML contains no child elements.
    """
    if context.table_render == TableRender.CLIPPING:
        return None

    try:
        # Wrap in a single root so content with several top-level nodes parses.
        root = fromstring(f"<div>{table.html_content}</div>")
        wrapper = Element("div", attrib={"class": "alt-wrapper"})
        wrapper.extend(list(root))
        if len(wrapper) == 0:
            return None
        return wrapper
    except Exception:
        return None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def process_formula(
    context: Context,
    formula: Formula,
    inline_mode: bool,
) -> Element | None:
    """Build the element for a LaTeX formula according to the context's render option.

    With the MATHML option the result of the MathML conversion is returned.
    With the SVG option the formula is rendered to an SVG asset and an ``img``
    pointing at it is returned, wrapped in ``span.formula-inline`` when
    *inline_mode* is true and in ``div.alt-wrapper`` otherwise.

    Returns ``None`` for the CLIPPING option, for an expression that is empty
    after normalization, when SVG rendering yields nothing, or for any other
    render option.
    """
    render_mode = context.latex_render
    if render_mode == LaTeXRender.CLIPPING:
        return None

    latex_expr = _normalize_expression(formula.latex_expression)
    if not latex_expr:
        return None

    if render_mode == LaTeXRender.MATHML:
        return _latex2mathml(latex=latex_expr, inline_mode=inline_mode)

    if not (render_mode == LaTeXRender.SVG):
        return None

    svg_image = _latex_formula2svg(latex_expr)
    if svg_image is None:
        return None

    file_name = context.add_asset(
        data=svg_image,
        media_type="image/svg+xml",
        file_ext=".svg",
    )
    img_element = Element(
        "img",
        attrib={"src": f"../assets/{file_name}", "alt": "formula"},
    )

    if inline_mode:
        tag, css_class = "span", "formula-inline"
    else:
        tag, css_class = "div", "alt-wrapper"

    wrapper = Element(tag, attrib={"class": css_class})
    wrapper.append(img_element)
    return wrapper
|
|
77
|
+
|
|
78
|
+
def process_image(context: Context, image: Image) -> Element | None:
    """Store an image as an asset and return an ``img`` wrapped in ``div.alt-wrapper``.

    The extension comes from the image path, with ``.png`` used when the path
    has none. The media type is looked up by the lower-cased extension and
    falls back to ``image/png`` for extensions missing from the table.
    """
    suffix = image.path.suffix
    if not suffix:
        suffix = ".png"

    media_type = _MEDIA_TYPE_MAP.get(suffix.lower(), "image/png")
    stored_name = context.use_asset(
        source_path=image.path,
        media_type=media_type,
        file_ext=suffix,
    )

    picture = Element(
        "img",
        attrib={"src": f"../assets/{stored_name}", "alt": image.alt_text},
    )
    wrapper = Element("div", attrib={"class": "alt-wrapper"})
    wrapper.append(picture)
    return wrapper
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
_ESCAPE_UNICODE_PATTERN = re.compile(r"&#x([0-9A-Fa-f]{5});")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _latex2mathml(latex: str, inline_mode: bool) -> None | Element:
|
|
98
|
+
try:
|
|
99
|
+
html_latex = convert(
|
|
100
|
+
latex=latex,
|
|
101
|
+
display="inline" if inline_mode else "block",
|
|
102
|
+
)
|
|
103
|
+
except Exception:
|
|
104
|
+
return None
|
|
105
|
+
|
|
106
|
+
# latex2mathml 转义会带上一个奇怪的 `&` 前缀,这显然是多余的
|
|
107
|
+
# 不得已,在这里用正则表达式处理以修正这个错误
|
|
108
|
+
def repl(match):
|
|
109
|
+
hex_code = match.group(1)
|
|
110
|
+
char = chr(int(hex_code, 16))
|
|
111
|
+
if char == "<":
|
|
112
|
+
return "<"
|
|
113
|
+
elif char == ">":
|
|
114
|
+
return ">"
|
|
115
|
+
else:
|
|
116
|
+
return char
|
|
117
|
+
|
|
118
|
+
mathml = re.sub(
|
|
119
|
+
pattern=_ESCAPE_UNICODE_PATTERN,
|
|
120
|
+
repl=repl,
|
|
121
|
+
string=html_latex,
|
|
122
|
+
)
|
|
123
|
+
try:
|
|
124
|
+
return fromstring(mathml)
|
|
125
|
+
except Exception:
|
|
126
|
+
return None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _latex_formula2svg(latex: str, font_size: int = 12) -> bytes | None:
    """Render a LaTeX expression to SVG with matplotlib.

    Args:
        latex: The expression, without surrounding ``$`` delimiters.
        font_size: Font size applied while the formula is drawn.

    Returns:
        The SVG document as bytes, or ``None`` if rendering fails for any
        reason (rendering is best-effort).
    """
    # from https://www.cnblogs.com/qizhou/p/18170083
    fig = None
    try:
        output = io.BytesIO()
        # Scope the rc overrides to this call so the process-wide matplotlib
        # configuration is left as the caller had it.
        with plt.rc_context({"text.usetex": True, "font.size": font_size}):
            fig, ax = plt.subplots()
            txt = ax.text(
                0.5,
                0.5,
                f"${latex}$",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
            ax.axis("off")
            # Draw once so the text extent can be measured, then shrink the
            # figure to that extent.
            fig.canvas.draw()
            bbox = txt.get_window_extent(cast(Any, fig.canvas).get_renderer())
            fig.set_size_inches(bbox.width / fig.dpi, bbox.height / fig.dpi)
            fig.savefig(
                output,
                format="svg",
                transparent=True,
                bbox_inches="tight",
                pad_inches=0,
            )
        return output.getvalue()
    except Exception:
        return None
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each rendered formula would leak one figure.
        if fig is not None:
            plt.close(fig)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _normalize_expression(expression: str) -> str:
    """Return *expression* with newlines removed and outer whitespace stripped."""
    return expression.replace("\n", "").strip()
|