fancydocx 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fancydocx/__init__.py +155 -0
- fancydocx/__main__.py +5 -0
- fancydocx/cli.py +117 -0
- fancydocx/color.py +128 -0
- fancydocx/core.py +201 -0
- fancydocx/fontmetrics.py +444 -0
- fancydocx/numbering.py +167 -0
- fancydocx/package.py +234 -0
- fancydocx/render.py +1466 -0
- fancydocx/styles.py +572 -0
- fancydocx/theme.py +113 -0
- fancydocx-0.1.0.dist-info/METADATA +90 -0
- fancydocx-0.1.0.dist-info/RECORD +17 -0
- fancydocx-0.1.0.dist-info/WHEEL +5 -0
- fancydocx-0.1.0.dist-info/entry_points.txt +2 -0
- fancydocx-0.1.0.dist-info/licenses/LICENSE +21 -0
- fancydocx-0.1.0.dist-info/top_level.txt +1 -0
fancydocx/__init__.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""
|
|
2
|
+
fancydocx - pure-Python DOCX -> single self-contained HTML converter.
|
|
3
|
+
|
|
4
|
+
import fancydocx
|
|
5
|
+
fancydocx.convert("resume.docx", "resume.html") # write a file
|
|
6
|
+
html = fancydocx.convert("resume.docx") # or get the HTML string
|
|
7
|
+
|
|
8
|
+
No external engines, no LibreOffice, no network. Images are inlined as data
|
|
9
|
+
URIs and embedded fonts are recovered as @font-face, so the output is one
|
|
10
|
+
portable .html file.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
import html as _html
|
|
14
|
+
import pathlib
|
|
15
|
+
|
|
16
|
+
from .core import local
|
|
17
|
+
from .package import DocxPackage
|
|
18
|
+
from .theme import Theme
|
|
19
|
+
from .styles import Styles, rpr_to_css, ppr_to_css, line_height_css
|
|
20
|
+
from .numbering import Numbering
|
|
21
|
+
from .render import Converter
|
|
22
|
+
from .fontmetrics import embed_css_for_families
|
|
23
|
+
|
|
24
|
+
# Package version string (also shown by the CLI's --version flag).
__version__ = "0.1.0"
# Names exported by ``from fancydocx import *``.
__all__ = ["convert", "convert_docx", "convert_file", "DocxPackage", "__version__"]
|
|
26
|
+
|
|
27
|
+
BASE_CSS = """
|
|
28
|
+
*{box-sizing:border-box}
|
|
29
|
+
html,body{margin:0;padding:0}
|
|
30
|
+
body{background:#e9e9ee;color:#000;-webkit-print-color-adjust:exact;print-color-adjust:exact;
|
|
31
|
+
text-rendering:geometricPrecision}
|
|
32
|
+
.docx-doc{padding:24px 12px}
|
|
33
|
+
/* isolation:isolate makes each page its own stacking context, so that
|
|
34
|
+
z-index:-1 layers (header/footer art, behindDoc shapes) paint ABOVE the
|
|
35
|
+
page's own background but BELOW in-flow content -- exactly Word's
|
|
36
|
+
page-color / behind-text / text layering. Without it, negative z-index
|
|
37
|
+
children fall behind the page background and vanish. */
|
|
38
|
+
.docx-page{position:relative;background:#fff;margin:0 auto 24px;
|
|
39
|
+
box-shadow:0 2px 14px rgba(0,0,0,.28);overflow:hidden;isolation:isolate}
|
|
40
|
+
/* .docx-body is intentionally NOT positioned so absolutely-positioned floats
|
|
41
|
+
(anchored images/shapes) resolve against the .docx-page box = true page
|
|
42
|
+
coordinates, matching Word's page-relative anchoring. */
|
|
43
|
+
.docx-page p{margin:0}
|
|
44
|
+
.docx-page table{border-spacing:0;max-width:none;border-collapse:collapse}
|
|
45
|
+
.docx-page td,.docx-page th{vertical-align:top}
|
|
46
|
+
.docx-page img{max-width:none}
|
|
47
|
+
.docx-page a{color:inherit;text-decoration:inherit}
|
|
48
|
+
.leader{flex:1 1 auto;align-self:flex-end;border-bottom:1px dotted currentColor;margin:0 4px 3px}
|
|
49
|
+
.tab{display:inline-block;min-width:2em}
|
|
50
|
+
.docx-header,.docx-footer{pointer-events:none}
|
|
51
|
+
@media print{
|
|
52
|
+
html,body{background:#fff}
|
|
53
|
+
.docx-doc{padding:0}
|
|
54
|
+
.docx-page{box-shadow:none;margin:0;page-break-after:always}
|
|
55
|
+
@page{margin:0}
|
|
56
|
+
}
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _title(pkg, path):
    """Return the text to use for the HTML ``<title>`` element.

    The first element named ``title`` (any namespace) in
    ``docProps/core.xml`` that has non-blank text wins. When the part is
    missing or holds no usable title, the input file name without its
    extension is returned instead.
    """
    core = pkg.xml("docProps/core.xml")
    if core is not None:
        for el in core.iter():
            if local(el.tag) != "title":
                continue
            # Strip BEFORE testing: a whitespace-only title is truthy but
            # would be returned as "" and produce an empty <title>.
            text = (el.text or "").strip()
            if text:
                return text
    return pathlib.Path(str(path)).stem
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _body_rule(styles, theme):
    """Build the ``.docx-body{...}`` CSS rule for the document defaults.

    Resolves the effective run and paragraph properties with no style
    applied, keeps the font family, font size and colour from the run CSS,
    and adds a line-height plus fallbacks for a missing family or size.
    """
    default_rpr = styles.effective_rpr(None, None, {})
    default_ppr = styles.effective_ppr(None, {})
    run_css = rpr_to_css(default_rpr, theme)

    inherited = ("font-family", "font-size", "color")
    decls = {prop: run_css[prop] for prop in inherited if prop in run_css}

    # Word single spacing is font-metric based (see fontmetrics.py); the
    # numeric factor keeps the geometry even under font substitution.
    decls["line-height"] = line_height_css(
        default_ppr.get("spacing"),
        default_rpr.get("font"),
        default_rpr.get("sz") or 11.0,
    )
    decls.setdefault("font-family", "'Calibri', 'Segoe UI', sans-serif")
    decls.setdefault("font-size", "11pt")
    decls["word-wrap"] = "break-word"

    declarations = ";".join("%s:%s" % item for item in decls.items())
    return ".docx-body{%s}" % declarations
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def convert_docx(path, include_headers=True, embed_fonts=False):
    """
    Convert a .docx file to a single self-contained HTML string.

    include_headers: forwarded to the Converter that renders the body.

    embed_fonts: additionally inline every referenced font family found on
    THIS machine as base64 @font-face. This makes the HTML render with the
    exact intended glyph metrics on any viewer, at the cost of several MB
    per file -- off by default for batch conversions.

    The package is always closed before returning, including on error.
    """
    pkg = DocxPackage(path)
    try:
        theme = Theme(pkg)
        styles = Styles(pkg, theme)
        numbering = Numbering(pkg, theme)
        converter = Converter(pkg, theme, styles, numbering,
                              include_headers=include_headers)
        body_html = converter.render_document()

        font_css, embedded_families = pkg.font_face_css_and_families()
        if embed_fonts:
            extra_css = embed_css_for_families(
                converter.used_fonts, already_embedded=embedded_families)
            if extra_css:
                # Append to the document's own @font-face CSS when there is
                # some, otherwise the local CSS is the whole font block.
                if font_css:
                    font_css = font_css + "\n" + extra_css
                else:
                    font_css = extra_css

        default_rule = _body_rule(styles, theme)
        doc_title = _title(pkg, path)
    finally:
        pkg.close()

    template = "\n".join([
        "<!doctype html>",
        "<html lang=\"en\">",
        "<head>",
        "<meta charset=\"utf-8\">",
        "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">",
        "<title>%s</title>",
        "<style>",
        "%s",
        "%s",
        "%s",
        "</style>",
        "</head>",
        "<body>",
        "<div class=\"docx-doc\">%s</div>",
        "</body>",
        "</html>",
        "",
    ])
    # Only the title is escaped here; the CSS and body are inserted verbatim.
    return template % (_html.escape(doc_title), BASE_CSS, default_rule,
                       font_css, body_html)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def convert_file(in_path, out_path, include_headers=True, embed_fonts=False):
    """Convert one .docx to one .html on disk and return the output path.

    Missing parent directories of ``out_path`` are created. The file is
    written as UTF-8 and the path is returned as a ``str``.
    """
    markup = convert_docx(
        in_path,
        include_headers=include_headers,
        embed_fonts=embed_fonts,
    )
    target = pathlib.Path(out_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(markup, encoding="utf-8")
    return str(target)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def convert(source, output=None, *, embed_fonts=False, include_headers=True):
    """
    One-line entry point.

        import fancydocx
        fancydocx.convert("resume.docx", "resume.html")   # write the file, returns path
        html = fancydocx.convert("resume.docx")           # no output -> returns HTML str

    Parameters
    ----------
    source : str | os.PathLike
        Path to the input .docx file.
    output : str | os.PathLike | None
        Where to write the HTML. If None, the HTML is returned as a string.
    embed_fonts : bool
        Inline locally-installed referenced fonts as base64 @font-face
        (exact metrics on any viewer, at the cost of file size).
    include_headers : bool
        Render document headers/footers (default True).
    """
    options = {"include_headers": include_headers, "embed_fonts": embed_fonts}
    if output is not None:
        return convert_file(source, output, **options)
    return convert_docx(source, **options)
|
fancydocx/__main__.py
ADDED
fancydocx/cli.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for fancydocx, exposed as the ``fancydocx`` command
|
|
3
|
+
(and ``python -m fancydocx``).
|
|
4
|
+
|
|
5
|
+
Single file:
|
|
6
|
+
fancydocx resume.docx -> resume.html (next to input)
|
|
7
|
+
fancydocx resume.docx -o out.html
|
|
8
|
+
|
|
9
|
+
Whole folder (recursive), mirroring the tree into an output dir:
|
|
10
|
+
fancydocx ./docs -o ./html
|
|
11
|
+
fancydocx ./docs -o ./html --workers 8
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
import argparse
|
|
15
|
+
import concurrent.futures as cf
|
|
16
|
+
import sys
|
|
17
|
+
import time
|
|
18
|
+
import traceback
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
from . import __version__, convert_file
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _iter_docx(root, pattern):
|
|
25
|
+
for p in sorted(Path(root).rglob(pattern)):
|
|
26
|
+
# Skip Word lock/temp files like ~$name.docx
|
|
27
|
+
if p.name.startswith("~$"):
|
|
28
|
+
continue
|
|
29
|
+
if p.is_file():
|
|
30
|
+
yield p
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _one(in_path, out_path, include_headers, embed_fonts=False):
    """Convert a single file and report the outcome without raising.

    Returns ``(in_path, out_path, error, seconds)`` where ``error`` is None
    on success, or the one-line ``ExcType: message`` text of the exception
    that ``convert_file`` raised.
    """
    started = time.perf_counter()
    error = None
    try:
        convert_file(in_path, out_path, include_headers=include_headers,
                     embed_fonts=embed_fonts)
    except Exception as exc:
        # Failures are returned as data so the caller can report them.
        error = "".join(traceback.format_exception_only(type(exc), exc)).strip()
    return (in_path, out_path, error, time.perf_counter() - started)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def main(argv=None):
    """Run the ``fancydocx`` command line and return a process exit code.

    ``argv`` is the argument list (defaults to ``sys.argv[1:]`` via
    argparse). A file input is converted on its own; a directory input is
    walked recursively and mirrored into the output folder (``<input>/_html``
    when ``-o`` is not given). Returns 0 when everything converted and 1 if
    any file failed. A missing input path exits through ``ap.error``.
    """
    ap = argparse.ArgumentParser(
        prog="fancydocx",
        description="Convert fancy .docx files to a single self-contained HTML file.")
    ap.add_argument("input", help="A .docx file or a folder containing .docx files")
    ap.add_argument("-o", "--output", help="Output .html file (single) or output folder (batch)")
    ap.add_argument("--glob", default="*.docx", help="Glob for batch mode (default: *.docx)")
    ap.add_argument("--workers", type=int, default=1,
                    help="Parallel worker processes for batch mode (default: 1)")
    ap.add_argument("--no-headers", action="store_true", help="Skip header/footer rendering")
    ap.add_argument("--embed-fonts", action="store_true",
                    help="Inline locally-installed referenced fonts as @font-face "
                         "(exact metrics everywhere, but several MB per file)")
    ap.add_argument("--quiet", action="store_true", help="Only print a final summary")
    ap.add_argument("--version", action="version", version="fancydocx %s" % __version__)
    args = ap.parse_args(argv)

    include_headers = not args.no_headers
    inp = Path(args.input)
    if not inp.exists():
        ap.error("input not found: %s" % inp)

    # ---- single file -------------------------------------------------
    if inp.is_file():
        out = Path(args.output) if args.output else inp.with_suffix(".html")
        in_p, out_p, err, dt = _one(inp, out, include_headers, args.embed_fonts)
        if err:
            print("FAILED %s\n %s" % (in_p, err), file=sys.stderr)
            return 1
        print("OK %s -> %s (%.2fs)" % (in_p, out_p, dt))
        return 0

    # ---- batch folder ------------------------------------------------
    out_dir = Path(args.output) if args.output else inp / "_html"
    files = list(_iter_docx(inp, args.glob))
    if not files:
        print("No files matching %r under %s" % (args.glob, inp))
        return 0

    jobs = [(f, out_dir / f.relative_to(inp).with_suffix(".html")) for f in files]
    ok = fail = 0
    total = len(jobs)
    started = time.perf_counter()
    print("Converting %d file(s) -> %s (workers=%d)" % (total, out_dir, args.workers))

    def report(res, i):
        """Count one result tuple from _one() and print its status line."""
        nonlocal ok, fail
        in_p, out_p, err, dt = res
        if err:
            fail += 1
            print("[%d/%d] FAILED %s\n %s" % (i, total, in_p, err), file=sys.stderr)
        else:
            ok += 1
            if not args.quiet:
                print("[%d/%d] %s -> %s (%.2fs)" % (i, total, in_p.name, out_p, dt))

    if args.workers > 1:
        with cf.ProcessPoolExecutor(max_workers=args.workers) as ex:
            futs = {ex.submit(_one, f, o, include_headers, args.embed_fonts): (idx, f, o)
                    for idx, (f, o) in enumerate(jobs, 1)}
            for fut in cf.as_completed(futs):
                idx, f, o = futs[fut]
                try:
                    res = fut.result()
                except Exception as e:
                    # _one() already traps conversion errors, so an exception
                    # here comes from the pool itself (e.g. a worker process
                    # died). Record it as a failure for this job so the batch
                    # still finishes with a summary and exit code.
                    res = (f, o,
                           "".join(traceback.format_exception_only(type(e), e)).strip(),
                           0.0)
                report(res, idx)
    else:
        for idx, (f, o) in enumerate(jobs, 1):
            report(_one(f, o, include_headers, args.embed_fonts), idx)

    print("\nDone: %d ok, %d failed, %d total in %.1fs"
          % (ok, fail, total, time.perf_counter() - started))
    return 1 if fail else 0
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Direct execution of this module: run the CLI and use main()'s return value
# as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
fancydocx/color.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Color resolution: hex parsing, theme-color lookup, and the tint/shade
|
|
3
|
+
math Office applies to themed colors.
|
|
4
|
+
|
|
5
|
+
Word colors come in three flavors:
|
|
6
|
+
* explicit sRGB <w:color w:val="1F4E79"/>
|
|
7
|
+
* "auto" <w:color w:val="auto"/> (context default)
|
|
8
|
+
* theme reference <w:color w:themeColor="accent1" w:themeShade="BF"/>
|
|
9
|
+
|
|
10
|
+
For theme references, `themeTint`/`themeShade` are a hex fraction of 255
|
|
11
|
+
applied to the *luminance* of the resolved theme color (HSL space) -- this
|
|
12
|
+
is what Office actually does, not a naive per-channel scale, so the
|
|
13
|
+
accent-bar shades come out matching.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
import colorsys
|
|
17
|
+
|
|
18
|
+
# Named highlight colors (<w:highlight w:val="yellow"/>) -> 6-digit hex.
# The values are the RGB triples that ST_HighlightColor assigns to each name.
# The "dark*"/"lightGray" entries are NOT the similarly named CSS colours
# (e.g. Word darkBlue is 000080, CSS darkblue is 00008B), so do not swap in
# CSS named-colour values here.
HIGHLIGHT = {
    "black": "000000",
    "blue": "0000FF",
    "cyan": "00FFFF",
    "darkBlue": "000080",
    "darkCyan": "008080",
    "darkGray": "808080",
    "darkGreen": "008000",
    "darkMagenta": "800080",
    "darkRed": "800000",
    "darkYellow": "808000",
    "green": "00FF00",
    "lightGray": "C0C0C0",
    "magenta": "FF00FF",
    "red": "FF0000",
    "white": "FFFFFF",
    "yellow": "FFFF00",
}
|
|
26
|
+
|
|
27
|
+
# themeColor attribute value -> clrScheme key. The <w:clrSchemeMapping> in
|
|
28
|
+
# settings.xml can remap tx1/bg1/tx2/bg2, handled in theme.py; this is the
|
|
29
|
+
# default identity mapping.
|
|
30
|
+
THEME_ALIAS = {
    # dark/light slots and their text/background synonyms
    "dark1": "dk1",
    "light1": "lt1",
    "dark2": "dk2",
    "light2": "lt2",
    "text1": "dk1",
    "background1": "lt1",
    "text2": "dk2",
    "background2": "lt2",
    # accents map to themselves
    "accent1": "accent1",
    "accent2": "accent2",
    "accent3": "accent3",
    "accent4": "accent4",
    "accent5": "accent5",
    "accent6": "accent6",
    # hyperlink colours
    "hyperlink": "hlink",
    "followedHyperlink": "folHlink",
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def normalize_hex(val):
    """Return a 6-digit uppercase hex string, or None for auto/blank/invalid.

    Surrounding whitespace and leading ``#`` characters are ignored, and the
    3-digit shorthand is expanded by doubling each digit. Every remaining
    character must be an ASCII hex digit.
    """
    if not val:
        return None
    v = val.strip().lstrip("#")
    if v.lower() == "auto":
        return None
    if len(v) == 3:  # rare shorthand
        v = "".join(c * 2 for c in v)
    if len(v) != 6:
        return None
    # Check digit by digit rather than with int(v, 16): int() also accepts a
    # sign, a "0x" prefix, "_" separators and non-ASCII digits, so strings
    # such as "0x12ab" or "12_abc" would pass as colours.
    if any(c not in "0123456789abcdefABCDEF" for c in v):
        return None
    return v.upper()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def hex_to_rgb(h):
    """Split an 'RRGGBB' string into an ``(r, g, b)`` tuple of ints."""
    red, green, blue = h[0:2], h[2:4], h[4:6]
    return (int(red, 16), int(green, 16), int(blue, 16))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def rgb_to_hex(rgb):
    """Format channel values as uppercase hex, two digits per channel.

    Each channel is rounded to the nearest integer and clamped to 0..255.
    """
    digits = []
    for channel in rgb:
        level = int(round(channel))
        level = max(0, min(255, level))
        digits.append("%02X" % level)
    return "".join(digits)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def apply_tint_shade(hex6, tint=None, shade=None):
    """
    Apply themeTint / themeShade (hex byte, fraction of 255) to a base color
    by adjusting its HLS lightness.

    ``shade`` scales the lightness by its fraction; ``tint`` then blends it
    toward 1.0. A tint/shade string that is not valid hex is ignored. A
    falsy ``hex6`` is returned unchanged.
    """
    if not hex6:
        return hex6

    red, green, blue = (channel / 255.0 for channel in hex_to_rgb(hex6))
    hue, lum, sat = colorsys.rgb_to_hls(red, green, blue)

    if shade is not None:
        try:
            lum *= int(shade, 16) / 255.0
        except ValueError:
            pass  # malformed shade: leave the lightness untouched
    if tint is not None:
        try:
            frac = int(tint, 16) / 255.0
        except ValueError:
            pass  # malformed tint: leave the lightness untouched
        else:
            lum = lum * frac + (1.0 - frac)

    lum = max(0.0, min(1.0, lum))
    red, green, blue = colorsys.hls_to_rgb(hue, lum, sat)
    return rgb_to_hex((red * 255, green * 255, blue * 255))
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def color_descriptor(el):
    """
    Collect the colour attributes of ``el`` into a dict with the keys
    ``val``, ``theme``, ``tint`` and ``shade`` (read from w:val,
    w:themeColor, w:themeTint and w:themeShade; missing ones are None).
    Returns None if the element is absent.
    """
    if el is None:
        return None
    from .core import qn
    attribute_for = (
        ("val", "w:val"),
        ("theme", "w:themeColor"),
        ("tint", "w:themeTint"),
        ("shade", "w:themeShade"),
    )
    return {key: el.get(qn(attr)) for key, attr in attribute_for}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def resolve(desc, theme, default=None):
    """
    Descriptor -> '#RRGGBB' (or `default` when it resolves to auto/none).

    Precedence: when Word saves a theme-referenced color it ALSO bakes the
    resolved sRGB into w:val (e.g. w:color w:val="9A92BF"
    w:themeColor="accent5" w:themeTint="99"). That cached value is preferred,
    and the color is only recomputed from the theme (with tint/shade
    applied) when no explicit value exists (or it is 'auto').
    """
    if desc is None:
        return default

    explicit = normalize_hex(desc.get("val"))
    if explicit:
        return "#" + explicit

    theme_name = desc.get("theme")
    if not theme_name or theme is None:
        return default

    # Try the name as written first, then its clrScheme alias.
    base = theme.color(theme_name) or theme.color(
        THEME_ALIAS.get(theme_name, theme_name))
    if not base:
        return default
    return "#" + apply_tint_shade(base, desc.get("tint"), desc.get("shade"))
|
fancydocx/core.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core helpers: OOXML namespaces, element utilities, and unit conversions.
|
|
3
|
+
|
|
4
|
+
Word stores geometry in several units. Getting these conversions exactly
|
|
5
|
+
right is the whole ballgame for 1:1 layout parity:
|
|
6
|
+
|
|
7
|
+
* twips = 1/20 point = 1/1440 inch (margins, indents, table widths)
|
|
8
|
+
* EMU = 1/914400 inch (DrawingML: image/shape geometry)
|
|
9
|
+
* half-point = 1/2 point (font sizes: <w:sz w:val="24"/> = 12pt)
|
|
10
|
+
* eighth-pt = 1/8 point (border widths)
|
|
11
|
+
* pct50 = 1/50 percent (some width/shade values)
|
|
12
|
+
|
|
13
|
+
CSS reference DPI is 96, so 1in = 96px and 1pt = 96/72 px = 4/3 px.
|
|
14
|
+
We emit lengths in px (predictable box math) and font metrics in pt
|
|
15
|
+
(matches Word's typographic intent). Both resolve to the same physical
|
|
16
|
+
scale, so mixing them is safe.
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Namespaces
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Prefix -> namespace URI. qn() uses it to build Clark-notation names, and
# find()/findall() pass it to ElementTree so paths can use these prefixes.
NS = {
    # WordprocessingML, relationships and DrawingML
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "wp": "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
    "pic": "http://schemas.openxmlformats.org/drawingml/2006/picture",
    # Word 2010 shape / group / canvas extensions
    "wps": "http://schemas.microsoft.com/office/word/2010/wordprocessingShape",
    "wpg": "http://schemas.microsoft.com/office/word/2010/wordprocessingGroup",
    "wpc": "http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas",
    # Markup compatibility
    "mc": "http://schemas.openxmlformats.org/markup-compatibility/2006",
    # VML and related Office namespaces
    "v": "urn:schemas-microsoft-com:vml",
    "o": "urn:schemas-microsoft-com:office:office",
    "w10": "urn:schemas-microsoft-com:office:word",
    # Word 2010 extensions
    "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
    "wp14": "http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing",
    # Package-level relationships and content types
    "rel": "http://schemas.openxmlformats.org/package/2006/relationships",
    "ct": "http://schemas.openxmlformats.org/package/2006/content-types",
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def qn(name: str) -> str:
    """Expand a prefixed name to Clark notation for ElementTree.

    'w:val' -> '{http://.../main}val'. The prefix must be a key of ``NS``.
    """
    parts = name.split(":", 1)
    prefix, local_name = parts
    return f"{{{NS[prefix]}}}{local_name}"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def local(tag) -> str:
    """Return the part of a Clark-notation tag after the namespace.

    '{ns}p' -> 'p'. A tag without a namespace is returned as is, and
    anything that is not a string yields ''.
    """
    if not isinstance(tag, str):
        return ""
    _, _, name = tag.rpartition("}")
    return name
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------------------------------------------------------
|
|
57
|
+
# Element access helpers (all namespace-aware, all None-safe)
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
def get(el, name: str, default=None):
    """Look up an attribute by prefixed name, e.g. get(el, 'w:val').

    Returns ``default`` when ``el`` is None or the attribute is absent.
    """
    if el is None:
        return default
    value = el.get(qn(name))
    if value is None:
        return default
    return value
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def find(el, path: str):
    """Return ``el.find(path)`` resolved with the NS prefix map.

    Returns None when ``el`` is None.
    """
    return None if el is None else el.find(path, NS)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def findall(el, path: str):
    """Return ``el.findall(path)`` resolved with the NS prefix map.

    Returns an empty list when ``el`` is None.
    """
    return [] if el is None else el.findall(path, NS)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def child(el, name: str):
    """Return the first *direct* child with the given prefixed tag, or None."""
    if el is None:
        return None
    wanted = qn(name)
    return next((node for node in el if node.tag == wanted), None)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def children(el, name: str | None = None):
    """Return the direct children as a list, optionally filtered by tag.

    With ``name`` None every child is returned. A None ``el`` gives [].
    """
    if el is None:
        return []
    kids = list(el)
    if name is None:
        return kids
    wanted = qn(name)
    return [kid for kid in kids if kid.tag == wanted]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def bool_attr(val, default=False):
    """Interpret an OOXML on/off value.

    None gives ``default``. '0', 'false', 'off' and 'no' (case-insensitive)
    are False. Every other value, including '1', 'true' and 'on', is True.
    """
    if val is None:
        return default
    text = str(val).lower()
    is_off = text in ("0", "false", "off", "no")
    return not is_off
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def toggle(el, name: str):
    """
    Read a toggle property such as <w:b/> or <w:b w:val="0"/>.

    Returns True (on), False (explicitly off), or None (``el`` is None or
    has no such direct child). A child without w:val counts as on.
    """
    flag = None if el is None else child(el, name)
    if flag is None:
        return None
    return bool_attr(flag.get(qn("w:val")), default=True)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def int_val(el, name: str, attr: str = "w:val", default=None):
    """Read an integer attribute from the first direct child ``name``.

    The raw text is parsed as a float and rounded, so values such as "12.0"
    are accepted. Returns ``default`` when the child or the attribute is
    missing, or when the value is not a finite number.
    """
    sub = child(el, name)
    if sub is None:
        return default
    raw = sub.get(qn(attr))
    if raw is None:
        return default
    try:
        return int(round(float(raw)))
    except (TypeError, ValueError, OverflowError):
        # ValueError covers non-numeric text and NaN. OverflowError is what
        # round() raises for an infinity ("inf", "1e999"), and it must not
        # escape from parsing document-supplied values.
        return default
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def str_val(el, name: str, attr: str = "w:val", default=None):
    """Read a string attribute from the first direct child ``name``.

    Returns ``default`` when the child or the attribute is missing.
    """
    node = child(el, name)
    if node is None:
        return default
    value = node.get(qn(attr))
    if value is None:
        return default
    return value
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
# ---------------------------------------------------------------------------
|
|
143
|
+
# Unit conversions -> CSS
|
|
144
|
+
# ---------------------------------------------------------------------------
|
|
145
|
+
# Units per inch for each measurement system used by the converters below.
PX_PER_INCH = 96.0       # CSS reference pixels
PT_PER_INCH = 72.0       # points
EMU_PER_INCH = 914400.0  # English Metric Units (DrawingML geometry)
TWIPS_PER_INCH = 1440.0  # twentieths of a point
# Multiplier that turns a length in points into CSS pixels.
PT_TO_PX = PX_PER_INCH / PT_PER_INCH  # 4/3
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _num(x):
|
|
153
|
+
try:
|
|
154
|
+
return float(x)
|
|
155
|
+
except (TypeError, ValueError):
|
|
156
|
+
return None
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def twips_to_px(v):
    """Convert twips to CSS px (v/15); None if ``v`` is not numeric."""
    twips = _num(v)
    if twips is None:
        return None
    inches = twips / TWIPS_PER_INCH
    return inches * PX_PER_INCH
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def emu_to_px(v):
    """Convert EMU to CSS px (v/9525); None if ``v`` is not numeric."""
    emu = _num(v)
    if emu is None:
        return None
    inches = emu / EMU_PER_INCH
    return inches * PX_PER_INCH
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def halfpt_to_pt(v):
    """Convert half-points to points; None if ``v`` is not numeric."""
    half_points = _num(v)
    if half_points is None:
        return None
    return half_points / 2.0
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def eighthpt_to_px(v):
    """Convert eighths of a point (border widths) to CSS px.

    Returns None if ``v`` is not numeric.
    """
    eighths = _num(v)
    if eighths is None:
        return None
    points = eighths / 8.0
    return points * PT_TO_PX
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def pt_to_px(v):
    """Convert points to CSS px; None if ``v`` is not numeric."""
    points = _num(v)
    if points is None:
        return None
    return points * PT_TO_PX
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def px(v, nd: int = 2):
    """Format a number as a compact CSS px length.

    The value is rounded to ``nd`` decimals. Whole numbers are printed
    without a fraction and trailing zeros are dropped
    (12.0 -> '12px', 12.50 -> '12.5px'). None is passed through.
    """
    if v is None:
        return None
    rounded = round(float(v), nd)
    whole = int(rounded)
    if rounded == whole:
        return "%dpx" % whole
    text = "%.*f" % (nd, rounded)
    return text.rstrip("0").rstrip(".") + "px"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def pt(v, nd: int = 2):
    """Format a number as a compact CSS pt length.

    The value is rounded to ``nd`` decimals. Whole numbers are printed
    without a fraction and trailing zeros are dropped
    (11.0 -> '11pt', 10.50 -> '10.5pt'). None is passed through.
    """
    if v is None:
        return None
    rounded = round(float(v), nd)
    whole = int(rounded)
    if rounded == whole:
        return "%dpt" % whole
    text = "%.*f" % (nd, rounded)
    return text.rstrip("0").rstrip(".") + "pt"
|