fancydocx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ """
2
+ Local font metrics + (opt-in) font embedding. Pure standard library.
3
+
4
+ Why this exists: Word's "single" line spacing is not 1.0x the font size and
5
+ not a universal 1.15 -- it is the font's own design line height:
6
+
7
+ single = (usWinAscent + usWinDescent) / unitsPerEm [OS/2, head]
8
+ + max(0, lineGap - ((winAsc+winDesc) - (hheaAsc - hheaDesc)))
9
+ [external leading]
10
+
11
+ (the GDI TEXTMETRIC formula Word inherits). Calibri = 1.2207, Segoe UI =
12
+ 1.3301, Times New Roman = 1.1499... If we emitted CSS `line-height:1.0` for
13
+ single spacing we would be ~20-30% too tight; `normal` would use whatever
14
+ FALLBACK font the viewer has, drifting from Word's geometry whenever the real
15
+ font is missing. So we probe the actual font file when it is installed
16
+ locally, fall back to a table of known Office/Windows fonts, then to 1.2.
17
+ Emitting the *number* keeps the layout at Word's geometry even when the
18
+ browser substitutes the family.
19
+
20
+ The same TTF/TTC name-table parser powers the opt-in local font embedding
21
+ pass (--embed-fonts): match referenced families against installed fonts and
22
+ inline them as @font-face so text renders with the exact intended metrics.
23
+ """
24
+ from __future__ import annotations
25
+ import base64
26
+ import os
27
+ import struct
28
+
29
# Measured (usWinAscent+usWinDescent+extLeading)/upm for fonts we cannot
# always probe. Sources: the fonts' own OS/2 tables.
# Keys are lowercase family names: line_factor() strips and lowercases the
# requested family before looking it up here.
KNOWN_FACTORS = {
    "calibri": 1.2207, "calibri light": 1.2207,
    "cambria": 1.1729,
    "segoe ui": 1.3301, "segoe ui semibold": 1.3301, "segoe ui light": 1.3301,
    "segoe ui semilight": 1.3301, "segoe ui black": 1.3301,
    "arial": 1.1499, "arial black": 1.4102, "arial narrow": 1.1367,
    "times new roman": 1.1499,
    "georgia": 1.1367,
    "verdana": 1.2158,
    "tahoma": 1.2070,
    "trebuchet ms": 1.1602,
    "courier new": 1.1328,
    "garamond": 1.1250,
    "book antiqua": 1.1699,
    # Century Gothic carries a large hhea.lineGap that Word adds as external
    # leading on EVERY line (the classic "Century Gothic is double spaced"
    # effect). Validated against Word's own render of a Futura->CG
    # substituted document: 14pt line = 29.7px -> factor 1.594.
    "century gothic": 1.5940,
    "candara": 1.2207, "constantia": 1.2168, "corbel": 1.2207, "consolas": 1.1719,
    "franklin gothic book": 1.1367, "franklin gothic medium": 1.1367,
    "gill sans mt": 1.1621,
    "rockwell": 1.1719,
    "comic sans ms": 1.3945,
    "impact": 1.2188,
    "sitka": 1.3242,
    # Aptos family (M365 default since 2023; a.k.a. Bierstadt).
    # 1.2847 measured from the actual OS/2+hhea tables of the released TTFs.
    "aptos": 1.2847, "aptos display": 1.2847, "aptos light": 1.2847,
    "aptos semibold": 1.2847, "aptos black": 1.2847, "aptos extrabold": 1.2847,
    "aptos narrow": 1.2847, "aptos serif": 1.2847, "aptos mono": 1.2847,
    # NOTE: deliberately no "futura" entry -- Futura is almost never
    # installed; Word substitutes Century Gothic, so line_factor() follows
    # the SUBSTITUTES chain to CG's metrics, matching what Word shows.
    "wingdings": 1.1000, "symbol": 1.2000, "webdings": 1.1000,
}
# Last-resort multiplier used by line_factor() when a family is neither
# installed, listed above, nor reachable through SUBSTITUTES.
DEFAULT_FACTOR = 1.2
68
+
69
# Families Word commonly substitutes when the named font is missing. Used to
# build the CSS fallback chain so a font-less viewer degrades the way Word
# would, rather than to the browser's default sans.
# Keys are lowercase (lookups lowercase the family first); values keep their
# display casing and are lowercased again when line_factor() follows the chain.
SUBSTITUTES = {
    "aptos": "Calibri", "aptos display": "Calibri Light", "aptos light": "Calibri Light",
    "aptos serif": "Cambria", "aptos mono": "Consolas",
    "futura": "Century Gothic", "futura md bt": "Century Gothic",
    "futura bk bt": "Century Gothic",
    "helvetica": "Arial", "helvetica neue": "Arial",
    "avenir": "Segoe UI", "avenir next": "Segoe UI",
    "gotham": "Montserrat", "proxima nova": "Segoe UI",
    "myriad pro": "Segoe UI", "minion pro": "Cambria",
}
82
+
83
# (substring, CSS weight) pairs matched by _parse_sfnt() against the
# lowercased, space-stripped subfamily name. The first hit wins, so compound
# words ("semilight", "extrabold", ...) must stay ahead of the shorter words
# they contain ("light", "bold").
_WEIGHT_WORDS = (
    ("thin", 100), ("hairline", 100), ("extralight", 200), ("ultralight", 200),
    ("semilight", 350), ("light", 300), ("medium", 500), ("demibold", 600),
    ("semibold", 600), ("extrabold", 800), ("ultrabold", 800), ("heavy", 900),
    ("black", 900), ("bold", 700),
)
89
+
90
+
91
class FontFace:
    """One parsed font face: its file location plus naming and metric data."""

    __slots__ = ("path", "index", "family", "subfamily", "weight", "italic",
                 "factor", "full_name")

    def __init__(self, path, index, family, subfamily, weight, italic, factor, full_name):
        # `index` is the face index inside a .ttc, else None.
        self.path, self.index = path, index
        self.family = family
        # A missing subfamily is normalised to the empty string.
        self.subfamily = subfamily if subfamily else ""
        self.weight, self.italic = weight, italic
        self.factor = factor
        # Fall back to the family name when the font has no full name.
        self.full_name = full_name if full_name else family
104
+
105
+
106
+ # ---------------------------------------------------------------------------
107
+ # sfnt parsing (TTF/OTF/TTC) - just the head/hhea/OS2/name tables.
108
+ # ---------------------------------------------------------------------------
109
+ def _u16(b, o):
110
+ return struct.unpack_from(">H", b, o)[0]
111
+
112
+
113
+ def _s16(b, o):
114
+ return struct.unpack_from(">h", b, o)[0]
115
+
116
+
117
+ def _u32(b, o):
118
+ return struct.unpack_from(">I", b, o)[0]
119
+
120
+
121
+ def _parse_name_table(data, off):
122
+ """Return {nameID: best string} preferring Windows/en records."""
123
+ try:
124
+ count = _u16(data, off + 2)
125
+ str_off = off + _u16(data, off + 4)
126
+ out = {}
127
+ score = {}
128
+ for i in range(count):
129
+ rec = off + 6 + i * 12
130
+ plat = _u16(data, rec)
131
+ enc = _u16(data, rec + 2)
132
+ lang = _u16(data, rec + 4)
133
+ nid = _u16(data, rec + 6)
134
+ ln = _u16(data, rec + 8)
135
+ so = _u16(data, rec + 10)
136
+ if nid not in (1, 2, 4, 16, 17):
137
+ continue
138
+ raw = data[str_off + so: str_off + so + ln]
139
+ if plat == 3: # Windows, UTF-16BE
140
+ try:
141
+ s = raw.decode("utf-16-be")
142
+ except UnicodeDecodeError:
143
+ continue
144
+ sc = 3 if (lang & 0xFF) == 0x09 else 2 # prefer English
145
+ elif plat == 1: # Mac Roman
146
+ s = raw.decode("mac_roman", "replace")
147
+ sc = 1
148
+ else:
149
+ continue
150
+ if sc >= score.get(nid, 0):
151
+ score[nid] = sc
152
+ out[nid] = s.strip("\x00").strip()
153
+ return out
154
+ except (struct.error, IndexError):
155
+ return {}
156
+
157
+
158
+ def _parse_sfnt(data, base=0):
159
+ """Parse one font (at offset `base` for TTC members). Returns dict or None."""
160
+ try:
161
+ numtables = _u16(data, base + 4)
162
+ tables = {}
163
+ for i in range(numtables):
164
+ rec = base + 12 + 16 * i
165
+ tag = data[rec:rec + 4].decode("latin-1")
166
+ tables[tag] = (_u32(data, rec + 8), _u32(data, rec + 12))
167
+ if "head" not in tables or "name" not in tables:
168
+ return None
169
+ head_off = tables["head"][0]
170
+ upm = _u16(data, head_off + 18)
171
+ mac_style = _u16(data, head_off + 44)
172
+ if not upm:
173
+ return None
174
+ win_asc = win_desc = None
175
+ weight = 400
176
+ if "OS/2" in tables:
177
+ os2 = tables["OS/2"][0]
178
+ weight = _u16(data, os2 + 4) or 400
179
+ win_asc = _u16(data, os2 + 74)
180
+ win_desc = _u16(data, os2 + 76)
181
+ hhea_asc = hhea_desc = line_gap = 0
182
+ if "hhea" in tables:
183
+ hh = tables["hhea"][0]
184
+ hhea_asc = _s16(data, hh + 4)
185
+ hhea_desc = _s16(data, hh + 6)
186
+ line_gap = _s16(data, hh + 8)
187
+ if win_asc is None:
188
+ win_asc, win_desc = hhea_asc, -hhea_desc
189
+ ext = max(0, line_gap - ((win_asc + win_desc) - (hhea_asc - hhea_desc)))
190
+ factor = (win_asc + win_desc + ext) / float(upm)
191
+ names = _parse_name_table(data, tables["name"][0])
192
+ family = names.get(16) or names.get(1)
193
+ sub = names.get(17) or names.get(2) or ""
194
+ if not family:
195
+ return None
196
+ italic = bool(mac_style & 2) or "italic" in sub.lower() or "oblique" in sub.lower()
197
+ subl = sub.lower().replace(" ", "")
198
+ for word, wval in _WEIGHT_WORDS:
199
+ if word in subl:
200
+ weight = wval
201
+ break
202
+ return {"family": family, "sub": sub, "weight": weight, "italic": italic,
203
+ "factor": factor, "full": names.get(4)}
204
+ except (struct.error, IndexError, UnicodeDecodeError):
205
+ return None
206
+
207
+
208
def parse_font_file(path):
    """Yield a FontFace for each face found in a TTF/OTF/TTC file.

    The file is read into memory in one go and handed to _parse_sfnt().
    Faces from a collection carry their member index; a standalone font gets
    index None. At most the first 64 members of a collection are examined.
    Unreadable files, files shorter than 12 bytes, unknown signatures and
    faces that fail to parse yield nothing.
    """
    try:
        with open(path, "rb") as fh:
            data = fh.read()
    except OSError:
        return
    if len(data) < 12:
        return

    signature = data[:4]
    if signature == b"ttcf":
        try:
            declared = _u32(data, 8)
            members = [(idx, _u32(data, 12 + 4 * idx))
                       for idx in range(min(declared, 64))]
        except struct.error:
            return
    elif signature in (b"\x00\x01\x00\x00", b"OTTO", b"true"):
        members = [(None, 0)]
    else:
        return

    for idx, start in members:
        info = _parse_sfnt(data, start)
        if not info:
            continue
        yield FontFace(path, idx, info["family"], info["sub"], info["weight"],
                       info["italic"], info["factor"], info["full"])
234
+
235
+
236
+ # ---------------------------------------------------------------------------
237
+ # System font registry (lazy singleton)
238
+ # ---------------------------------------------------------------------------
239
_FONT_DIRS = None  # existing font directories; computed by the first _font_dirs() call
_REGISTRY = None  # lowercase family -> [FontFace]
241
+
242
+
243
def _font_dirs():
    """Return the font directories to scan, computing the list only once.

    Candidates, in priority order: every entry of the DOCX2HTML_FONT_DIRS
    environment variable (os.pathsep-separated; for testing / portable
    fonts), the Fonts folder under WINDIR, and the per-user Fonts folder
    under LOCALAPPDATA. Only candidates that exist as directories are kept.
    """
    global _FONT_DIRS
    if _FONT_DIRS is not None:
        return _FONT_DIRS

    candidates = []
    extra = os.environ.get("DOCX2HTML_FONT_DIRS")
    if extra:
        candidates += [part for part in extra.split(os.pathsep) if part]
    candidates.append(os.path.join(os.environ.get("WINDIR", r"C:\Windows"), "Fonts"))
    local_app_data = os.environ.get("LOCALAPPDATA")
    if local_app_data:
        candidates.append(os.path.join(local_app_data, "Microsoft", "Windows", "Fonts"))

    _FONT_DIRS = list(filter(os.path.isdir, candidates))
    return _FONT_DIRS
258
+
259
+
260
def _registry():
    """Scan system font files once per process; index faces by family name.

    Returns a dict mapping a lowercase family name to the list of FontFace
    objects found for it. Faces whose subfamily is not a plain style are
    additionally indexed under "family subfamily" so that styled families
    CSS can reference directly (e.g. 'Segoe UI Semibold') resolve too.
    """
    global _REGISTRY
    if _REGISTRY is None:
        plain_styles = ("regular", "normal", "book", "roman")
        font_suffixes = (".ttf", ".otf", ".ttc")
        index = {}
        for folder in _font_dirs():
            try:
                filenames = os.listdir(folder)
            except OSError:
                continue
            for filename in filenames:
                if not filename.lower().endswith(font_suffixes):
                    continue
                for face in parse_font_file(os.path.join(folder, filename)):
                    index.setdefault(face.family.lower(), []).append(face)
                    style = face.subfamily
                    if style and style.lower() not in plain_styles:
                        styled = ("%s %s" % (face.family, style)).lower()
                        index.setdefault(styled, []).append(face)
        _REGISTRY = index
    return _REGISTRY
284
+
285
+
286
def find_faces(family):
    """Return every installed face for `family` (case-insensitive match).

    An empty or missing family name, or an unknown family, gives an empty list.
    """
    if family:
        return _registry().get(family.strip().lower(), [])
    return []
291
+
292
+
293
_factor_cache = {}  # lowercase family -> rounded factor, filled by line_factor()
294
+
295
+
296
def _probe_factor(key):
    """Return the line factor of the installed face closest to regular.

    "Closest to regular" means upright before italic, then the weight
    nearest to 400. Returns None when no face is installed for `key`.
    """
    candidates = find_faces(key)
    if not candidates:
        return None

    def distance_from_regular(face):
        return (face.italic, abs(face.weight - 400))

    return min(candidates, key=distance_from_regular).factor
302
+
303
+
304
def _factor_via_chain(key):
    """Resolve one lowercase family key, following SUBSTITUTES; None if unknown.

    Each family on the chain is tried as installed metrics first, then as a
    KNOWN_FACTORS entry. A family already visited ends the walk, so a cyclic
    substitution table cannot loop forever.
    """
    seen = set()
    while key and key not in seen:
        seen.add(key)
        val = _probe_factor(key)
        if val is None:
            val = KNOWN_FACTORS.get(key)
        if val is not None:
            return val
        sub = SUBSTITUTES.get(key)
        key = sub.lower() if sub else None
    return None


def line_factor(family):
    """
    Word-single-spacing multiplier for a font family (1 line == factor em).
    Resolution order mirrors what Word would actually render with:
    1. the font's real metrics if installed locally,
    2. a table of known Office/Windows fonts,
    3. the metrics of Word's SUBSTITUTE for a missing font (chained),
    4. steps 1-3 again for the base family with one style suffix stripped
       ('Aptos Display' -> 'aptos', 'Futura Light' -> 'futura' -> Century
       Gothic),
    5. 1.2.

    The result is rounded to 4 decimals and cached per lowercase family name.
    An empty or missing family returns the default without touching the cache.
    """
    if not family:
        return DEFAULT_FACTOR
    key = family.strip().lower()
    if key in _factor_cache:
        return _factor_cache[key]

    val = _factor_via_chain(key)
    if val is None:
        # Only the first matching suffix is removed.
        for suffix in (" display", " light", " semilight", " semibold", " medium",
                       " black", " extrabold", " condensed", " narrow"):
            if key.endswith(suffix):
                val = _factor_via_chain(key[: -len(suffix)])
                break
    if val is None:
        val = DEFAULT_FACTOR

    val = round(val, 4)
    _factor_cache[key] = val
    return val
348
+
349
+
350
def substitute(family):
    """Return Word's substitution target for a missing family, or None.

    The lookup ignores case and surrounding whitespace.
    """
    return SUBSTITUTES.get(family.strip().lower()) if family else None
355
+
356
+
357
+ # ---------------------------------------------------------------------------
358
+ # Opt-in embedding: referenced families -> @font-face CSS from local files
359
+ # ---------------------------------------------------------------------------
360
+ def _extract_face_bytes(face):
361
+ """Font program bytes for one face. TTC members are re-assembled into a
362
+ standalone sfnt so browsers can load them."""
363
+ with open(face.path, "rb") as f:
364
+ data = f.read()
365
+ if face.index is None:
366
+ return data
367
+ # Rebuild a single-font sfnt from the TTC member's table directory.
368
+ base = _u32(data, 12 + 4 * face.index)
369
+ numtables = _u16(data, base + 4)
370
+ records = []
371
+ for i in range(numtables):
372
+ rec = base + 12 + 16 * i
373
+ tag = data[rec:rec + 4]
374
+ checksum = _u32(data, rec + 4)
375
+ off = _u32(data, rec + 8)
376
+ ln = _u32(data, rec + 12)
377
+ records.append((tag, checksum, off, ln))
378
+ header = data[base:base + 12]
379
+ out_tables = []
380
+ running = 12 + 16 * numtables
381
+ directory = b""
382
+ for tag, checksum, off, ln in records:
383
+ blob = data[off:off + ln]
384
+ pad = (-len(blob)) % 4
385
+ directory += tag + struct.pack(">III", checksum, running, ln)
386
+ out_tables.append(blob + b"\x00" * pad)
387
+ running += ln + pad
388
+ return header + directory + b"".join(out_tables)
389
+
390
+
391
+ def _mime_for(data):
392
+ head = data[:4]
393
+ if head == b"OTTO":
394
+ return "font/otf", "opentype"
395
+ if head == b"wOFF":
396
+ return "font/woff", "woff"
397
+ if head == b"wOF2":
398
+ return "font/woff2", "woff2"
399
+ return "font/ttf", "truetype"
400
+
401
+
402
def embed_css_for_families(families, already_embedded=(), max_bytes_per_face=6_000_000):
    """
    Build @font-face rules for every referenced family installed locally.

    `families` are the referenced family names; `already_embedded` families
    (e.g. fonts recovered from the docx itself) are skipped. For each family
    the faces are grouped by (weight bucket, italic): weights of 600 and up
    share the 700 bucket, every other weight is its own bucket, and the face
    nearest its bucket weight is kept. Faces that cannot be read or exceed
    `max_bytes_per_face` are left out. Each kept face is inlined whole as a
    base64 data URL -- correct but heavy, hence opt-in.

    Returns the rules joined by newlines ("" when nothing was embedded).
    """
    excluded = {name.strip().lower() for name in already_embedded if name}
    requested = sorted({(name or "").strip() for name in families if name})
    css = []
    emitted = set()
    for fam in requested:
        low = fam.lower()
        if not low or low in excluded:
            continue
        faces = find_faces(low)
        if not faces:
            continue

        # One face per (bucket weight, italic) slot.
        chosen = {}
        for face in faces:
            bucket = 700 if face.weight >= 600 else face.weight
            slot = (bucket, face.italic)
            holder = chosen.get(slot)
            if holder is None or abs(face.weight - bucket) < abs(holder.weight - bucket):
                chosen[slot] = face

        for slot in sorted(chosen):
            w, italic = slot
            face = chosen[slot]
            marker = (low, w, italic)
            if marker in emitted:
                continue
            emitted.add(marker)
            try:
                blob = _extract_face_bytes(face)
            except (OSError, struct.error):
                continue
            if len(blob) > max_bytes_per_face:
                continue
            mime, fmt = _mime_for(blob)
            encoded = base64.b64encode(blob).decode("ascii")
            style = "italic" if italic else "normal"
            css.append(
                "@font-face{font-family:'%s';font-weight:%d;font-style:%s;"
                "src:url(data:%s;base64,%s) format('%s');font-display:block;}"
                % (fam.replace("'", ""), w, style, mime, encoded, fmt))
    return "\n".join(css)
fancydocx/numbering.py ADDED
@@ -0,0 +1,167 @@
1
+ """
2
+ List numbering. Resolves <w:numPr> (numId + ilvl) to a concrete marker:
3
+ the bullet glyph or the formatted ordinal, plus the level's own indent and
4
+ run properties (bullet font/size/color).
5
+
6
+ We render markers ourselves rather than emitting <ul>/<ol> so the glyph,
7
+ color, font and hanging indent match Word exactly.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from .core import NS, qn, child, children, find, str_val, int_val
12
+ from .styles import parse_ppr, parse_rpr
13
+
14
+ # Common Symbol/Wingdings private-use bullet code points -> Unicode.
15
+ _BULLET_MAP = {
16
+ "": "•", # Symbol bullet -> •
17
+ "·": "•", # middle dot -> •
18
+ "": "▪", # Wingdings sq bullet -> ▪
19
+ "": "▪",
20
+ "": "◦", # -> ◦
21
+ "o": "◦", # courier 'o' sub-bullet -> ◦
22
+ "": "➢", # -> ➢
23
+ "": "✔", # Wingdings check -> ✔
24
+ "": "▪",
25
+ "–": "–", # en dash bullet
26
+ "-": "-",
27
+ "": "❖",
28
+ "": "⇨",
29
+ }
30
+
31
+
32
+ def _to_roman(n, upper=False):
33
+ if n <= 0:
34
+ return str(n)
35
+ vals = [(1000, "m"), (900, "cm"), (500, "d"), (400, "cd"), (100, "c"),
36
+ (90, "xc"), (50, "l"), (40, "xl"), (10, "x"), (9, "ix"),
37
+ (5, "v"), (4, "iv"), (1, "i")]
38
+ out = []
39
+ for v, s in vals:
40
+ while n >= v:
41
+ out.append(s)
42
+ n -= v
43
+ r = "".join(out)
44
+ return r.upper() if upper else r
45
+
46
+
47
+ def _to_letter(n, upper=False):
48
+ # 1->a, 26->z, 27->aa (spreadsheet style)
49
+ if n <= 0:
50
+ return str(n)
51
+ s = ""
52
+ while n > 0:
53
+ n, rem = divmod(n - 1, 26)
54
+ s = chr(ord("a") + rem) + s
55
+ return s.upper() if upper else s
56
+
57
+
58
def format_number(n, fmt):
    """Render counter value `n` in the numbering format named by `fmt`.

    Handles decimal, decimalZero (zero-padded to two digits), lower/upper
    letters, lower/upper roman numerals and none (empty text). Any other
    format name falls back to plain decimal.
    """
    if fmt == "none":
        return ""
    if fmt == "decimalZero":
        return "%02d" % n
    if fmt in ("lowerLetter", "upperLetter"):
        return _to_letter(n, upper=(fmt == "upperLetter"))
    if fmt in ("lowerRoman", "upperRoman"):
        return _to_roman(n, upper=(fmt == "upperRoman"))
    # "decimal" and every unrecognised format.
    return str(n)
74
+
75
+
76
def bullet_glyph(lvl_text):
    """Pick the Unicode glyph to draw for a bullet level's `lvlText`.

    Only the first character is considered. A character listed in
    _BULLET_MAP is translated; otherwise a printable character below U+F000
    is used as-is; anything else (including empty text) becomes "•".
    """
    fallback = "•"
    if not lvl_text:
        return fallback
    first = lvl_text[0]
    try:
        return _BULLET_MAP[first]
    except KeyError:
        pass
    usable = first.isprintable() and ord(first) < 0xF000
    return first if usable else fallback
85
+
86
+
87
class Numbering:
    """List-level definitions read from word/numbering.xml, plus marker building."""

    def __init__(self, pkg, theme):
        self.theme = theme
        self.abstract = {}  # abstractNumId -> {ilvl: leveldict}
        self.nums = {}  # numId -> {'abstract':id, 'overrides':{ilvl:start}}
        self._load(pkg)

    def _load(self, pkg):
        """Fill `abstract` and `nums` from the package; no-op without numbering.xml."""
        root = pkg.xml("word/numbering.xml")
        if root is None:
            return

        for abstract_el in children(root, "w:abstractNum"):
            abstract_id = abstract_el.get(qn("w:abstractNumId"))
            levels = {}
            for lvl_el in children(abstract_el, "w:lvl"):
                try:
                    ilvl = int(lvl_el.get(qn("w:ilvl")))
                except (TypeError, ValueError):
                    # Level without a usable index: ignore it.
                    continue
                levels[ilvl] = {
                    "numFmt": str_val(lvl_el, "w:numFmt", default="decimal"),
                    "lvlText": str_val(lvl_el, "w:lvlText", default=""),
                    "start": int_val(lvl_el, "w:start", default=1),
                    "suff": str_val(lvl_el, "w:suff", default="tab"),
                    "ppr": parse_ppr(child(lvl_el, "w:pPr"), self.theme),
                    "rpr": parse_rpr(child(lvl_el, "w:rPr"), self.theme),
                }
            self.abstract[abstract_id] = levels

        for num_el in children(root, "w:num"):
            num_id = num_el.get(qn("w:numId"))
            link = child(num_el, "w:abstractNumId")
            abstract_id = None if link is None else link.get(qn("w:val"))
            start_overrides = {}
            for override_el in children(num_el, "w:lvlOverride"):
                ilvl = override_el.get(qn("w:ilvl"))
                start_el = child(override_el, "w:startOverride")
                if ilvl is None or start_el is None:
                    continue
                try:
                    start_overrides[int(ilvl)] = int(start_el.get(qn("w:val")))
                except (TypeError, ValueError):
                    pass
            self.nums[num_id] = {"abstract": abstract_id, "overrides": start_overrides}

    def level(self, num_id, ilvl):
        """Return the level dict for (num_id, ilvl), or None if unknown."""
        info = self.nums.get(num_id)
        levels = self.abstract.get(info["abstract"]) if info else None
        return levels.get(ilvl) if levels else None

    def start_value(self, num_id, ilvl):
        """Return the first counter value for a level.

        A start override on the num wins over the level's own start; an
        unknown level starts at 1.
        """
        info = self.nums.get(num_id)
        if info:
            overrides = info["overrides"]
            if ilvl in overrides:
                return overrides[ilvl]
        lvl = self.level(num_id, ilvl)
        return lvl["start"] if lvl else 1

    def marker(self, num_id, ilvl, count_of):
        """
        Build marker text for the level. `count_of(ilvl)` returns the current
        1-based counter for a level (used to expand %1..%9 in lvlText).
        Returns (text, rpr_dict) or None.
        """
        lvl = self.level(num_id, ilvl)
        if lvl is None:
            return None
        if lvl["numFmt"] == "bullet":
            return bullet_glyph(lvl["lvlText"]), lvl["rpr"]

        text = lvl["lvlText"] or ""
        # Expand %1..%9 placeholders using each referenced level's own format.
        for ref in range(9):
            token = "%%%d" % (ref + 1)
            if token not in text:
                continue
            ref_lvl = self.level(num_id, ref)
            ref_fmt = ref_lvl["numFmt"] if ref_lvl else "decimal"
            text = text.replace(token, format_number(count_of(ref), ref_fmt))
        return text, lvl["rpr"]