mf2dom 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mf2dom/renderer.py ADDED
@@ -0,0 +1,601 @@
1
+ """Microformats2 renderer.
2
+
3
+ Renders mf2 JSON back into semantic HTML in a deterministic way such that:
4
+ HTML1 -> JSON -> HTML2 -> JSON -> HTML2
5
+
6
+ The output uses semantic HTML5 elements that render beautifully with
7
+ classless CSS frameworks like PicoCSS.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from collections.abc import Mapping, Sequence
13
+ from typing import TYPE_CHECKING, TypeGuard, cast
14
+
15
+ from justhtml import JustHTML
16
+ from justhtml.node import SimpleDomNode
17
+
18
+ from .types import Mf2Document, Mf2Item
19
+
20
+ if TYPE_CHECKING: # pragma: no cover
21
+ from .types import EValue, RelUrl, UrlObject
22
+
23
+
24
def _el(tag: str, attrs: dict[str, str | None] | None = None) -> SimpleDomNode:
    """Build a ``tag`` element node, attaching ``attrs`` when provided.

    When ``attrs`` is ``None`` the node receives a fresh empty dict; otherwise
    the caller's dict is stored on the node as-is (it is not copied).
    """
    element = SimpleDomNode(tag)
    if attrs is None:
        attrs = {}
    element.attrs = attrs
    return element
29
+
30
+
31
def _text(data: str) -> SimpleDomNode:
    """Build a ``#text`` node whose content is ``data``."""
    text_node = SimpleDomNode("#text")
    text_node.data = data
    return text_node
36
+
37
+
38
def _parse_html_fragment(html: str) -> list[SimpleDomNode]:
    """Parse an HTML snippet and return deep clones of its top-level nodes.

    The snippet is wrapped in ``<body>...</body>`` before parsing, and the
    returned nodes are clones of that body's children, so they can be attached
    to another tree.
    """
    parsed = JustHTML(f"<body>{html}</body>")
    root_html = parsed.root.children[0]  # #document > html  # type: ignore[index]
    body_el = root_html.children[1]  # html > body  # type: ignore[union-attr]
    clones: list[SimpleDomNode] = []
    for node in body_el.children:
        clones.append(node.clone_node(deep=True))
    return clones
44
+
45
+
46
# Root microformat type (h-*) -> HTML element used as the item's container.
# Types that are not listed fall back to a generic element at lookup time.
_SEMANTIC_ROOT_ELEMENTS: dict[str, str] = dict(
    (
        ("h-entry", "article"),
        ("h-feed", "section"),
        ("h-event", "article"),
        ("h-product", "article"),
        ("h-recipe", "article"),
        ("h-review", "article"),
        ("h-resume", "article"),
        ("h-adr", "address"),
        ("h-cite", "blockquote"),
        ("h-geo", "data"),
    )
)

# Prefixed property class -> HTML element used to render that property.
_SEMANTIC_PROPERTY_ELEMENTS: dict[str, str] = dict(
    (
        # Address container and its inline components.
        ("p-adr", "address"),
        ("p-street-address", "span"),
        ("p-extended-address", "span"),
        ("p-locality", "span"),
        ("p-region", "span"),
        ("p-postal-code", "span"),
        ("p-country-name", "span"),
        # Names are emphasised.
        ("p-name", "strong"),
        # Paragraph-like text.
        ("p-summary", "p"),
        ("p-note", "p"),
        ("p-content", "p"),
        ("p-description", "p"),
        # Author info.
        ("p-author", "span"),
    )
)
80
+
81
# fmt: off
# Properties whose values are typically URLs (rendered as <a>).
_URL_PROPERTIES: frozenset[str] = frozenset((
    "url", "uid", "photo", "logo", "video", "audio", "syndication",
    "in-reply-to", "like-of", "repost-of", "bookmark-of", "tag-of", "location",
))

# Properties holding e-mail addresses (rendered as <a href="mailto:">).
_EMAIL_PROPERTIES: frozenset[str] = frozenset(("email",))

# Properties holding telephone numbers (rendered as <a href="tel:">).
_TEL_PROPERTIES: frozenset[str] = frozenset(("tel",))

# Properties whose values are typically datetimes (rendered as <time>).
_DATETIME_PROPERTIES: frozenset[str] = frozenset((
    "published", "updated", "start", "end",
    "duration", "bday", "anniversary", "rev",
))
# fmt: on
119
+
120
# Display order for properties, based on the microformats.org wiki.
# Each row below is one semantic group; rows appear in rendering order so the
# output reads sensibly across item types.
# fmt: off
_PROPERTY_ORDER: list[str] = [
    # 1. Visual identity
    "photo", "logo", "featured",
    # 2. Name / identity
    "name", "honorific-prefix", "given-name", "additional-name", "family-name",
    "sort-string", "honorific-suffix", "nickname", "ipa",
    # 3. Author (important for h-entry)
    "author",
    # 4. Description / content
    "summary", "note", "content", "description",
    # 5. Dates (prominent for h-entry, h-event)
    "published", "updated", "start", "end", "duration", "bday", "anniversary", "rev",
    # 6. Location (for h-event, h-card)
    "location",
    # 7. URLs and links
    "url", "uid", "syndication", "in-reply-to", "like-of", "repost-of", "bookmark-of",
    # 8. Contact info
    "email", "tel", "impp",
    # 9. Address details
    "adr", "geo", "latitude", "longitude", "altitude",
    "street-address", "extended-address", "locality", "region",
    "postal-code", "country-name", "label",
    # 10. Organization / role
    "org", "job-title", "role",
    # 11. Categories and other metadata
    "category", "rsvp", "attendee", "key", "sex", "gender-identity",
]
# fmt: on
203
+
204
+
205
def _property_sort_key(prop: str) -> tuple[int, str]:
    """Return the ``(rank, name)`` key used to order properties.

    The rank is the property's position in ``_PROPERTY_ORDER``. Properties not
    listed there all share the rank just past the end of the list, so they
    sort after every listed property and alphabetically among themselves.
    """
    if prop in _PROPERTY_ORDER:
        rank = _PROPERTY_ORDER.index(prop)
    else:
        rank = len(_PROPERTY_ORDER)
    return (rank, prop)
211
+
212
+
213
def _get_render_category(prop: str, value: str) -> str:
    """Classify how ``prop`` with ``value`` will be rendered.

    Properties that share both a value and a category can be merged into one
    element. URL properties only count as ``"url"`` when the value starts with
    ``http://``, ``https://`` or ``/``. Anything that is not a URL, email, tel
    or datetime gets a per-property ``"text:<prop>"`` category, because
    different text properties use different elements.
    """
    looks_like_url = value.startswith(("http://", "https://", "/"))
    if looks_like_url and prop in _URL_PROPERTIES:
        return "url"
    for members, label in (
        (_EMAIL_PROPERTIES, "email"),
        (_TEL_PROPERTIES, "tel"),
        (_DATETIME_PROPERTIES, "datetime"),
    ):
        if prop in members:
            return label
    return f"text:{prop}"
227
+
228
+
229
def _get_semantic_element(types: Sequence[str]) -> str:
    """Pick the container tag for an item from its microformat types.

    The first type that has an entry in ``_SEMANTIC_ROOT_ELEMENTS`` decides the
    tag; if none does, ``"div"`` is returned.
    """
    return next(
        (_SEMANTIC_ROOT_ELEMENTS[t] for t in types if t in _SEMANTIC_ROOT_ELEMENTS),
        "div",
    )
235
+
236
+
237
def _get_property_element(prop: str, prefix: str) -> str:
    """Return the tag used for ``<prefix>-<prop>``, defaulting to ``"span"``."""
    try:
        return _SEMANTIC_PROPERTY_ELEMENTS[f"{prefix}-{prop}"]
    except KeyError:
        return "span"
241
+
242
+
243
+ def _is_mf2_item(value: object) -> TypeGuard[Mf2Item]:
244
+ return isinstance(value, dict) and "type" in value and "properties" in value
245
+
246
+
247
def _class_value(classes: Sequence[str]) -> str | None:
    """Join the non-empty class names with spaces; return None if none remain."""
    joined = " ".join(filter(None, classes))
    return joined or None
251
+
252
+
253
def _value_vcp_node(value: object) -> SimpleDomNode | None:
    """Build a ``<data class="value">`` node carrying ``value``.

    A dict containing a ``"value"`` key is unwrapped to that entry first, and
    anything that is not already a string is passed through ``str()``.
    Returns None only when ``value`` itself is None.
    """
    if value is None:
        return None
    raw = value["value"] if isinstance(value, dict) and "value" in value else value  # type: ignore[literal-required]
    literal = raw if isinstance(raw, str) else str(raw)
    return _el("data", {"class": "value", "value": literal})
262
+
263
+
264
def _get_rels(url: str, rel_urls: dict[str, RelUrl] | None) -> str | None:
    """Return the space-joined ``rels`` recorded for ``url``, if any.

    Yields None when there is no rel-urls mapping, when ``url`` is not a key
    of it, or when its ``rels`` list is missing or empty.
    """
    if rel_urls and url in rel_urls:
        rels = rel_urls[url].get("rels", [])
        if rels:
            return " ".join(rels)
    return None
272
+
273
+
274
def _render_text_property(
    props: Sequence[str],
    value: str,
    rel_urls: dict[str, RelUrl] | None = None,
) -> SimpleDomNode:
    """Render a group of properties that share ``value`` as one element.

    The first entry of ``props`` selects the rendering style (callers group
    properties by category, so the rest are expected to match); every entry
    contributes a class name to the element.

    Args:
        props: Property names without prefix; must be non-empty.
        value: The shared string value.
        rel_urls: Optional rel-urls mapping used to add ``rel`` to links.

    Returns:
        ``<img>``/``<video>``/``<audio>`` for media properties, ``<a>`` for
        URL, email and tel properties, ``<time>`` for datetimes, otherwise the
        semantic text element for the property.
    """

    def class_attr(prefix: str) -> str | None:
        # Class attribute covering every property in the group.
        return _class_value([f"{prefix}-{p}" for p in props])

    def with_text(node: SimpleDomNode, content: str) -> SimpleDomNode:
        node.append_child(_text(content))
        return node

    lead = props[0]

    # Media properties become their native element rather than a link.
    media_tag = {"photo": "img", "logo": "img", "video": "video", "audio": "audio"}.get(lead)
    if media_tag is not None:
        return _el(media_tag, {"class": class_attr("u"), "src": value})

    if lead in _URL_PROPERTIES and value.startswith(("http://", "https://", "/")):
        link_attrs: dict[str, str | None] = {"class": class_attr("u"), "href": value}
        rel = _get_rels(value, rel_urls)
        if rel:
            link_attrs["rel"] = rel
        return with_text(_el("a", link_attrs), value)

    if lead in _EMAIL_PROPERTIES:
        mail_href = value if value.startswith("mailto:") else f"mailto:{value}"
        mail_link = _el("a", {"class": class_attr("u"), "href": mail_href})
        # The visible text omits the scheme.
        return with_text(mail_link, value.removeprefix("mailto:"))

    if lead in _TEL_PROPERTIES:
        tel_href = value if value.startswith("tel:") else f"tel:{value}"
        return with_text(_el("a", {"class": class_attr("p"), "href": tel_href}), value)

    if lead in _DATETIME_PROPERTIES:
        return with_text(_el("time", {"class": class_attr("dt"), "datetime": value}), value)

    text_tag = _get_property_element(lead, "p")
    return with_text(_el(text_tag, {"class": class_attr("p")}), value)
327
+
328
+
329
def _render_string_property(
    prefix: str,
    props: Sequence[str],
    value: str,
    rel_urls: dict[str, RelUrl] | None = None,
) -> SimpleDomNode:
    """Render properties sharing ``value`` under an explicit mf2 prefix.

    ``dt`` gives a ``<time>`` with the value as text, ``u`` gives an ``<a>``
    with ``href`` (plus ``rel`` when known) and no text, and ``e`` gives a
    ``<div>`` with the value as text. Any other prefix is delegated to
    ``_render_text_property``.
    """
    if prefix not in ("dt", "u", "e"):
        return _render_text_property(props, value, rel_urls)

    cls = _class_value([f"{prefix}-{p}" for p in props])

    if prefix == "u":
        link_attrs: dict[str, str | None] = {"class": cls, "href": value}
        rel = _get_rels(value, rel_urls)
        if rel:
            link_attrs["rel"] = rel
        return _el("a", link_attrs)

    node = _el("time" if prefix == "dt" else "div", {"class": cls})
    node.append_child(_text(value))
    return node
354
+
355
+
356
def _render_e_property(prop: str, value: EValue) -> SimpleDomNode:
    """Render an ``e-*`` property as a ``<div>`` holding its HTML content.

    When ``value["html"]`` is a string it is parsed and its nodes become the
    div's children; otherwise the stringified ``value["value"]`` (empty string
    if absent) is used as plain text.
    """
    container = _el("div", {"class": _class_value([f"e-{prop}"])})
    markup = value.get("html")
    if not isinstance(markup, str):
        container.append_child(_text(str(value.get("value", ""))))
        return container
    for node in _parse_html_fragment(markup):
        container.append_child(node)
    return container
368
+
369
+
370
def _render_u_object_property(prop: str, value: UrlObject) -> SimpleDomNode:
    """Render a ``u-*`` property given as an object into an ``<img>``.

    ``value["value"]`` becomes ``src`` (empty string if absent). ``alt`` is
    emitted whenever it is not None, and a non-empty ``srcset`` dict is
    serialised as ``"<src> <key>"`` entries ordered by key for stable output.
    """
    img_attrs: dict[str, str | None] = {
        "class": _class_value([f"u-{prop}"]),
        "src": value.get("value", ""),
    }
    alt = value.get("alt")
    if alt is not None:
        img_attrs["alt"] = str(alt)
    srcset = value.get("srcset")
    if isinstance(srcset, dict) and srcset:
        img_attrs["srcset"] = ", ".join(f"{srcset[key]} {key}" for key in sorted(srcset))
    return _el("img", img_attrs)
384
+
385
+
386
def _render_ruby_name_ipa(name: str, ipa: str) -> SimpleDomNode:
    """Render ``name`` annotated with ``ipa`` as a ``<ruby>`` element.

    Structure: ``<strong class="p-name">``, an ``<rp>`` holding "(", an
    ``<rt>`` wrapping the ``p-ipa`` span between "/ " and " /", and a closing
    ``<rp>`` holding ")".
    """

    def labelled(
        tag: str,
        content: str,
        attrs: dict[str, str | None] | None = None,
    ) -> SimpleDomNode:
        # Element with a single text child.
        node = _el(tag, attrs)
        node.append_child(_text(content))
        return node

    annotation = _el("rt")
    for part in (
        _text("/ "),
        labelled("span", ipa, {"class": "p-ipa"}),
        _text(" /"),
    ):
        annotation.append_child(part)

    ruby = _el("ruby", {"aria-hidden": "true"})
    for part in (
        labelled("strong", name, {"class": "p-name"}),
        labelled("rp", "("),  # fallback parenthesis
        annotation,
        labelled("rp", ")"),  # fallback parenthesis
    ):
        ruby.append_child(part)
    return ruby
415
+
416
+
417
def _embedded_property_prefix(embedded: Mf2Item) -> str:
    """Infer the property prefix an embedded item was attached with.

    Returns ``"e"`` when the item has a string ``html`` entry, ``"u"`` when its
    ``value`` is a mapping, and ``"p"`` in every other case.
    """
    if isinstance(embedded.get("html"), str):
        return "e"
    return "u" if isinstance(embedded.get("value"), Mapping) else "p"
424
+
425
+
426
def _render_item(
    item: Mf2Item,
    *,
    extra_classes: Sequence[str] = (),
    as_property: bool = False,
    property_prefix: str | None = None,
    rel_urls: dict[str, RelUrl] | None = None,
) -> SimpleDomNode:
    """Render one mf2 item, its properties and its children, as an element.

    Args:
        item: The mf2 item to render.
        extra_classes: Class names emitted before the item's own types, e.g.
            the property class when the item is embedded in another item.
        as_property: True when the item is the value of a property of another
            item.
        property_prefix: Prefix (``p``, ``u``, ``dt`` or ``e``) the embedded
            item is attached with; only consulted when ``as_property`` is set.
        rel_urls: Optional rel-urls mapping used to add ``rel`` to links.

    Returns:
        The element for the item. Properties are emitted in
        ``_property_sort_key`` order, followed by the item's children.
    """
    classes: list[str] = [str(c) for c in extra_classes if c]
    item_types = item.get("type", [])
    classes.extend(str(t) for t in item_types)
    props = item.get("properties", {})
    children = item.get("children", [])

    # Container element chosen from the microformat type.
    attrs: dict[str, str | None] = {}
    item_id = item.get("id")
    if isinstance(item_id, str) and item_id:
        attrs["id"] = item_id
    cls = _class_value(classes)
    if cls:
        attrs["class"] = cls
    el = _el(_get_semantic_element(item_types), attrs)

    # An embedded p-/dt- item carries its representative value explicitly as
    # a value-class-pattern <data> node.
    if (
        as_property
        and property_prefix in {"p", "dt"}
        and "value" in item
        and not isinstance(item.get("value"), Mapping)
    ):
        vcp_node = _value_vcp_node(item.get("value"))
        if vcp_node:
            el.append_child(vcp_node)

    embedded_value = item.get("value") if as_property else None

    # An embedded e-* item is rendered from its HTML alone.
    if as_property and property_prefix == "e":
        html = item.get("html")
        if isinstance(html, str):
            for child in _parse_html_fragment(html):
                el.append_child(child)
        return el

    # Number of leading values of a property already emitted by a special
    # renderer. The ruby annotation takes only the first name and first ipa;
    # any further values must still go through the normal path below.
    consumed_counts: dict[str, int] = {}

    ruby_name_ipa: tuple[str, str] | None = None
    names = props.get("name", [])
    ipas = props.get("ipa", [])
    if names and ipas:
        name = names[0] if isinstance(names[0], str) else None
        ipa = ipas[0] if isinstance(ipas[0], str) else None
        if name and ipa:
            ruby_name_ipa = (name, ipa)
            consumed_counts["name"] = 1
            consumed_counts["ipa"] = 1

    def remaining(prop_name: str) -> Sequence[object]:
        # Values of prop_name not already handled by a special renderer.
        return props[prop_name][consumed_counts.get(prop_name, 0) :]

    # Sorted once; reused for the main pass and for collecting merge groups.
    ordered_props = sorted(props, key=_property_sort_key)
    # (value, category) pairs that have already been rendered.
    rendered_groups: set[tuple[str, str]] = set()

    for prop in ordered_props:
        # The ruby goes where "name" would appear (each key is visited once).
        if prop == "name" and ruby_name_ipa:
            el.append_child(_render_ruby_name_ipa(*ruby_name_ipa))
        for v in remaining(prop):
            if _is_mf2_item(v):
                # Embedded microformat - render immediately.
                embedded = cast(Mf2Item, v)
                prefix = _embedded_property_prefix(embedded)
                el.append_child(
                    _render_item(
                        embedded,
                        extra_classes=[f"{prefix}-{prop}"],
                        as_property=True,
                        property_prefix=prefix,
                        rel_urls=rel_urls,
                    ),
                )
            elif isinstance(v, dict) and "html" in v:
                el.append_child(_render_e_property(prop, v))  # type: ignore[arg-type]
            elif isinstance(v, dict) and ("alt" in v or "srcset" in v) and "value" in v:
                el.append_child(_render_u_object_property(prop, v))  # type: ignore[arg-type]
            # If this item is itself embedded as a property, prefer dt-* for
            # `name` when its representative value differs from the name.
            elif (
                as_property
                and property_prefix == "p"
                and prop == "name"
                and isinstance(embedded_value, str)
                and isinstance(v, str)
                and v != embedded_value
                and not v.startswith(("http://", "https://"))
            ):
                el.append_child(_render_string_property("dt", [prop], v, rel_urls))
            elif isinstance(v, str):
                # Properties sharing (value, category) are merged into one
                # element, rendered at the first occurrence to keep ordering.
                category = _get_render_category(prop, v)
                key = (v, category)
                if key in rendered_groups:
                    continue
                rendered_groups.add(key)
                group_props = [
                    p
                    for p in ordered_props
                    if any(
                        isinstance(pv, str)
                        and pv == v
                        and _get_render_category(p, pv) == category
                        for pv in remaining(p)
                    )
                ]
                el.append_child(_render_text_property(group_props, v, rel_urls))
            else:
                el.append_child(_render_text_property([prop], str(v), rel_urls))

    for child in children:
        el.append_child(_render_item(child, rel_urls=rel_urls))

    return el
560
+
561
+
562
def render(
    doc: Mf2Document,
    *,
    pretty: bool = False,
    indent_size: int = 2,
) -> str:
    """Render an mf2 document to HTML.

    The result is a ``<main>`` element containing one element per top-level
    item, followed by a ``<nav>`` of links when the document has rel-urls.
    Missing ``items`` or ``rel-urls`` keys are treated as empty rather than
    raising, so partial documents can still be rendered.

    Args:
        doc: The mf2 document to render.
        pretty: If True, output nicely indented HTML. Default is False (minified).
        indent_size: Number of spaces for each indentation level when pretty=True.
            Default is 2.

    Returns:
        The rendered HTML string.
    """
    items = doc.get("items") or []
    rel_urls = doc.get("rel-urls") or {}

    main = _el("main")

    for item in items:
        main.append_child(_render_item(item, rel_urls=rel_urls))

    # Render rels in a semantic nav element, in stable order by URL.
    if rel_urls:
        nav = _el("nav")
        for url, info in sorted(rel_urls.items(), key=lambda kv: str(kv[0])):
            attrs: dict[str, str | None] = {"href": url}
            rels = info.get("rels", [])
            if rels:
                attrs["rel"] = " ".join(rels)
            a = _el("a", attrs)
            # Fall back to the URL when no usable link text was recorded.
            text = info.get("text", url)
            if not isinstance(text, str):
                text = url
            a.append_child(_text(text))
            nav.append_child(a)
        main.append_child(nav)

    return main.to_html(pretty=pretty, indent_size=indent_size)
mf2dom/text.py ADDED
@@ -0,0 +1,66 @@
1
+ """Text extraction utilities used by mf2 parsing.
2
+
3
+ This implements a DOM-like `textContent` traversal with mf2-specific rules for
4
+ dropping certain elements and optionally replacing `<img>` elements.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from typing import Any
10
+
11
+ from .dom import get_attr, is_element, iter_child_nodes
12
+ from .urls import try_urljoin
13
+
14
# Elements whose entire subtree is skipped during text extraction.
# Immutable so the shared module-level constant cannot be altered by accident.
_DROP_TAGS: frozenset[str] = frozenset({"script", "style", "template"})
15
+
16
+
17
def text_content(
    node: Any,
    *,
    replace_img: bool = False,
    img_to_src: bool = True,
    base_url: str | None = None,
) -> str:
    """Return DOM-like textContent for the subtree rooted at ``node``.

    Text node data is concatenated in document order with whitespace left
    exactly as it appears. Comments, doctypes and elements in ``_DROP_TAGS``
    contribute nothing.

    Args:
        node: Root of the subtree to read.
        replace_img: When True, an ``<img>`` contributes its ``alt`` text
            surrounded by single spaces.
        img_to_src: With ``replace_img``, fall back to the image's ``src``
            when it has no ``alt``; the src is joined against ``base_url``
            when that succeeds and used as-is otherwise.
        base_url: Base URL for resolving an image ``src``.

    Returns:
        The concatenated text.
    """
    chunks: list[str] = []
    pending: list[Any] = [node]

    while pending:
        current = pending.pop()
        node_name = getattr(current, "name", "")

        if node_name == "#text":
            data = getattr(current, "data", None)
            if isinstance(data, str):
                chunks.append(data)
            continue

        if node_name in {"#comment", "!doctype"}:
            continue

        if is_element(current):
            tag = current.name.lower()
            if tag in _DROP_TAGS:
                continue

            if replace_img and tag == "img":
                replacement = get_attr(current, "alt")
                if replacement is None and img_to_src:
                    src = get_attr(current, "src")
                    if src is not None:
                        replacement = try_urljoin(base_url, src) or src
                if replacement is not None:
                    chunks.extend((" ", replacement, " "))
                continue

        if hasattr(current, "children"):
            # The stack is LIFO, so push children reversed to visit them in
            # document order.
            pending.extend(reversed(list(iter_child_nodes(current))))

    return "".join(chunks)