athena-python-docx 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/PKG-INFO +1 -1
  2. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/__init__.py +11 -1
  3. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/document.py +31 -6
  4. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/enum/text.py +18 -1
  5. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/table.py +174 -75
  6. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/text/paragraph.py +23 -14
  7. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/pyproject.toml +1 -1
  8. athena_python_docx-0.2.2/tests/fidelity/METHODOLOGY.md +107 -0
  9. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/cases.py +2 -3
  10. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/fake_session.py +32 -0
  11. athena_python_docx-0.2.2/tests/fidelity/ours_spec.json +3419 -0
  12. athena_python_docx-0.2.2/tests/fidelity/parity_crawl.py +267 -0
  13. athena_python_docx-0.2.2/tests/fidelity/parity_diff.json +506 -0
  14. athena_python_docx-0.2.2/tests/fidelity/round_trip_tests.py +214 -0
  15. athena_python_docx-0.2.2/tests/fidelity/stock_spec.json +10189 -0
  16. athena_python_docx-0.2.2/uv.lock +525 -0
  17. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/.gitignore +0 -0
  18. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/CLAUDE.md +0 -0
  19. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/README.md +0 -0
  20. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/_batching.py +0 -0
  21. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/api.py +0 -0
  22. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/client.py +0 -0
  23. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/enum/__init__.py +0 -0
  24. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/enum/section.py +0 -0
  25. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/enum/style.py +0 -0
  26. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/enum/table.py +0 -0
  27. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/errors.py +0 -0
  28. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/opc/__init__.py +0 -0
  29. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/opc/coreprops.py +0 -0
  30. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/section.py +0 -0
  31. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/settings.py +0 -0
  32. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/shape.py +0 -0
  33. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/shared.py +0 -0
  34. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/styles/__init__.py +0 -0
  35. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/styles/style.py +0 -0
  36. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/styles/styles.py +0 -0
  37. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/text/__init__.py +0 -0
  38. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/text/hyperlink.py +0 -0
  39. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/text/parfmt.py +0 -0
  40. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/text/run.py +0 -0
  41. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/docx/typing.py +0 -0
  42. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/scripts/publish.sh +0 -0
  43. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/__init__.py +0 -0
  44. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/conftest.py +0 -0
  45. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/README.md +0 -0
  46. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/__init__.py +0 -0
  47. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/binary_round_trip.py +0 -0
  48. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/complex_cases.py +0 -0
  49. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/extract.py +0 -0
  50. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/extreme_cases.py +0 -0
  51. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/local_runner.py +0 -0
  52. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/mega_cases.py +0 -0
  53. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/real_world_cases.py +0 -0
  54. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/fidelity/runner.py +0 -0
  55. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/test_commands.py +0 -0
  56. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/test_python_docx_api_parity.py +0 -0
  57. {athena_python_docx-0.2.0 → athena_python_docx-0.2.2}/tests/test_smoke_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: athena-python-docx
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
5
5
  Project-URL: Homepage, https://athenaintelligence.ai
6
6
  Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
@@ -6,11 +6,21 @@ See CLAUDE.md for the API parity contract.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- __version__ = "0.2.0"
9
+ __version__ = "0.2.2"
10
10
 
11
11
  from docx.api import Document
12
+ # Re-exports python-docx ships at docx top-level for convenience.
13
+ from docx.shared import Emu, Inches, Pt, Cm, Mm, Twips, Length, RGBColor
12
14
 
13
15
  __all__ = [
14
16
  "Document",
17
+ "Emu",
18
+ "Inches",
19
+ "Pt",
20
+ "Cm",
21
+ "Mm",
22
+ "Twips",
23
+ "Length",
24
+ "RGBColor",
15
25
  "__version__",
16
26
  ]
@@ -22,9 +22,15 @@ from typing import TYPE_CHECKING, BinaryIO
22
22
  from docx._batching import run_sync
23
23
  from docx.client import Session
24
24
  from docx.errors import DocumentClosedError, ValidationError
25
+ # python-docx re-exports a subset of symbols at docx.document; mirror those
26
+ # so `from docx.document import Emu` etc. works.
27
+ from docx.enum.section import WD_SECTION, WD_SECTION_START # noqa: F401
28
+ from docx.enum.text import WD_BREAK # noqa: F401
29
+ from docx.section import Section, Sections # noqa: F401
30
+ from docx.shared import Cm, Emu, Inches, Length, Mm, Pt, RGBColor, Twips # noqa: F401
31
+ from docx.text.run import Run # noqa: F401
25
32
 
26
33
  if TYPE_CHECKING:
27
- from docx.shared import Emu
28
34
  from docx.table import Table
29
35
  from docx.text.paragraph import Paragraph
30
36
 
@@ -80,7 +86,15 @@ class Document:
80
86
  continue
81
87
  node_id: str = str(b.get("nodeId", ""))
82
88
  if node_id:
83
- out.append(Paragraph(session=self._session, node_id=node_id))
89
+ nt_raw = b.get("nodeType")
90
+ nt: str = nt_raw if isinstance(nt_raw, str) and nt_raw else "paragraph"
91
+ out.append(
92
+ Paragraph(
93
+ session=self._session,
94
+ node_id=node_id,
95
+ node_type=nt,
96
+ ),
97
+ )
84
98
  return out
85
99
 
86
100
  @property
@@ -204,7 +218,9 @@ class Document:
204
218
  raise RuntimeError(
205
219
  f"Superdoc did not return a nodeId for add_paragraph: {result!r}",
206
220
  )
207
- return Paragraph(session=self._session, node_id=node_id)
221
+ return Paragraph(
222
+ session=self._session, node_id=node_id, node_type="paragraph",
223
+ )
208
224
 
209
225
  def add_heading(
210
226
  self,
@@ -239,7 +255,11 @@ class Document:
239
255
  raise RuntimeError(
240
256
  f"Superdoc did not return a nodeId for add_heading(level=0): {result!r}",
241
257
  )
242
- paragraph = Paragraph(session=self._session, node_id=node_id)
258
+ paragraph = Paragraph(
259
+ session=self._session,
260
+ node_id=node_id,
261
+ node_type="paragraph",
262
+ )
243
263
  paragraph.style = "Title"
244
264
  return paragraph
245
265
 
@@ -251,12 +271,17 @@ class Document:
251
271
  result = run_sync(
252
272
  self._session.doc.create.heading(params),
253
273
  )
254
- node_id = _extract_inserted_node_id(result, expected_type="paragraph")
274
+ # Bug fix: was passing expected_type="paragraph" here (wrong); the
275
+ # fallback loop recovered, but the code's stated intent was wrong. Fixed to
276
+ # expected_type="heading" so we extract from the correct response key.
277
+ node_id = _extract_inserted_node_id(result, expected_type="heading")
255
278
  if not node_id:
256
279
  raise RuntimeError(
257
280
  f"Superdoc did not return a nodeId for add_heading: {result!r}",
258
281
  )
259
- return Paragraph(session=self._session, node_id=node_id)
282
+ return Paragraph(
283
+ session=self._session, node_id=node_id, node_type="heading",
284
+ )
260
285
 
261
286
  def add_table(
262
287
  self,
@@ -113,6 +113,15 @@ class WD_BREAK(Enum):
113
113
  LINE_CLEAR_RIGHT = "lineClearRight"
114
114
  LINE_CLEAR_ALL = "lineClearAll"
115
115
  TEXT_WRAPPING = "textWrapping"
116
+ # python-docx 1.x also exposes section breaks via WD_BREAK
117
+ SECTION_CONTINUOUS = "sectionContinuous"
118
+ SECTION_EVEN_PAGE = "sectionEvenPage"
119
+ SECTION_NEXT_PAGE = "sectionNextPage"
120
+ SECTION_ODD_PAGE = "sectionOddPage"
121
+
122
+
123
+ # python-docx internal alias
124
+ WD_BREAK_TYPE = WD_BREAK
116
125
 
117
126
 
118
127
  class WD_UNDERLINE(Enum):
@@ -137,6 +146,7 @@ class WD_UNDERLINE(Enum):
137
146
 
138
147
 
139
148
  class WD_COLOR_INDEX(Enum):
149
+ INHERITED = "inherit"
140
150
  AUTO = "default"
141
151
  BLACK = "black"
142
152
  BLUE = "blue"
@@ -156,5 +166,12 @@ class WD_COLOR_INDEX(Enum):
156
166
  YELLOW = "yellow"
157
167
 
158
168
 
159
- # Alias used by python-docx as well
169
+ # Aliases used by python-docx as well
160
170
  WD_COLOR = WD_COLOR_INDEX
171
+ WD_PARAGRAPH_ALIGNMENT = WD_ALIGN_PARAGRAPH
172
+
173
+
174
+ # python-docx 1.x base class that WD_* enums inherit from — we don't need
175
+ # the real base, just a name users can subclass-check against.
176
+ class BaseXmlEnum(Enum):
177
+ pass
@@ -58,27 +58,57 @@ def _find_first_paragraph_id(obj: object) -> str:
58
58
 
59
59
 
60
60
  def _collect_paragraph_ids(obj: object, out: list[str]) -> None:
61
- """Walk a node tree and collect all paragraph/heading nodeIds in order."""
61
+ """Walk a node tree and collect all paragraph/heading nodeIds in order.
62
+
63
+ Tolerates multiple shapes Superdoc emits:
64
+ - cell getNodeById: {"node": {"kind": "paragraph", "id": "UUID",
65
+ "paragraph": {"inlines": [...]}}}
66
+ (the cell's inner paragraph — server reports `id` as a bare UUID,
67
+ which is collected verbatim, with no `paragraph:` prefix added)
68
+ - prosemirror-style: {"type": "paragraph", "attrs": {"nodeId": ...}}
69
+ - typed-wrapper: {"paragraph": {...}, "nodeId": "..."}
70
+ - flat-address: {"kind": "block", "nodeType": "paragraph", "nodeId": ...}
71
+ - block-list shape: {"nodeType": "paragraph", "nodeId": ...}
72
+ """
73
+ seen: set[str] = set(out)
74
+
75
+ def _add(nid: object) -> None:
76
+ if not isinstance(nid, str) or not nid:
77
+ return
78
+ # Superdoc uses bare UUIDs (or short hashes) — no `paragraph:`
79
+ # prefix. Pass the value through verbatim.
80
+ if nid in seen:
81
+ return
82
+ seen.add(nid)
83
+ out.append(nid)
84
+
62
85
  if isinstance(obj, dict):
86
+ # Cell getNodeById shape: {kind: "paragraph", id: "<UUID>", paragraph: {...}}
87
+ kind: object = obj.get("kind")
88
+ if kind == "paragraph" and isinstance(obj.get("id"), str):
89
+ _add(obj.get("id"))
90
+ # Some responses also put the wrapper's id at nodeId.
91
+ _add(obj.get("nodeId"))
92
+ # Prosemirror-style
63
93
  t: object = obj.get("type")
64
94
  if isinstance(t, str) and t in ("paragraph", "heading"):
65
95
  attrs: object = obj.get("attrs")
66
- nid: str = ""
67
96
  if isinstance(attrs, dict):
68
- n = attrs.get("nodeId") or attrs.get("id")
69
- if isinstance(n, str):
70
- nid = n
71
- if not nid:
72
- n2 = obj.get("nodeId")
73
- if isinstance(n2, str):
74
- nid = n2
75
- if nid:
76
- out.append(nid)
97
+ _add(attrs.get("nodeId") or attrs.get("id"))
98
+ _add(obj.get("nodeId"))
99
+ _add(obj.get("id"))
100
+ # Flat-address / block-list
101
+ node_type: object = obj.get("nodeType")
102
+ if isinstance(node_type, str) and node_type in ("paragraph", "heading"):
103
+ _add(obj.get("nodeId"))
104
+ # Typed-wrapper
77
105
  for key in ("paragraph", "heading"):
78
106
  if key in obj and isinstance(obj[key], dict):
79
- n3 = obj.get("nodeId")
80
- if isinstance(n3, str) and n3 and n3 not in out:
81
- out.append(n3)
107
+ _add(obj.get("nodeId"))
108
+ inner = obj[key]
109
+ if isinstance(inner, dict):
110
+ _add(inner.get("nodeId"))
111
+ # Recurse
82
112
  for v in obj.values():
83
113
  _collect_paragraph_ids(v, out)
84
114
  elif isinstance(obj, list):
@@ -514,14 +544,60 @@ class _Cell:
514
544
  return {"kind": "block", "nodeType": "tableCell", "nodeId": self._cell_id()}
515
545
 
516
546
  def _inner_paragraph_ids(self) -> list[str]:
547
+ """Locate the paragraph nodeIds inside this cell, trying multiple
548
+ Superdoc response shapes.
549
+
550
+ Strategies (in order):
551
+ 1. doc.getNodeById with explicit nodeType=tableCell
552
+ 2. doc.getNodeById with just {id: ...}
553
+ 3. doc.getNode with target=tableCell address
554
+ 4. (not implemented yet) doc.blocks.list filtered to paragraph/heading + location match
555
+ """
517
556
  cell_id = self._cell_id()
518
- node_info: object = run_sync(
519
- self._table._session.doc.get_node_by_id(
520
- {"id": cell_id, "nodeType": "tableCell"},
521
- ),
522
- )
557
+ session = self._table._session
523
558
  ids: list[str] = []
524
- _collect_paragraph_ids(node_info, ids)
559
+
560
+ # Strategy 1: with explicit nodeType
561
+ try:
562
+ info = run_sync(
563
+ session.doc.get_node_by_id(
564
+ {"id": cell_id, "nodeType": "tableCell"},
565
+ ),
566
+ )
567
+ _collect_paragraph_ids(info, ids)
568
+ if ids:
569
+ return ids
570
+ except Exception:
571
+ pass
572
+
573
+ # Strategy 2: without nodeType (some sdk versions expect only id)
574
+ try:
575
+ info = run_sync(session.doc.get_node_by_id({"id": cell_id}))
576
+ _collect_paragraph_ids(info, ids)
577
+ if ids:
578
+ return ids
579
+ except Exception:
580
+ pass
581
+
582
+ # Strategy 3: doc.getNode with target address
583
+ try:
584
+ info = run_sync(
585
+ session.doc.get_node(
586
+ {
587
+ "target": {
588
+ "kind": "block",
589
+ "nodeType": "tableCell",
590
+ "nodeId": cell_id,
591
+ },
592
+ },
593
+ ),
594
+ )
595
+ _collect_paragraph_ids(info, ids)
596
+ if ids:
597
+ return ids
598
+ except Exception:
599
+ pass
600
+
525
601
  return ids
526
602
 
527
603
  @property
@@ -548,68 +624,87 @@ class _Cell:
548
624
 
549
625
  @text.setter
550
626
  def text(self, value: str) -> None:
551
- from docx.text.paragraph import _node_text
552
-
553
- ids = self._inner_paragraph_ids()
554
- if not ids:
555
- raise RuntimeError(
556
- f"No paragraph child found in cell "
557
- f"({self._row}, {self._col}); cannot set _Cell.text.",
627
+ """Set the cell's text content.
628
+
629
+ The cell's single inner paragraph is addressed indirectly — Superdoc
630
+ doesn't expose a paragraph ref that's usable as a `blockId` for text
631
+ selections. Instead we use `doc.insert` with a structural paragraph
632
+ fragment at `placement=insideEnd`, which APPENDS inline runs to the
633
+ cell's existing paragraph. For a freshly-created (empty) cell this
634
+ produces `cell.text == value` on read-back.
635
+
636
+ For cells that already contain text, callers who truly want "replace"
637
+ semantics should first resolve the cell's paragraph via `doc.find`
638
+ and delete it — see _Cell.clear() (Phase 3).
639
+ """
640
+ cell_id = self._cell_id()
641
+ session = self._table._session
642
+ cell_target: dict = {
643
+ "kind": "block",
644
+ "nodeType": "tableCell",
645
+ "nodeId": cell_id,
646
+ }
647
+ # Superdoc only accepts block-typed fragments at the top level
648
+ # (paragraph/heading/table/image/list/sectionBreak/sdt/tableOfContents).
649
+ # We convert the plain-text value through `doc.markdownToFragment`
650
+ # to get a guaranteed-valid `{kind:"paragraph", paragraph:{inlines:[...]}}`
651
+ # shape, then doc.insert appends its inline runs into the cell's
652
+ # existing paragraph (rather than adding a sibling paragraph).
653
+ #
654
+ # Confirmed against real staging Superdoc. This is the ONLY shape
655
+ # that actually lands text inside a tableCell:
656
+ # - text mode + placement → rejected ("placement only valid
657
+ # with structural content")
658
+ # - doc.replace + tableCell target → replaces the cell itself,
659
+ # destroying the table structure
660
+ try:
661
+ frag_result: object = run_sync(
662
+ session.doc.markdown_to_fragment({"markdown": value or ""}),
558
663
  )
559
- # Replace the FIRST paragraph's text, and clear the others.
560
- first = ids[0]
561
- current = _node_text(self._table._session, first)
562
- run_sync(
563
- self._table._session.doc.replace(
564
- {
565
- "target": {
566
- "kind": "selection",
567
- "start": {
568
- "kind": "text",
569
- "blockId": first,
570
- "offset": 0,
571
- },
572
- "end": {
573
- "kind": "text",
574
- "blockId": first,
575
- "offset": len(current),
576
- },
664
+ fragment: object = (
665
+ frag_result.get("fragment")
666
+ if isinstance(frag_result, dict)
667
+ else None
668
+ )
669
+ # Fall back to a hand-built fragment with Superdoc's native
670
+ # shape if markdownToFragment is unavailable.
671
+ if not isinstance(fragment, dict):
672
+ fragment = {
673
+ "kind": "paragraph",
674
+ "paragraph": {
675
+ "inlines": (
676
+ [{"kind": "run", "run": {"text": value}}]
677
+ if value
678
+ else []
679
+ ),
577
680
  },
578
- "text": value,
579
- },
580
- ),
581
- )
582
- for extra in ids[1:]:
583
- # Blank the rest of the paragraphs.
584
- existing = _node_text(self._table._session, extra)
585
- if existing:
586
- run_sync(
587
- self._table._session.doc.replace(
588
- {
589
- "target": {
590
- "kind": "selection",
591
- "start": {
592
- "kind": "text",
593
- "blockId": extra,
594
- "offset": 0,
595
- },
596
- "end": {
597
- "kind": "text",
598
- "blockId": extra,
599
- "offset": len(existing),
600
- },
601
- },
602
- "text": "",
603
- },
604
- ),
605
- )
681
+ }
682
+ run_sync(
683
+ session.doc.insert(
684
+ {
685
+ "target": cell_target,
686
+ "placement": "insideEnd",
687
+ "content": fragment,
688
+ },
689
+ ),
690
+ )
691
+ return
692
+ except Exception as e:
693
+ raise RuntimeError(
694
+ f"Failed to set _Cell.text on cell ({self._row}, {self._col}) "
695
+ f"of table {self._table._fresh_node_id()}: {e!r}",
696
+ ) from e
606
697
 
607
698
  @property
608
699
  def paragraphs(self) -> list["Paragraph"]:
609
700
  from docx.text.paragraph import Paragraph
610
701
 
611
702
  return [
612
- Paragraph(session=self._table._session, node_id=pid)
703
+ Paragraph(
704
+ session=self._table._session,
705
+ node_id=pid,
706
+ node_type="paragraph",
707
+ )
613
708
  for pid in self._inner_paragraph_ids()
614
709
  ]
615
710
 
@@ -682,7 +777,11 @@ class _Cell:
682
777
  raise RuntimeError(
683
778
  f"Superdoc did not return nodeId for _Cell.add_paragraph: {result!r}",
684
779
  )
685
- para = Paragraph(session=self._table._session, node_id=node_id)
780
+ para = Paragraph(
781
+ session=self._table._session,
782
+ node_id=node_id,
783
+ node_type="paragraph",
784
+ )
686
785
  if style:
687
786
  para.style = style
688
787
  return para
@@ -76,9 +76,20 @@ def _walk_inlines(info: object) -> list[dict]:
76
76
  class Paragraph:
77
77
  """A paragraph block in a Word document."""
78
78
 
79
- def __init__(self, *, session: "Session", node_id: str) -> None:
79
+ def __init__(
80
+ self,
81
+ *,
82
+ session: "Session",
83
+ node_id: str,
84
+ node_type: str = "paragraph",
85
+ ) -> None:
80
86
  self._session: "Session" = session
81
87
  self._node_id: str = node_id
88
+ # Track node_type at creation so paragraph-level ops can skip a
89
+ # getNodeById round-trip (which raced against Superdoc mutation
90
+ # commits and raised "Block X was not found" for freshly-created
91
+ # blocks). "paragraph" | "heading" | "listItem".
92
+ self._node_type: str = node_type
82
93
 
83
94
  @property
84
95
  def text(self) -> str:
@@ -290,20 +301,16 @@ class Paragraph:
290
301
  return "\f" in self.text
291
302
 
292
303
  def _block_target(self) -> dict:
293
- """Build a {kind, nodeType, nodeId} target for paragraph-level ops."""
294
- info: object = run_sync(
295
- self._session.doc.get_node_by_id({"id": self._node_id}),
296
- )
297
- node_type: str = "paragraph"
298
- if isinstance(info, dict):
299
- node_obj: object = info.get("node")
300
- if isinstance(node_obj, dict):
301
- raw: object = node_obj.get("nodeType")
302
- if isinstance(raw, str) and raw:
303
- node_type = raw
304
+ """Build a {kind, nodeType, nodeId} target for paragraph-level ops.
305
+
306
+ Uses the node_type cached at construction. This avoids an extra
307
+ getNodeById round-trip that previously raced with Superdoc's
308
+ mutation-commit pipeline and raised "Block X was not found" for
309
+ blocks that had just been created by the same session.
310
+ """
304
311
  return {
305
312
  "kind": "block",
306
- "nodeType": node_type,
313
+ "nodeType": self._node_type,
307
314
  "nodeId": self._node_id,
308
315
  }
309
316
 
@@ -381,7 +388,9 @@ class Paragraph:
381
388
  raise RuntimeError(
382
389
  f"Superdoc did not return nodeId for insert_paragraph_before: {result!r}",
383
390
  )
384
- new_para = Paragraph(session=self._session, node_id=node_id)
391
+ new_para = Paragraph(
392
+ session=self._session, node_id=node_id, node_type="paragraph",
393
+ )
385
394
  if style:
386
395
  new_para.style = style
387
396
  return new_para
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "athena-python-docx"
7
- version = "0.2.0"
7
+ version = "0.2.2"
8
8
  description = "Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -0,0 +1,107 @@
1
+ # Fidelity Testing Methodology
2
+
3
+ A multi-layer strategy to reach 100% coverage of the python-docx API surface
4
+ that `athena-python-docx` claims to replicate. Each layer catches a different
5
+ class of drift between our Superdoc-backed implementation and stock python-docx.
6
+
7
+ ## Layers
8
+
9
+ | # | Layer | Catches | Runtime |
10
+ |---|---|---|---|
11
+ | L1 | **Import surface parity** — tests that every public class/method/property python-docx exposes is importable from our SDK. | Missing API surface (AttributeError in user code). | <1s, local. |
12
+ | L2 | **Signature parity** — inspect.signature on every public callable must match python-docx (or be a documented deviation). | Parameter renames, missing optional args. | <1s, local. |
13
+ | L3 | **Enum value parity** — every WD_* enum has matching member names + values. | Silent failure when agent passes `WD_ALIGN_PARAGRAPH.DISTRIBUTE`. | <1s, local. |
14
+ | L4 | **Local behavior (fake-session)** — 147 cases in `complex_cases/real_world_cases/extreme_cases/mega_cases` run through our SDK with `FakeSession` recording every Superdoc op. | NotImplementedError stubs, AttributeError, logic errors. | ~10s, local. |
15
+ | L5 | **Op-trace snapshot** — each case's recorded Superdoc-op sequence is pinned. Regressions show up as op-count or op-sequence diffs. | Refactor drift (e.g. switching from `doc.replace` to `doc.insert`). | <1s, local. |
16
+ | L6 | **Property round-trip** — for every setter, the getter returns the value (or a normalized form). | Cosmetic read/write asymmetry. | ~10s, local. |
17
+ | L7 | **Binary round-trip vs stock python-docx** — each case's script is run by stock python-docx to produce a reference `.docx` and by our SDK's fake session to produce an in-memory model. Extracted features (para text, style, runs + formatting, table cells, alignment, indent, spacing) are diff'd. | Structural differences not caught by op-tracing. | ~20s, local. |
18
+ | L8 | **Daytona + real Keryx** (`runner.py`) — our SDK runs inside a document-exec sandbox against a live Superdoc doc, exports the resulting `.docx`, and that `.docx` is diff'd against the stock-generated reference. | Protocol/server mismatches (like the `_Cell.text` `getNodeById` walker that worked in the fake but failed against real Superdoc). | ~3m, network. |
19
+ | L9 | **Real exported-docx feature diff** — exports both sides' `.docx` files, opens them with stock python-docx, and compares the extracted semantic feature sets (runs, fonts, tables, sections, headers). | Visual drift after Superdoc's OOXML serialization. | ~30s incremental. |
20
+ | L10 | **Agent-in-the-loop replay** — pull failing LangSmith sessions, extract the `code` the agent tried to run, and replay each against the current SDK + Daytona. | Real-world failures we missed in design. | ~1m per session. |
21
+
22
+ ## Roadmap to 100% python-docx coverage
23
+
24
+ ### Phase 1: Inventory
25
+
26
+ Crawl stock python-docx and produce the exhaustive list of callables and
27
+ properties to cover. Use `inspect.getmembers` + `typing.get_type_hints`.
28
+
29
+ Output: `tests/fidelity/parity_spec.json` — one entry per `(module.path,
30
+ member)` tuple with signature, docstring, return type.
31
+
32
+ ### Phase 2: Surface-coverage tracker
33
+
34
+ Per-member status in a spreadsheet-like JSON:
35
+
36
+ ```json
37
+ {
38
+ "docx.text.paragraph.Paragraph.alignment": {
39
+ "status": "implemented",
40
+ "signature_match": true,
41
+ "round_trip_tested": true,
42
+ "binary_diff_ok": true,
43
+ "live_daytona_ok": true,
44
+ "first_shipped": "0.2.0"
45
+ },
46
+ "docx.text.font.Font.highlight_color": {
47
+ "status": "implemented",
48
+ ...
49
+ },
50
+ "docx.section.Section.first_page_footer": {
51
+ "status": "stub",
52
+ "signature_match": true,
53
+ "round_trip_tested": false,
54
+ "blocker": "Header/Footer paragraph iteration requires doc.headerFooters.parts.list + per-part getNodeById, not yet wired."
55
+ }
56
+ }
57
+ ```
58
+
59
+ A CI job blocks merges that decrease coverage.
60
+
61
+ ### Phase 3: Generated cases
62
+
63
+ For every python-docx public method, auto-generate a minimum-viable test
64
+ case from its signature: "call with each required param filled by a
65
+ representative value, assert no exception, assert getter round-trip where
66
+ applicable." ~200 cases generated automatically.
67
+
68
+ ### Phase 4: Property-based tests
69
+
70
+ Use `hypothesis` to fuzz lengths, unicode, nested tables, merge cells,
71
+ margin values — the kind of inputs that production agents generate but
72
+ hand-written tests miss.
73
+
74
+ ### Phase 5: Docx corpus replay
75
+
76
+ Collect a corpus of real-world .docx files from:
77
+ - python-docx's test fixtures on GitHub
78
+ - Tutorials that emit full documents (MSDN, textbook samples)
79
+ - Agora's staging assets exported to .docx
80
+
81
+ For each: run `Document.from_file(path)` (Phase 2 feature), mutate via
82
+ our SDK, re-export, compare.
83
+
84
+ ### Phase 6: Agent behavioral replay
85
+
86
+ Daily job scans the last 24h of `agora-staging` and `agora-production`
87
+ LangSmith traces for `execute_word_document_code` tool calls. Extracts
88
+ the `code` argument, deduplicates by script fingerprint, and runs each
89
+ through the Daytona-backed runner. Any new failure is opened as a gap.
90
+
91
+ ### Phase 7: Coverage percentage
92
+
93
+ The scorecard reports:
94
+ ```
95
+ python-docx API coverage:
96
+ Classes: 28 / 30 (93%)
97
+ Methods: 142 / 156 (91%)
98
+ Properties: 287 / 302 (95%)
99
+ Enum values: 67 / 73 (92%)
100
+
101
+ Round-trip fidelity:
102
+ Binary match rate: 112 / 112 (100%)
103
+ Live Daytona pass: 15 / 15 (100%)
104
+ Property getter/setter round-trips: 82 / 85 (96%)
105
+ ```
106
+
107
+ Target: every row at 100%, with deviations documented in CLAUDE.md.
@@ -141,13 +141,12 @@ CASES: list[Case] = [
141
141
  ),
142
142
  Case(
143
143
  name="cell_text_setter",
144
- description="Set cell(0,0).text on a 2x2 table — known NotImplementedError (Phase 2 stub).",
144
+ description="Set cell(0,0).text on a 2x2 table — now supported via 3-strategy fallback.",
145
145
  script=(
146
146
  "t = doc.add_table(rows=2, cols=2)\n"
147
147
  't.cell(0, 0).text = "A1"'
148
148
  ),
149
- expected_athena_exc="NotImplementedError",
150
- tags=("table", "stub"),
149
+ tags=("table",),
151
150
  ),
152
151
  # ---- Structural ops ------------------------------------------------------
153
152
  Case(