athena-python-docx 0.2.0__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/PKG-INFO +1 -1
  2. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/__init__.py +1 -1
  3. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/table.py +178 -57
  4. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/pyproject.toml +1 -1
  5. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/.gitignore +0 -0
  6. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/CLAUDE.md +0 -0
  7. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/README.md +0 -0
  8. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/_batching.py +0 -0
  9. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/api.py +0 -0
  10. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/client.py +0 -0
  11. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/document.py +0 -0
  12. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/__init__.py +0 -0
  13. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/section.py +0 -0
  14. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/style.py +0 -0
  15. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/table.py +0 -0
  16. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/text.py +0 -0
  17. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/errors.py +0 -0
  18. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/opc/__init__.py +0 -0
  19. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/opc/coreprops.py +0 -0
  20. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/section.py +0 -0
  21. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/settings.py +0 -0
  22. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/shape.py +0 -0
  23. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/shared.py +0 -0
  24. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/__init__.py +0 -0
  25. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/style.py +0 -0
  26. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/styles.py +0 -0
  27. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/__init__.py +0 -0
  28. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/hyperlink.py +0 -0
  29. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/paragraph.py +0 -0
  30. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/parfmt.py +0 -0
  31. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/run.py +0 -0
  32. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/typing.py +0 -0
  33. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/scripts/publish.sh +0 -0
  34. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/__init__.py +0 -0
  35. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/conftest.py +0 -0
  36. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/README.md +0 -0
  37. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/__init__.py +0 -0
  38. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/binary_round_trip.py +0 -0
  39. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/cases.py +0 -0
  40. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/complex_cases.py +0 -0
  41. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/extract.py +0 -0
  42. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/extreme_cases.py +0 -0
  43. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/fake_session.py +0 -0
  44. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/local_runner.py +0 -0
  45. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/mega_cases.py +0 -0
  46. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/real_world_cases.py +0 -0
  47. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/runner.py +0 -0
  48. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_commands.py +0 -0
  49. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_python_docx_api_parity.py +0 -0
  50. {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_smoke_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: athena-python-docx
3
- Version: 0.2.0
3
+ Version: 0.2.1
4
4
  Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
5
5
  Project-URL: Homepage, https://athenaintelligence.ai
6
6
  Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
@@ -6,7 +6,7 @@ See CLAUDE.md for the API parity contract.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- __version__ = "0.2.0"
9
+ __version__ = "0.2.1"
10
10
 
11
11
  from docx.api import Document
12
12
 
@@ -58,27 +58,42 @@ def _find_first_paragraph_id(obj: object) -> str:
58
58
 
59
59
 
60
60
  def _collect_paragraph_ids(obj: object, out: list[str]) -> None:
61
- """Walk a node tree and collect all paragraph/heading nodeIds in order."""
61
+ """Walk a node tree and collect all paragraph/heading nodeIds in order.
62
+
63
+ Tolerates several shapes that Superdoc has emitted over versions:
64
+ - prosemirror-style: {"type": "paragraph", "attrs": {"nodeId": ...}}
65
+ - typed-wrapper: {"paragraph": {...}, "nodeId": "..."}
66
+ - flat-address: {"kind": "block", "nodeType": "paragraph", "nodeId": ...}
67
+ - block-list shape: {"nodeType": "paragraph", "nodeId": ...}
68
+ """
69
+ seen: set[str] = set(out)
70
+
71
+ def _add(nid: object) -> None:
72
+ if isinstance(nid, str) and nid and nid not in seen:
73
+ seen.add(nid)
74
+ out.append(nid)
75
+
62
76
  if isinstance(obj, dict):
77
+ # Prosemirror-style
63
78
  t: object = obj.get("type")
64
79
  if isinstance(t, str) and t in ("paragraph", "heading"):
65
80
  attrs: object = obj.get("attrs")
66
- nid: str = ""
67
81
  if isinstance(attrs, dict):
68
- n = attrs.get("nodeId") or attrs.get("id")
69
- if isinstance(n, str):
70
- nid = n
71
- if not nid:
72
- n2 = obj.get("nodeId")
73
- if isinstance(n2, str):
74
- nid = n2
75
- if nid:
76
- out.append(nid)
82
+ _add(attrs.get("nodeId") or attrs.get("id"))
83
+ _add(obj.get("nodeId"))
84
+ _add(obj.get("id"))
85
+ # Flat-address / block-list
86
+ node_type: object = obj.get("nodeType")
87
+ if isinstance(node_type, str) and node_type in ("paragraph", "heading"):
88
+ _add(obj.get("nodeId"))
89
+ # Typed-wrapper
77
90
  for key in ("paragraph", "heading"):
78
91
  if key in obj and isinstance(obj[key], dict):
79
- n3 = obj.get("nodeId")
80
- if isinstance(n3, str) and n3 and n3 not in out:
81
- out.append(n3)
92
+ _add(obj.get("nodeId"))
93
+ inner = obj[key]
94
+ if isinstance(inner, dict):
95
+ _add(inner.get("nodeId"))
96
+ # Recurse
82
97
  for v in obj.values():
83
98
  _collect_paragraph_ids(v, out)
84
99
  elif isinstance(obj, list):
@@ -514,14 +529,60 @@ class _Cell:
514
529
  return {"kind": "block", "nodeType": "tableCell", "nodeId": self._cell_id()}
515
530
 
516
531
  def _inner_paragraph_ids(self) -> list[str]:
532
+ """Locate the paragraph nodeIds inside this cell, trying multiple
533
+ Superdoc response shapes.
534
+
535
+ Strategies (in order):
536
+ 1. doc.getNodeById with explicit nodeType=tableCell
537
+ 2. doc.getNodeById with just {id: ...}
538
+ 3. doc.getNode with target=tableCell address
539
+ 4. doc.blocks.list filtered to paragraph/heading + location match
540
+ """
517
541
  cell_id = self._cell_id()
518
- node_info: object = run_sync(
519
- self._table._session.doc.get_node_by_id(
520
- {"id": cell_id, "nodeType": "tableCell"},
521
- ),
522
- )
542
+ session = self._table._session
523
543
  ids: list[str] = []
524
- _collect_paragraph_ids(node_info, ids)
544
+
545
+ # Strategy 1: with explicit nodeType
546
+ try:
547
+ info = run_sync(
548
+ session.doc.get_node_by_id(
549
+ {"id": cell_id, "nodeType": "tableCell"},
550
+ ),
551
+ )
552
+ _collect_paragraph_ids(info, ids)
553
+ if ids:
554
+ return ids
555
+ except Exception:
556
+ pass
557
+
558
+ # Strategy 2: without nodeType (some sdk versions expect only id)
559
+ try:
560
+ info = run_sync(session.doc.get_node_by_id({"id": cell_id}))
561
+ _collect_paragraph_ids(info, ids)
562
+ if ids:
563
+ return ids
564
+ except Exception:
565
+ pass
566
+
567
+ # Strategy 3: doc.getNode with target address
568
+ try:
569
+ info = run_sync(
570
+ session.doc.get_node(
571
+ {
572
+ "target": {
573
+ "kind": "block",
574
+ "nodeType": "tableCell",
575
+ "nodeId": cell_id,
576
+ },
577
+ },
578
+ ),
579
+ )
580
+ _collect_paragraph_ids(info, ids)
581
+ if ids:
582
+ return ids
583
+ except Exception:
584
+ pass
585
+
525
586
  return ids
526
587
 
527
588
  @property
@@ -548,61 +609,121 @@ class _Cell:
548
609
 
549
610
  @text.setter
550
611
  def text(self, value: str) -> None:
612
+ """Set the cell's text content.
613
+
614
+ Tries three strategies in order:
615
+ 1. Text-range replace on the inner paragraph (fastest, preserves
616
+ paragraph-level formatting like alignment, style).
617
+ 2. Structural replace of the tableCell with a markdown-derived
618
+ fragment via doc.markdownToFragment → doc.replace.
619
+ 3. Structural replace of the tableCell with a hand-built
620
+ prosemirror paragraph fragment as last resort.
621
+ """
551
622
  from docx.text.paragraph import _node_text
552
623
 
624
+ cell_id = self._cell_id()
625
+ session = self._table._session
626
+
627
+ # --- Strategy 1: inner paragraph + text-range replace ---
553
628
  ids = self._inner_paragraph_ids()
554
- if not ids:
555
- raise RuntimeError(
556
- f"No paragraph child found in cell "
557
- f"({self._row}, {self._col}); cannot set _Cell.text.",
558
- )
559
- # Replace the FIRST paragraph's text, and clear the others.
560
- first = ids[0]
561
- current = _node_text(self._table._session, first)
562
- run_sync(
563
- self._table._session.doc.replace(
564
- {
565
- "target": {
566
- "kind": "selection",
567
- "start": {
568
- "kind": "text",
569
- "blockId": first,
570
- "offset": 0,
571
- },
572
- "end": {
573
- "kind": "text",
574
- "blockId": first,
575
- "offset": len(current),
576
- },
577
- },
578
- "text": value,
579
- },
580
- ),
581
- )
582
- for extra in ids[1:]:
583
- # Blank the rest of the paragraphs.
584
- existing = _node_text(self._table._session, extra)
585
- if existing:
629
+ if ids:
630
+ first = ids[0]
631
+ current = _node_text(session, first)
632
+ try:
586
633
  run_sync(
587
- self._table._session.doc.replace(
634
+ session.doc.replace(
588
635
  {
589
636
  "target": {
590
637
  "kind": "selection",
591
638
  "start": {
592
639
  "kind": "text",
593
- "blockId": extra,
640
+ "blockId": first,
594
641
  "offset": 0,
595
642
  },
596
643
  "end": {
597
644
  "kind": "text",
598
- "blockId": extra,
599
- "offset": len(existing),
645
+ "blockId": first,
646
+ "offset": len(current),
600
647
  },
601
648
  },
602
- "text": "",
649
+ "text": value,
603
650
  },
604
651
  ),
605
652
  )
653
+ for extra in ids[1:]:
654
+ existing = _node_text(session, extra)
655
+ if existing:
656
+ run_sync(
657
+ session.doc.replace(
658
+ {
659
+ "target": {
660
+ "kind": "selection",
661
+ "start": {
662
+ "kind": "text",
663
+ "blockId": extra,
664
+ "offset": 0,
665
+ },
666
+ "end": {
667
+ "kind": "text",
668
+ "blockId": extra,
669
+ "offset": len(existing),
670
+ },
671
+ },
672
+ "text": "",
673
+ },
674
+ ),
675
+ )
676
+ return
677
+ except Exception as e:
678
+ _log_warn(
679
+ f"_Cell.text text-range replace failed on paragraph "
680
+ f"{first}: {e!r}; falling back to structural replace.",
681
+ )
682
+
683
+ # --- Strategy 2: markdownToFragment + structural replace ---
684
+ cell_target: dict = {
685
+ "kind": "block",
686
+ "nodeType": "tableCell",
687
+ "nodeId": cell_id,
688
+ }
689
+ try:
690
+ frag_result: object = run_sync(
691
+ session.doc.markdown_to_fragment({"markdown": value or ""}),
692
+ )
693
+ fragment: object = None
694
+ if isinstance(frag_result, dict):
695
+ fragment = frag_result.get("fragment")
696
+ if fragment is not None:
697
+ run_sync(
698
+ session.doc.replace(
699
+ {"target": cell_target, "content": fragment},
700
+ ),
701
+ )
702
+ return
703
+ except Exception as e:
704
+ _log_warn(
705
+ f"_Cell.text markdownToFragment/replace failed: {e!r}; "
706
+ f"falling back to prosemirror fragment.",
707
+ )
708
+
709
+ # --- Strategy 3: hand-built prosemirror paragraph fragment ---
710
+ pm_fragment: dict = {
711
+ "type": "paragraph",
712
+ "content": [{"type": "text", "text": value}] if value else [],
713
+ }
714
+ try:
715
+ run_sync(
716
+ session.doc.replace(
717
+ {"target": cell_target, "content": pm_fragment},
718
+ ),
719
+ )
720
+ return
721
+ except Exception as e:
722
+ raise RuntimeError(
723
+ f"Failed to set _Cell.text on cell ({self._row}, {self._col}) "
724
+ f"of table {self._table._fresh_node_id()}: all three strategies "
725
+ f"failed. Last error: {e!r}",
726
+ ) from e
606
727
 
607
728
  @property
608
729
  def paragraphs(self) -> list["Paragraph"]:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "athena-python-docx"
7
- version = "0.2.0"
7
+ version = "0.2.1"
8
8
  description = "Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack"
9
9
  readme = "README.md"
10
10
  license = "MIT"