athena-python-docx 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/PKG-INFO +1 -1
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/__init__.py +1 -1
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/table.py +178 -57
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/pyproject.toml +1 -1
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/.gitignore +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/CLAUDE.md +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/README.md +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/_batching.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/api.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/client.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/document.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/section.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/style.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/table.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/enum/text.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/errors.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/opc/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/opc/coreprops.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/section.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/settings.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/shape.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/shared.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/style.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/styles/styles.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/hyperlink.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/paragraph.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/parfmt.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/text/run.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/docx/typing.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/scripts/publish.sh +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/conftest.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/README.md +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/__init__.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/binary_round_trip.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/cases.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/complex_cases.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/extract.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/extreme_cases.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/fake_session.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/local_runner.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/mega_cases.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/real_world_cases.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/fidelity/runner.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_commands.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_python_docx_api_parity.py +0 -0
- {athena_python_docx-0.2.0 → athena_python_docx-0.2.1}/tests/test_smoke_integration.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: athena-python-docx
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.1
|
|
4
4
|
Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
|
|
5
5
|
Project-URL: Homepage, https://athenaintelligence.ai
|
|
6
6
|
Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
|
|
@@ -58,27 +58,42 @@ def _find_first_paragraph_id(obj: object) -> str:
|
|
|
58
58
|
|
|
59
59
|
|
|
60
60
|
def _collect_paragraph_ids(obj: object, out: list[str]) -> None:
|
|
61
|
-
"""Walk a node tree and collect all paragraph/heading nodeIds in order."""
|
|
61
|
+
"""Walk a node tree and collect all paragraph/heading nodeIds in order.
|
|
62
|
+
|
|
63
|
+
Tolerates several shapes that Superdoc has emitted over versions:
|
|
64
|
+
- prosemirror-style: {"type": "paragraph", "attrs": {"nodeId": ...}}
|
|
65
|
+
- typed-wrapper: {"paragraph": {...}, "nodeId": "..."}
|
|
66
|
+
- flat-address: {"kind": "block", "nodeType": "paragraph", "nodeId": ...}
|
|
67
|
+
- block-list shape: {"nodeType": "paragraph", "nodeId": ...}
|
|
68
|
+
"""
|
|
69
|
+
seen: set[str] = set(out)
|
|
70
|
+
|
|
71
|
+
def _add(nid: object) -> None:
|
|
72
|
+
if isinstance(nid, str) and nid and nid not in seen:
|
|
73
|
+
seen.add(nid)
|
|
74
|
+
out.append(nid)
|
|
75
|
+
|
|
62
76
|
if isinstance(obj, dict):
|
|
77
|
+
# Prosemirror-style
|
|
63
78
|
t: object = obj.get("type")
|
|
64
79
|
if isinstance(t, str) and t in ("paragraph", "heading"):
|
|
65
80
|
attrs: object = obj.get("attrs")
|
|
66
|
-
nid: str = ""
|
|
67
81
|
if isinstance(attrs, dict):
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
out.append(nid)
|
|
82
|
+
_add(attrs.get("nodeId") or attrs.get("id"))
|
|
83
|
+
_add(obj.get("nodeId"))
|
|
84
|
+
_add(obj.get("id"))
|
|
85
|
+
# Flat-address / block-list
|
|
86
|
+
node_type: object = obj.get("nodeType")
|
|
87
|
+
if isinstance(node_type, str) and node_type in ("paragraph", "heading"):
|
|
88
|
+
_add(obj.get("nodeId"))
|
|
89
|
+
# Typed-wrapper
|
|
77
90
|
for key in ("paragraph", "heading"):
|
|
78
91
|
if key in obj and isinstance(obj[key], dict):
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
92
|
+
_add(obj.get("nodeId"))
|
|
93
|
+
inner = obj[key]
|
|
94
|
+
if isinstance(inner, dict):
|
|
95
|
+
_add(inner.get("nodeId"))
|
|
96
|
+
# Recurse
|
|
82
97
|
for v in obj.values():
|
|
83
98
|
_collect_paragraph_ids(v, out)
|
|
84
99
|
elif isinstance(obj, list):
|
|
@@ -514,14 +529,60 @@ class _Cell:
|
|
|
514
529
|
return {"kind": "block", "nodeType": "tableCell", "nodeId": self._cell_id()}
|
|
515
530
|
|
|
516
531
|
def _inner_paragraph_ids(self) -> list[str]:
|
|
532
|
+
"""Locate the paragraph nodeIds inside this cell, trying multiple
|
|
533
|
+
Superdoc response shapes.
|
|
534
|
+
|
|
535
|
+
Strategies (in order):
|
|
536
|
+
1. doc.getNodeById with explicit nodeType=tableCell
|
|
537
|
+
2. doc.getNodeById with just {id: ...}
|
|
538
|
+
3. doc.getNode with target=tableCell address
|
|
539
|
+
4. doc.blocks.list filtered to paragraph/heading + location match
|
|
540
|
+
"""
|
|
517
541
|
cell_id = self._cell_id()
|
|
518
|
-
|
|
519
|
-
self._table._session.doc.get_node_by_id(
|
|
520
|
-
{"id": cell_id, "nodeType": "tableCell"},
|
|
521
|
-
),
|
|
522
|
-
)
|
|
542
|
+
session = self._table._session
|
|
523
543
|
ids: list[str] = []
|
|
524
|
-
|
|
544
|
+
|
|
545
|
+
# Strategy 1: with explicit nodeType
|
|
546
|
+
try:
|
|
547
|
+
info = run_sync(
|
|
548
|
+
session.doc.get_node_by_id(
|
|
549
|
+
{"id": cell_id, "nodeType": "tableCell"},
|
|
550
|
+
),
|
|
551
|
+
)
|
|
552
|
+
_collect_paragraph_ids(info, ids)
|
|
553
|
+
if ids:
|
|
554
|
+
return ids
|
|
555
|
+
except Exception:
|
|
556
|
+
pass
|
|
557
|
+
|
|
558
|
+
# Strategy 2: without nodeType (some sdk versions expect only id)
|
|
559
|
+
try:
|
|
560
|
+
info = run_sync(session.doc.get_node_by_id({"id": cell_id}))
|
|
561
|
+
_collect_paragraph_ids(info, ids)
|
|
562
|
+
if ids:
|
|
563
|
+
return ids
|
|
564
|
+
except Exception:
|
|
565
|
+
pass
|
|
566
|
+
|
|
567
|
+
# Strategy 3: doc.getNode with target address
|
|
568
|
+
try:
|
|
569
|
+
info = run_sync(
|
|
570
|
+
session.doc.get_node(
|
|
571
|
+
{
|
|
572
|
+
"target": {
|
|
573
|
+
"kind": "block",
|
|
574
|
+
"nodeType": "tableCell",
|
|
575
|
+
"nodeId": cell_id,
|
|
576
|
+
},
|
|
577
|
+
},
|
|
578
|
+
),
|
|
579
|
+
)
|
|
580
|
+
_collect_paragraph_ids(info, ids)
|
|
581
|
+
if ids:
|
|
582
|
+
return ids
|
|
583
|
+
except Exception:
|
|
584
|
+
pass
|
|
585
|
+
|
|
525
586
|
return ids
|
|
526
587
|
|
|
527
588
|
@property
|
|
@@ -548,61 +609,121 @@ class _Cell:
|
|
|
548
609
|
|
|
549
610
|
@text.setter
|
|
550
611
|
def text(self, value: str) -> None:
|
|
612
|
+
"""Set the cell's text content.
|
|
613
|
+
|
|
614
|
+
Tries three strategies in order:
|
|
615
|
+
1. Text-range replace on the inner paragraph (fastest, preserves
|
|
616
|
+
paragraph-level formatting like alignment, style).
|
|
617
|
+
2. Structural replace of the tableCell with a markdown-derived
|
|
618
|
+
fragment via doc.markdownToFragment → doc.replace.
|
|
619
|
+
3. Structural replace of the tableCell with a hand-built
|
|
620
|
+
prosemirror paragraph fragment as last resort.
|
|
621
|
+
"""
|
|
551
622
|
from docx.text.paragraph import _node_text
|
|
552
623
|
|
|
624
|
+
cell_id = self._cell_id()
|
|
625
|
+
session = self._table._session
|
|
626
|
+
|
|
627
|
+
# --- Strategy 1: inner paragraph + text-range replace ---
|
|
553
628
|
ids = self._inner_paragraph_ids()
|
|
554
|
-
if
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
)
|
|
559
|
-
# Replace the FIRST paragraph's text, and clear the others.
|
|
560
|
-
first = ids[0]
|
|
561
|
-
current = _node_text(self._table._session, first)
|
|
562
|
-
run_sync(
|
|
563
|
-
self._table._session.doc.replace(
|
|
564
|
-
{
|
|
565
|
-
"target": {
|
|
566
|
-
"kind": "selection",
|
|
567
|
-
"start": {
|
|
568
|
-
"kind": "text",
|
|
569
|
-
"blockId": first,
|
|
570
|
-
"offset": 0,
|
|
571
|
-
},
|
|
572
|
-
"end": {
|
|
573
|
-
"kind": "text",
|
|
574
|
-
"blockId": first,
|
|
575
|
-
"offset": len(current),
|
|
576
|
-
},
|
|
577
|
-
},
|
|
578
|
-
"text": value,
|
|
579
|
-
},
|
|
580
|
-
),
|
|
581
|
-
)
|
|
582
|
-
for extra in ids[1:]:
|
|
583
|
-
# Blank the rest of the paragraphs.
|
|
584
|
-
existing = _node_text(self._table._session, extra)
|
|
585
|
-
if existing:
|
|
629
|
+
if ids:
|
|
630
|
+
first = ids[0]
|
|
631
|
+
current = _node_text(session, first)
|
|
632
|
+
try:
|
|
586
633
|
run_sync(
|
|
587
|
-
|
|
634
|
+
session.doc.replace(
|
|
588
635
|
{
|
|
589
636
|
"target": {
|
|
590
637
|
"kind": "selection",
|
|
591
638
|
"start": {
|
|
592
639
|
"kind": "text",
|
|
593
|
-
"blockId": extra,
|
|
640
|
+
"blockId": first,
|
|
594
641
|
"offset": 0,
|
|
595
642
|
},
|
|
596
643
|
"end": {
|
|
597
644
|
"kind": "text",
|
|
598
|
-
"blockId": extra,
|
|
599
|
-
"offset": len(existing),
|
|
645
|
+
"blockId": first,
|
|
646
|
+
"offset": len(current),
|
|
600
647
|
},
|
|
601
648
|
},
|
|
602
|
-
"text": "",
|
|
649
|
+
"text": value,
|
|
603
650
|
},
|
|
604
651
|
),
|
|
605
652
|
)
|
|
653
|
+
for extra in ids[1:]:
|
|
654
|
+
existing = _node_text(session, extra)
|
|
655
|
+
if existing:
|
|
656
|
+
run_sync(
|
|
657
|
+
session.doc.replace(
|
|
658
|
+
{
|
|
659
|
+
"target": {
|
|
660
|
+
"kind": "selection",
|
|
661
|
+
"start": {
|
|
662
|
+
"kind": "text",
|
|
663
|
+
"blockId": extra,
|
|
664
|
+
"offset": 0,
|
|
665
|
+
},
|
|
666
|
+
"end": {
|
|
667
|
+
"kind": "text",
|
|
668
|
+
"blockId": extra,
|
|
669
|
+
"offset": len(existing),
|
|
670
|
+
},
|
|
671
|
+
},
|
|
672
|
+
"text": "",
|
|
673
|
+
},
|
|
674
|
+
),
|
|
675
|
+
)
|
|
676
|
+
return
|
|
677
|
+
except Exception as e:
|
|
678
|
+
_log_warn(
|
|
679
|
+
f"_Cell.text text-range replace failed on paragraph "
|
|
680
|
+
f"{first}: {e!r}; falling back to structural replace.",
|
|
681
|
+
)
|
|
682
|
+
|
|
683
|
+
# --- Strategy 2: markdownToFragment + structural replace ---
|
|
684
|
+
cell_target: dict = {
|
|
685
|
+
"kind": "block",
|
|
686
|
+
"nodeType": "tableCell",
|
|
687
|
+
"nodeId": cell_id,
|
|
688
|
+
}
|
|
689
|
+
try:
|
|
690
|
+
frag_result: object = run_sync(
|
|
691
|
+
session.doc.markdown_to_fragment({"markdown": value or ""}),
|
|
692
|
+
)
|
|
693
|
+
fragment: object = None
|
|
694
|
+
if isinstance(frag_result, dict):
|
|
695
|
+
fragment = frag_result.get("fragment")
|
|
696
|
+
if fragment is not None:
|
|
697
|
+
run_sync(
|
|
698
|
+
session.doc.replace(
|
|
699
|
+
{"target": cell_target, "content": fragment},
|
|
700
|
+
),
|
|
701
|
+
)
|
|
702
|
+
return
|
|
703
|
+
except Exception as e:
|
|
704
|
+
_log_warn(
|
|
705
|
+
f"_Cell.text markdownToFragment/replace failed: {e!r}; "
|
|
706
|
+
f"falling back to prosemirror fragment.",
|
|
707
|
+
)
|
|
708
|
+
|
|
709
|
+
# --- Strategy 3: hand-built prosemirror paragraph fragment ---
|
|
710
|
+
pm_fragment: dict = {
|
|
711
|
+
"type": "paragraph",
|
|
712
|
+
"content": [{"type": "text", "text": value}] if value else [],
|
|
713
|
+
}
|
|
714
|
+
try:
|
|
715
|
+
run_sync(
|
|
716
|
+
session.doc.replace(
|
|
717
|
+
{"target": cell_target, "content": pm_fragment},
|
|
718
|
+
),
|
|
719
|
+
)
|
|
720
|
+
return
|
|
721
|
+
except Exception as e:
|
|
722
|
+
raise RuntimeError(
|
|
723
|
+
f"Failed to set _Cell.text on cell ({self._row}, {self._col}) "
|
|
724
|
+
f"of table {self._table._fresh_node_id()}: all three strategies "
|
|
725
|
+
f"failed. Last error: {e!r}",
|
|
726
|
+
) from e
|
|
606
727
|
|
|
607
728
|
@property
|
|
608
729
|
def paragraphs(self) -> list["Paragraph"]:
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "athena-python-docx"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.2.1"
|
|
8
8
|
description = "Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|