athena-python-docx 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/PKG-INFO +1 -1
  2. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/__init__.py +1 -1
  3. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/pyproject.toml +1 -1
  4. athena_python_docx-0.1.6/tests/fidelity/README.md +71 -0
  5. athena_python_docx-0.1.6/tests/fidelity/__init__.py +14 -0
  6. athena_python_docx-0.1.6/tests/fidelity/cases.py +155 -0
  7. athena_python_docx-0.1.6/tests/fidelity/extract.py +259 -0
  8. athena_python_docx-0.1.6/tests/fidelity/runner.py +453 -0
  9. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/.gitignore +0 -0
  10. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/CLAUDE.md +0 -0
  11. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/README.md +0 -0
  12. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/_batching.py +0 -0
  13. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/api.py +0 -0
  14. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/client.py +0 -0
  15. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/document.py +0 -0
  16. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/__init__.py +0 -0
  17. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/table.py +0 -0
  18. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/text.py +0 -0
  19. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/errors.py +0 -0
  20. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/shared.py +0 -0
  21. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/table.py +0 -0
  22. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/__init__.py +0 -0
  23. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/paragraph.py +0 -0
  24. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/run.py +0 -0
  25. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/typing.py +0 -0
  26. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/scripts/publish.sh +0 -0
  27. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/__init__.py +0 -0
  28. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/conftest.py +0 -0
  29. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_commands.py +0 -0
  30. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_python_docx_api_parity.py +0 -0
  31. {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_smoke_integration.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: athena-python-docx
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
5
5
  Project-URL: Homepage, https://athenaintelligence.ai
6
6
  Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
@@ -6,7 +6,7 @@ See CLAUDE.md for the API parity contract.
6
6
 
7
7
  from __future__ import annotations
8
8
 
9
- __version__ = "0.1.5"
9
+ __version__ = "0.1.6"
10
10
 
11
11
  from docx.api import Document
12
12
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "athena-python-docx"
7
- version = "0.1.5"
7
+ version = "0.1.6"
8
8
  description = "Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -0,0 +1,71 @@
1
+ # Fidelity tests
2
+
3
+ End-to-end comparison of `athena-python-docx` against stock `python-docx`.
4
+ For each test case (a small script using the python-docx API), the runner:
5
+
6
+ 1. Executes the script via **stock `python-docx`** (installed to an
7
+ isolated site-packages at `/tmp/docx-fidelity-stock-site-packages`)
8
+ and saves the resulting `.docx` to `/tmp/docx-fidelity-artifacts/`.
9
+ 2. Executes the same script via **`athena-python-docx`** inside a
10
+ Daytona sandbox (the `document-exec:v<N>` image). The resulting Y.Doc
11
+ is exported to `.docx` by Superdoc and downloaded back to
12
+ `/tmp/docx-fidelity-artifacts/`.
13
+ 3. Reads both `.docx` files back with stock `python-docx` and extracts a
14
+ normalized feature set (paragraph text + style, runs + bold/italic/
15
+ underline/color, table row/col counts + cell texts).
16
+ 4. Compares the feature sets and prints a scorecard.
17
+
18
+ Stylistic differences that stem from the two libraries' different default
19
+ templates (font, margins, theme colors) are intentionally ignored — the
20
+ focus is on the content-level operations an agent actually cares about.
21
+
22
+ ## Running
23
+
24
+ ```bash
25
+ cd agora && doppler run --project agora --config dev -- \
26
+ uv run python \
27
+ "$(git rev-parse --show-toplevel)/docx-studio/python-sdk/tests/fidelity/runner.py"
28
+ ```
29
+
30
+ Configuration via env vars:
31
+
32
+ | Var | Default | Purpose |
33
+ |-----|---------|---------|
34
+ | `ATHENA_DOCX_FIDELITY_ASSET_ID` | `asset_de593a96-…-2b16ad` (existing staging test doc) | SuperDoc asset to write against — contents are cleared via `doc.clear_content()` between cases |
35
+ | `DAYTONA_DOCUMENT_EXEC_SNAPSHOT` | `document-exec:v1` (from agora settings) | Snapshot to spin up |
36
+
37
+ ## Interpreting results
38
+
39
+ | Status | Meaning |
40
+ |---|---|
41
+ | `✅ OK` | Athena script succeeded and content matches stock python-docx |
42
+ | `⚠️ DIFF` | Athena script succeeded but content differs — see listed diffs |
43
+ | `🟡 STUB_HIT` | Case hit an expected `NotImplementedError` stub (a known Phase-2 gap) |
44
+ | `🟠 UNEXPECTED_PASS` | Case was expected to raise a stub error, but didn't — probably means the Phase-2 gap has been filled; re-classify the case |
45
+ | `🔴 WRONG_EXC` | Case raised, but the wrong exception type |
46
+ | `❌ SCRIPT_ERROR` | Athena side crashed with an unexpected exception |
47
+ | `❌ SETUP_ERROR` | Pre-flight `doc.clear_content()` failed |
48
+ | `❌ EXPORT_ERROR` | Superdoc couldn't save the Y.Doc to `.docx` |
49
+ | `❌ DOWNLOAD_ERROR` | `fs.download_file` on the Daytona sandbox failed |
50
+
51
+ ## Adding cases
52
+
53
+ Edit `cases.py` and add a `Case(...)` entry. Keep scripts minimal (one
54
+ surface per case where possible). If the operation is a known Phase-2
55
+ stub, set `expected_athena_exc="NotImplementedError"` so the case is
56
+ scored as `STUB_HIT` rather than as a failure.
57
+
58
+ ## Known caveats
59
+
60
+ - The runner shares **one** SuperDoc asset across all cases, clearing
61
+ content between each via `doc.clear_content()`. If `clear_content`
62
+ ever regresses, every case after the first will be corrupted. The
63
+ runner reports that as `SETUP_ERROR`.
64
+ - Stock python-docx's default template pads the document with a trailing
65
+ empty paragraph. The comparator filters empty paragraphs before
66
+ counting, but paragraph indices in diff output are 0-based on the
67
+ non-empty subset only.
68
+ - `skip_content_diff=True` on a case tells the runner to only check that
69
+ the athena side didn't raise; useful for structural ops
70
+ (`add_page_break`) where the exported OOXML legitimately differs from
71
+ the stock output.
@@ -0,0 +1,14 @@
1
+ """Fidelity tests for athena-python-docx vs stock python-docx.
2
+
3
+ Each test case is a small script that uses the python-docx `docx` namespace.
4
+ The same script runs against:
5
+ - stock `python-docx` (in an isolated venv) -> produces .docx A
6
+ - `athena-python-docx` via Daytona + Superdoc -> produces .docx B
7
+
8
+ Both .docx files are then read back through stock python-docx for feature
9
+ extraction, and the extracted structures are compared. Differences that
10
+ matter (missing text, wrong style, different cell count) are scored as
11
+ FAIL; stylistic differences (default font, theme color) are tolerated.
12
+
13
+ See `README.md` for running.
14
+ """
@@ -0,0 +1,155 @@
1
+ """Fidelity test cases.
2
+
3
+ Each case is a snippet that operates on a pre-bound ``doc`` (the Document
4
+ instance). The same snippet runs against stock python-docx and against
5
+ athena-python-docx via Daytona.
6
+
7
+ Conventions
8
+ -----------
9
+ - Use only the python-docx public API — no private `_element` access, no
10
+ imports beyond what's in the preamble.
11
+ - Keep cases minimal (one surface per case where possible) so a failure
12
+ pinpoints the SDK gap.
13
+ - Mark known-stubbed operations with ``expected_athena_exc`` — the case passes
14
+ if the stub actually raises (regression detection).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass, field
20
+
21
+
22
@dataclass(frozen=True)
class Case:
    """One fidelity test case.

    A minimal python-docx script (operating on a pre-bound ``doc``) plus
    the metadata the runner needs to score it.
    """

    name: str
    description: str
    script: str
    # When set, the athena-python-docx run is expected to raise this exception
    # type. The local python-docx run is never expected to raise — if it does,
    # the case is broken and reported as an infrastructure error.
    expected_athena_exc: str | None = None
    # If True, the athena side is not compared against stock — only asserted
    # to succeed. Use when the op has no content-level effect worth diffing
    # (e.g. add_page_break), or when the stock and Superdoc template diverge
    # too much for a meaningful diff.
    skip_content_diff: bool = False
    # Optional tags for filtering.
    tags: tuple[str, ...] = field(default_factory=tuple)


CASES: list[Case] = [
    # ---- Core append ops -----------------------------------------------------
    Case(
        name="add_paragraph_with_text",
        description="One plain paragraph containing a short string.",
        script='doc.add_paragraph("hello world")',
        tags=("paragraph",),
    ),
    Case(
        name="add_paragraph_empty",
        # Fixed: previously said "docx.add_paragraph()" — the bound name is `doc`.
        description="A paragraph with no text (doc.add_paragraph()).",
        script="doc.add_paragraph()",
        tags=("paragraph",),
    ),
    Case(
        name="add_heading_level_1",
        description="Heading 1 with text.",
        script='doc.add_heading("H1 heading", level=1)',
        tags=("heading",),
    ),
    Case(
        name="add_heading_level_2",
        description="Heading 2 with text.",
        script='doc.add_heading("H2 heading", level=2)',
        tags=("heading",),
    ),
    Case(
        name="add_heading_level_3",
        description="Heading 3 with text.",
        script='doc.add_heading("H3 heading", level=3)',
        tags=("heading",),
    ),
    # ---- Runs + inline formatting --------------------------------------------
    Case(
        name="add_run_bold_italic",
        description="Paragraph with two runs, bold + italic.",
        script=(
            "p = doc.add_paragraph()\n"
            'r1 = p.add_run("bold run")\n'
            "r1.bold = True\n"
            'r2 = p.add_run("italic run")\n'
            "r2.italic = True"
        ),
        tags=("run", "formatting"),
    ),
    Case(
        name="add_run_colored",
        description="Paragraph with a run colored green via RGBColor.",
        script=(
            "from docx.shared import RGBColor\n"
            "p = doc.add_paragraph()\n"
            'r = p.add_run("green text")\n'
            "r.font.color.rgb = RGBColor(0x00, 0x66, 0x00)"
        ),
        tags=("run", "formatting", "color"),
    ),
    Case(
        name="add_run_font_size",
        description="Paragraph with a run set to 18pt.",
        script=(
            "from docx.shared import Pt\n"
            "p = doc.add_paragraph()\n"
            'r = p.add_run("sized 18pt")\n'
            "r.font.size = Pt(18)"
        ),
        tags=("run", "formatting", "font"),
    ),
    # ---- Tables --------------------------------------------------------------
    Case(
        name="add_table_2x2_empty",
        description="Create an empty 2x2 table; verify row/col count.",
        script="doc.add_table(rows=2, cols=2)",
        tags=("table",),
    ),
    Case(
        name="add_table_3x3_with_style",
        description="Create a 3x3 TableGrid table.",
        script='doc.add_table(rows=3, cols=3, style="TableGrid")',
        tags=("table", "style"),
    ),
    Case(
        name="cell_text_setter",
        description="Set cell(0,0).text on a 2x2 table — known NotImplementedError in 0.1.5.",
        script=(
            "t = doc.add_table(rows=2, cols=2)\n"
            't.cell(0, 0).text = "A1"'
        ),
        expected_athena_exc="NotImplementedError",
        tags=("table", "stub"),
    ),
    # ---- Structural ops ------------------------------------------------------
    Case(
        name="add_page_break",
        description="Append a page break; content-diff skipped (template differs).",
        script="doc.add_page_break()",
        skip_content_diff=True,
        tags=("structural",),
    ),
    # ---- Multi-op combo ------------------------------------------------------
    Case(
        name="combo_heading_paragraph_table",
        description="Heading + paragraph with formatted runs + 2x2 table (no cell text).",
        script=(
            "from docx.shared import RGBColor\n"
            'doc.add_heading("Report", level=1)\n'
            "p = doc.add_paragraph()\n"
            'r1 = p.add_run("Revenue grew ")\n'
            'r2 = p.add_run("12%")\n'
            "r2.bold = True\n"
            "r2.font.color.rgb = RGBColor(0x00, 0x66, 0x00)\n"
            'p.add_run(" YoY.")\n'
            'doc.add_table(rows=2, cols=2, style="TableGrid")'
        ),
        tags=("combo",),
    ),
]
@@ -0,0 +1,259 @@
1
+ """Feature extractor — reads a .docx (produced by either SDK) with stock
2
+ python-docx and returns a normalized structure for comparison.
3
+
4
+ The extractor focuses on features that SHOULD round-trip regardless of
5
+ template differences between stock python-docx and Superdoc:
6
+ - paragraph text content and style name
7
+ - runs within each paragraph: text + bold + italic + underline + color
8
+ - table row/col counts + cell texts
9
+
10
+ It does NOT try to compare defaults, fonts, margins, or theme colors —
11
+ those legitimately differ between a blank python-docx template and the
12
+ Superdoc SuperDoc-default template, and would produce false positives.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import importlib
18
+ import sys
19
+ import types
20
+ from pathlib import Path
21
+ from typing import TypedDict
22
+
23
+
24
# ── isolated python-docx loader ────────────────────────────────────────
# We run this extractor in a process that has the athena-python-docx
# `docx` namespace installed. To read real .docx files we need STOCK
# python-docx. Load it from an isolated path and return a module handle.

_STOCK_CACHE: types.ModuleType | None = None


def _load_stock_python_docx(stock_site_packages: str) -> types.ModuleType:
    """Import stock python-docx from an isolated site-packages directory.

    The Athena SDK owns the ``docx`` namespace in this process, so the
    import happens under a temporarily rewritten ``sys.path`` /
    ``sys.modules``; both are restored before returning. The loaded module
    is cached in ``_STOCK_CACHE`` so the swap only ever happens once.

    Mirrors the pattern from pptx-studio's
    test_python_pptx_api_parity.py::_load_standard_module.
    """
    global _STOCK_CACHE
    if _STOCK_CACHE is not None:
        return _STOCK_CACHE

    saved_path = list(sys.path)
    saved_docx = {name: mod for name, mod in sys.modules.items() if name.startswith("docx")}
    for name in [key for key in sys.modules if key.startswith("docx")]:
        del sys.modules[name]

    sys.path.insert(0, stock_site_packages)
    try:
        _STOCK_CACHE = importlib.import_module("docx")
        return _STOCK_CACHE
    finally:
        sys.path[:] = saved_path
        # Drop the freshly imported stock modules and restore our own
        # `docx.*` entries so later imports of the Athena SDK work. (The
        # cached stock module stays importable as long as _STOCK_CACHE
        # holds a reference.)
        for name in [key for key in sys.modules if key.startswith("docx")]:
            del sys.modules[name]
        sys.modules.update(saved_docx)
62
+
63
+
64
# ── feature types ──────────────────────────────────────────────────────
# Normalized shapes produced by extract_features(). Plain TypedDicts so
# instances are ordinary dicts: they compare with `==` and serialize to
# JSON with no custom code.


class RunFeature(TypedDict, total=False):
    """One run: its text plus the inline formatting the comparator checks."""

    text: str
    bold: bool
    italic: bool
    underline: bool
    color: str | None  # hex string, e.g. "006600", or None


class ParaFeature(TypedDict, total=False):
    """One paragraph: flattened text, resolved style name, and its runs."""

    text: str
    style: str
    runs: list[RunFeature]


class CellFeature(TypedDict):
    """A single table cell's flattened text."""

    text: str


class TableFeature(TypedDict):
    """A table: shape plus the full grid of cell texts."""

    rows: int
    cols: int
    cells: list[list[CellFeature]]  # rows × cols


class DocxFeatures(TypedDict):
    """Everything extracted from one .docx, ready for compare()."""

    paragraphs: list[ParaFeature]
    tables: list[TableFeature]
    paragraph_count: int
    table_count: int
95
+
96
+
97
+ # ── the extractor ──────────────────────────────────────────────────────
98
+
99
+
100
def _ensure_styles_with_effects(docx_path: Path) -> Path:
    """Patch a .docx to contain a stub ``word/stylesWithEffects.xml``.

    New python-docx (≥0.8.10) requires that part when ``word/_rels/
    document.xml.rels`` references it (older Office template legacy).
    Superdoc's export doesn't include it, which makes python-docx raise
    "There is no item named 'word/stylesWithEffects.xml' in the archive"
    the first time a style is dereferenced.

    We add a minimal valid-but-empty styles document. Mutation happens in
    a sibling tmpfile so the original export stays intact for inspection.

    Returns ``docx_path`` unchanged when the part is already present,
    otherwise the path of the patched copy.
    """
    import os
    import shutil
    import tempfile
    import zipfile

    with zipfile.ZipFile(docx_path, "r") as z:
        names = set(z.namelist())
        if "word/stylesWithEffects.xml" in names:
            return docx_path
    # Bug fix: mkstemp returns an OPEN file descriptor alongside the path;
    # the previous version discarded it, leaking one fd per patched file.
    fd, tmp_name = tempfile.mkstemp(prefix="patched_", suffix=".docx")
    os.close(fd)
    patched = Path(tmp_name)
    shutil.copy2(docx_path, patched)
    stub = (
        b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
        b'<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
        b'</w:styles>'
    )
    with zipfile.ZipFile(patched, "a") as z:
        # Re-check defensively before appending to the copy.
        if "word/stylesWithEffects.xml" not in set(z.namelist()):
            z.writestr("word/stylesWithEffects.xml", stub)
    return patched
131
+
132
+
133
def extract_features(docx_path: Path, stock_site_packages: str) -> DocxFeatures:
    """Extract normalized features from a .docx file.

    Parameters
    ----------
    docx_path:
        The .docx to read. It is first patched (into a sibling tempfile)
        so a missing ``word/stylesWithEffects.xml`` part doesn't crash
        newer python-docx.
    stock_site_packages:
        Directory containing an isolated install of stock python-docx.

    Returns
    -------
    A ``DocxFeatures`` dict: paragraph text/style/runs plus table shapes
    and cell texts. Template-level details (fonts, margins, theme colors)
    are deliberately not extracted.
    """
    docx_path = _ensure_styles_with_effects(docx_path)
    # Import stock python-docx under a temporarily rewritten sys.path /
    # sys.modules so it doesn't collide with the Athena SDK's `docx`
    # namespace; both are restored in the `finally` below.
    # (Fix: the previous version also called _load_stock_python_docx()
    # here and immediately discarded the result — redundant work removed.)
    orig_sys_path = sys.path.copy()
    orig_mods = {k: v for k, v in sys.modules.items() if k.startswith("docx")}
    for key in list(sys.modules):
        if key.startswith("docx"):
            del sys.modules[key]
    sys.path.insert(0, stock_site_packages)
    try:
        docx_mod = importlib.import_module("docx")
        stock_Document = docx_mod.Document  # type: ignore[attr-defined]

        doc = stock_Document(str(docx_path))

        # Paragraph features: text, style name, per-run formatting.
        paragraphs: list[ParaFeature] = []
        for p in doc.paragraphs:
            runs: list[RunFeature] = []
            for r in p.runs:
                rf: RunFeature = {
                    "text": r.text,
                    # python-docx formatting flags are tri-state (None =
                    # inherited); collapse them to plain booleans.
                    "bold": bool(r.bold) if r.bold is not None else False,
                    "italic": bool(r.italic) if r.italic is not None else False,
                    "underline": bool(r.underline) if r.underline is not None else False,
                    "color": None,
                }
                try:
                    rgb = r.font.color.rgb
                    if rgb is not None:
                        rf["color"] = str(rgb)
                except Exception:
                    # Theme colors / absent color parts can raise; treat
                    # either as "no explicit color".
                    pass
                runs.append(rf)
            style_name = ""
            try:
                style_name = p.style.name if p.style is not None else ""
            except Exception:
                # Style dereference can fail on exotic templates; fall back.
                style_name = ""
            paragraphs.append({
                "text": p.text,
                "style": style_name,
                "runs": runs,
            })

        # Table features: shape + cell texts.
        tables: list[TableFeature] = []
        for t in doc.tables:
            rows = len(t.rows)
            cols = len(t.rows[0].cells) if rows > 0 else 0
            cells: list[list[CellFeature]] = []
            for row in t.rows:
                cells.append([{"text": c.text} for c in row.cells])
            tables.append({
                "rows": rows,
                "cols": cols,
                "cells": cells,
            })

        return {
            "paragraphs": paragraphs,
            "tables": tables,
            "paragraph_count": len(paragraphs),
            "table_count": len(tables),
        }
    finally:
        # Restore the caller's import state no matter what happened above.
        sys.path[:] = orig_sys_path
        for key in list(sys.modules):
            if key.startswith("docx"):
                del sys.modules[key]
        sys.modules.update(orig_mods)
205
+
206
+
207
# ── comparator ─────────────────────────────────────────────────────────


def compare(stock: DocxFeatures, athena: DocxFeatures) -> list[str]:
    """Return a list of semantic differences. Empty list = pass."""
    diffs: list[str] = []

    # Superdoc's Word-default template often pads the document with an
    # extra empty paragraph (no text, no runs); compare only paragraphs
    # that actually carry content, tolerating the count gap otherwise.
    stock_paras = [p for p in stock["paragraphs"] if p["text"] or p["runs"]]
    athena_paras = [p for p in athena["paragraphs"] if p["text"] or p["runs"]]
    if len(stock_paras) != len(athena_paras):
        diffs.append(
            f"non-empty paragraph count: stock={len(stock_paras)} "
            f"athena={len(athena_paras)}",
        )

    for idx, (s_para, a_para) in enumerate(zip(stock_paras, athena_paras)):
        if s_para["text"] != a_para["text"]:
            diffs.append(
                f"paragraph[{idx}] text: stock={s_para['text']!r} athena={a_para['text']!r}",
            )
        # Style names differ cosmetically between the two stacks
        # ("Heading 1" vs "Heading1"); strip spaces and case-fold first.
        s_style = (s_para.get("style") or "").replace(" ", "").lower()
        a_style = (a_para.get("style") or "").replace(" ", "").lower()
        if s_style != a_style:
            diffs.append(
                f"paragraph[{idx}] style: stock={s_para['style']!r} athena={a_para['style']!r}",
            )

    # Tables: count first, then per-table shape and cell-by-cell text.
    if stock["table_count"] != athena["table_count"]:
        diffs.append(
            f"table count: stock={stock['table_count']} athena={athena['table_count']}",
        )
    for idx, (s_tbl, a_tbl) in enumerate(zip(stock["tables"], athena["tables"])):
        if (s_tbl["rows"], s_tbl["cols"]) != (a_tbl["rows"], a_tbl["cols"]):
            diffs.append(
                f"table[{idx}] shape: stock={s_tbl['rows']}x{s_tbl['cols']} "
                f"athena={a_tbl['rows']}x{a_tbl['cols']}",
            )
        for r_i, (s_row, a_row) in enumerate(zip(s_tbl["cells"], a_tbl["cells"])):
            for c_i, (s_cell, a_cell) in enumerate(zip(s_row, a_row)):
                if s_cell["text"] != a_cell["text"]:
                    diffs.append(
                        f"table[{idx}].cell({r_i},{c_i}).text: "
                        f"stock={s_cell['text']!r} athena={a_cell['text']!r}",
                    )

    return diffs
@@ -0,0 +1,453 @@
1
+ """Fidelity runner — orchestrates stock python-docx + athena-python-docx
2
+ across all cases in ``cases.CASES`` and prints a scorecard.
3
+
4
+ Usage
5
+ -----
6
+ cd agora && doppler run --project agora --config dev -- \
7
+ uv run python ../docx-studio/python-sdk/tests/fidelity/runner.py
8
+
9
+ The runner needs:
10
+ - a Daytona API key (from agora settings) — used to spin up a sandbox
11
+ and execute the athena-python-docx side
12
+ - a Keryx private key + WS URL (from agora settings) — to sign a token
13
+ for the sandbox
14
+ - a reusable SuperDoc asset to write against (default asset on staging
15
+ is hard-coded; override with ATHENA_DOCX_FIDELITY_ASSET_ID)
16
+ - stock python-docx installed somewhere (we install it on-demand into
17
+ ``/tmp/docx-fidelity-stock-site-packages`` if missing)
18
+
19
+ Output is a table listing each case with status (PASS / FAIL / STUB-HIT /
20
+ ERROR), plus any diffs.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import json
27
+ import os
28
+ import subprocess
29
+ import sys
30
+ import time
31
+ from pathlib import Path
32
+ from typing import Any
33
+
34
+ # Allow running this file as a script: add the sdk root so "docx.*" and
35
+ # the tests/fidelity package both resolve.
36
+ _SDK_ROOT = Path(__file__).resolve().parents[2]
37
+ sys.path.insert(0, str(_SDK_ROOT))
38
+
39
+ from tests.fidelity.cases import CASES, Case # noqa: E402
40
+ from tests.fidelity.extract import compare, extract_features # noqa: E402
41
+
42
+
43
# ── configuration ─────────────────────────────────────────────────────

# SuperDoc asset the athena side writes into. Overridable so CI / local
# runs can use their own scratch document; its contents are cleared
# between cases, so do NOT point this at a document anyone cares about.
DEFAULT_ASSET_ID = os.environ.get(
    "ATHENA_DOCX_FIDELITY_ASSET_ID",
    "asset_de593a96-3031-45ea-bc52-60cb4b2b16ad",  # "PR18540 SuperDoc Test"
)
# Isolated install target for stock python-docx (see ensure_stock_python_docx).
STOCK_SITE_PACKAGES = Path("/tmp/docx-fidelity-stock-site-packages")
# Where both stock-produced and Superdoc-exported .docx artifacts land.
ARTIFACT_DIR = Path("/tmp/docx-fidelity-artifacts")
51
+
52
+
53
+ # ── stock python-docx setup ───────────────────────────────────────────
54
+
55
+
56
def ensure_stock_python_docx() -> Path:
    """Install stock python-docx into an isolated directory if not present.

    Idempotent: the presence of ``docx/__init__.py`` under
    ``STOCK_SITE_PACKAGES`` is treated as "already installed".
    """
    if (STOCK_SITE_PACKAGES / "docx" / "__init__.py").exists():
        return STOCK_SITE_PACKAGES
    print(f"[fidelity] installing stock python-docx -> {STOCK_SITE_PACKAGES}")
    STOCK_SITE_PACKAGES.mkdir(parents=True, exist_ok=True)
    # Use `uv pip install --target` — agora's .venv is uv-managed and
    # doesn't include stdlib `pip`. Fall back to `python -m pip` only if
    # `uv` isn't on PATH (shouldn't happen inside `doppler run -- uv run`).
    uv_bin = _which("uv")
    prefix = [uv_bin, "pip"] if uv_bin else [sys.executable, "-m", "pip"]
    subprocess.run(
        [*prefix, "install", "--quiet", "--target", str(STOCK_SITE_PACKAGES), "python-docx>=1.1"],
        check=True,
    )
    return STOCK_SITE_PACKAGES
73
+
74
+
75
def _which(name: str) -> str | None:
    """Return the absolute path of *name* on PATH, or None if not found."""
    from shutil import which
    return which(name)
78
+
79
+
80
def run_stock_script(script: str, out_path: Path) -> None:
    """Execute *script* under stock python-docx, saving the doc to *out_path*.

    A child interpreter is used so the import of `docx` inside it resolves
    to real python-docx (our isolated stock site-packages is pushed onto
    its sys.path) rather than the Athena SDK's `docx` namespace.
    """
    stock_dir = ensure_stock_python_docx()
    lines = [
        "import sys",
        f"sys.path.insert(0, {str(stock_dir)!r})",
        "from docx import Document",
        "doc = Document()",
        script,
        f"doc.save({str(out_path)!r})",
    ]
    program = "\n".join(lines) + "\n"
    proc = subprocess.run(
        [sys.executable, "-c", program],
        capture_output=True,
        text=True,
        timeout=30,
    )
    if proc.returncode:
        raise RuntimeError(
            f"stock python-docx subprocess failed:\n"
            f" stdout: {proc.stdout}\n"
            f" stderr: {proc.stderr}",
        )
107
+
108
+
109
+ # ── athena-python-docx via Daytona ────────────────────────────────────
110
+
111
+
112
def build_sandbox_script(cases_batch: list[Case], asset_id: str) -> str:
    """Return a Python program to run inside the Daytona sandbox.

    It iterates cases, for each:
      - clears the doc via superdoc-sdk.doc.clear_content()
      - executes the case's script with `doc` pre-bound
      - exports the resulting .docx to /tmp/fidelity_out/<case_name>.docx
      - records success/exception into /tmp/fidelity_results.json

    The returned source is built from an f-string template: literal braces
    inside it are doubled (``{{ }}``) so only ``{asset_id!r}`` and
    ``{cases_json!r}`` are substituted here; the inner ``f"..."`` strings
    survive verbatim into the sandbox program.

    NOTE(review): the template's inline comment claims RGBColor/Pt are
    "intentionally not pre-bound", yet ``local_ns`` does pre-bind them —
    confirm which is intended. ``DocOpenParams`` and ``traceback`` are
    imported by the template but never used.
    """
    # Serialize only the fields the sandbox loop needs; embedding via !r
    # turns the JSON into a quoted Python string literal in the template.
    cases_json = json.dumps(
        [{"name": c.name, "script": c.script, "expected_exc": c.expected_athena_exc} for c in cases_batch],
    )
    return f'''
import asyncio
import json
import os
import traceback
from pathlib import Path

OUT_DIR = Path("/tmp/fidelity_out")
OUT_DIR.mkdir(parents=True, exist_ok=True)
ASSET_ID = {asset_id!r}
CASES = json.loads({cases_json!r})

from superdoc import AsyncSuperDocClient
from superdoc.generated.client import DocOpenParams, DocOpenParamsCollaborationVariant1

async def with_session(callback):
    token = os.environ["SUPERDOC_COLLAB_TOKEN"]
    ws_url = os.environ["KERYX_WS_URL"]
    ws_id = os.environ["ATHENA_WORKSPACE_ID"]
    client = AsyncSuperDocClient(
        user={{"name":"fidelity-runner","email":"fidelity@athenaintel.com"}},
        env={{"SUPERDOC_COLLAB_TOKEN": token}},
        request_timeout_ms=30_000,
        watchdog_timeout_ms=60_000,
    )
    try:
        collab: DocOpenParamsCollaborationVariant1 = {{
            "url": f"{{ws_url}}/ws/{{ws_id}}",
            "documentId": ASSET_ID,
            "tokenEnv": "SUPERDOC_COLLAB_TOKEN",
            "providerType": "y-websocket",
        }}
        h = await client.open({{"collaboration": collab}})
        try:
            return await callback(h)
        finally:
            await asyncio.sleep(0.5)
            await h.close({{"discard": True}})
    finally:
        await client.dispose()


async def clear_asset():
    async def _clear(h):
        try:
            await h.clear_content({{}})
        except Exception as e:
            # If "already empty" fine
            if "already empty" not in str(e).lower():
                raise
    await with_session(_clear)


async def export_to(path):
    async def _export(h):
        await h.save({{"out": str(path)}})
    await with_session(_export)


def run_case_locally(script):
    """Run script against athena-python-docx via sync facade."""
    from docx import Document
    with Document(ASSET_ID) as doc:
        # RGBColor and Pt are intentionally not pre-bound — each script that
        # needs them imports them explicitly (matching how agents write code).
        from docx.shared import RGBColor, Pt
        local_ns = dict(doc=doc, RGBColor=RGBColor, Pt=Pt)
        exec(compile(script, "<case>", "exec"), local_ns, local_ns)
        doc.save()


results = []
for case in CASES:
    name = case["name"]
    script = case["script"]
    expected = case["expected_exc"]

    # Step 1: clear the asset
    try:
        asyncio.run(clear_asset())
    except Exception as e:
        results.append({{
            "name": name,
            "status": "SETUP_ERROR",
            "detail": f"clear_content failed: {{e!r}}",
        }})
        continue

    # Step 2: run the case's script via athena-python-docx
    script_error = None
    try:
        run_case_locally(script)
    except Exception as e:
        script_error = (type(e).__name__, str(e)[:400])

    # Step 3: compare error vs expectation
    if expected is not None:
        if script_error is not None and script_error[0] == expected:
            results.append({{"name": name, "status": "STUB_HIT", "detail": script_error[1][:200]}})
            continue
        if script_error is None:
            results.append({{"name": name, "status": "UNEXPECTED_PASS", "detail": f"expected {{expected}} but succeeded"}})
            continue
        results.append({{
            "name": name,
            "status": "WRONG_EXC",
            "detail": f"expected {{expected}}, got {{script_error[0]}}: {{script_error[1]}}",
        }})
        continue

    if script_error is not None:
        results.append({{
            "name": name,
            "status": "SCRIPT_ERROR",
            "detail": f"{{script_error[0]}}: {{script_error[1]}}",
        }})
        continue

    # Step 4: export and record artifact
    out_path = OUT_DIR / f"{{name}}.docx"
    try:
        asyncio.run(export_to(out_path))
    except Exception as e:
        results.append({{"name": name, "status": "EXPORT_ERROR", "detail": f"{{type(e).__name__}}: {{e}}"}})
        continue

    size = out_path.stat().st_size if out_path.exists() else 0
    results.append({{"name": name, "status": "OK", "detail": f"exported {{size}} bytes", "export_path": str(out_path)}})

Path("/tmp/fidelity_results.json").write_text(json.dumps(results, indent=2))
print(json.dumps(results, indent=2))
'''
256
+
257
+
258
async def run_daytona_batch(cases: list[Case], asset_id: str) -> tuple[list[dict], dict[str, bytes]]:
    """Spin up a sandbox, run all cases, download every exported .docx.

    Returns ``(results, artifacts)``: the per-case status dicts produced by
    the in-sandbox runner script, plus a map of case name -> exported
    ``.docx`` bytes for every case whose status is ``OK``.  The sandbox is
    deleted in a ``finally`` block regardless of success.
    """
    # Lazy imports keep the heavy agora/daytona stack off module import time.
    from agora.services.keryx.client import get_keryx_client
    from agora.services.auth.docx_auth import build_docx_collab_bundle  # noqa: F401 ensures agora is loadable
    from agora.settings import settings
    from daytona_sdk import AsyncDaytona, CreateSandboxFromSnapshotParams, DaytonaConfig

    routing_bundle = await _build_collab_bundle(asset_id)
    kc = get_keryx_client()
    # Fresh token for the sandbox.
    # NOTE(review): relies on Keryx's private _sign_token — confirm there is
    # no public token-minting API before depending on this long-term.
    token = kc._sign_token(user_id="fidelity-runner")  # noqa: SLF001

    daytona = AsyncDaytona(
        config=DaytonaConfig(
            api_key=settings.daytona_full_api_key,
            api_url=settings.daytona_api_url,
            organization_id=settings.daytona_organization_id,
            target=settings.daytona_target,
        ),
    )
    # Fall back to a pinned snapshot name when settings do not override it.
    snapshot = settings.daytona_document_exec_snapshot or "document-exec:v1"
    print(f"[fidelity] daytona snapshot: {snapshot}")

    sandbox = await daytona.create(
        CreateSandboxFromSnapshotParams(
            name=f"docx-fidelity-{int(time.time())}",
            snapshot=snapshot,
            # The in-sandbox SDK reads these to reach the collab server.
            env_vars={
                "SUPERDOC_COLLAB_TOKEN": token,
                "KERYX_WS_URL": settings.keryx_ws_url,
                "ATHENA_WORKSPACE_ID": routing_bundle["ATHENA_WORKSPACE_ID"],
            },
            # Keep the sandbox short-lived (presumably minutes — TODO confirm units).
            auto_stop_interval=30,
            auto_archive_interval=60,
        ),
        timeout=120,
    )
    print(f"[fidelity] sandbox {sandbox.id} started")

    try:
        # Upload the generated runner script and execute it inside the sandbox.
        script = build_sandbox_script(cases, asset_id)
        await sandbox.fs.upload_file(script.encode(), "/tmp/fidelity_runner.py")
        exec_result = await sandbox.process.exec(
            "python3 /tmp/fidelity_runner.py",
            timeout=600,
        )
        print(f"[fidelity] sandbox exit_code={exec_result.exit_code}")
        # Load the results JSON written by the runner script.
        raw = await sandbox.fs.download_file("/tmp/fidelity_results.json")
        results: list[dict] = json.loads(raw.decode())

        # Download every successfully-exported .docx artifact; a failed
        # download downgrades that case's status dict in place.
        artifacts: dict[str, bytes] = {}
        for r in results:
            if r["status"] == "OK" and "export_path" in r:
                try:
                    data = await sandbox.fs.download_file(r["export_path"])
                    artifacts[r["name"]] = data
                except Exception as e:
                    r["status"] = "DOWNLOAD_ERROR"
                    r["detail"] = f"fs.download_file: {e}"
        return results, artifacts
    finally:
        # Best-effort cleanup: never let a delete failure mask the real result.
        try:
            await sandbox.delete()
        except Exception as e:
            print(f"[fidelity] sandbox delete failed: {e}")
325
+
326
+
327
async def _build_collab_bundle(asset_id: str) -> dict[str, str]:
    """Fetch the docx collab auth bundle for *asset_id* as a plain dict."""
    from agora.services.auth.docx_auth import build_docx_collab_bundle

    bundle = await build_docx_collab_bundle(asset_id=asset_id, user_id="fidelity-runner")
    return dict(bundle)
331
+
332
+
333
+ # ── scorecard ─────────────────────────────────────────────────────────
334
+
335
+
336
def print_scorecard(
    cases: list[Case],
    daytona_results: list[dict],
    diffs_by_name: dict[str, list[str]],
) -> int:
    """Print a scorecard. Return exit code (0 = all green).

    Args:
        cases: All scheduled cases; drives row order.  A case with no entry
            in ``daytona_results`` is reported as MISSING.
        daytona_results: Per-case dicts from the sandbox run, each with at
            least ``name`` and ``status`` and usually ``detail``.
        diffs_by_name: Content-level diffs keyed by case name; consulted
            only for cases that ran OK and did not set ``skip_content_diff``.

    Returns:
        0 when every case is OK or an expected stub hit (STUB_HIT),
        1 otherwise (including OK runs downgraded to DIFF).
    """
    print()
    print("=" * 88)
    # NOTE(review): version label is hard-coded — confirm it tracks the package version.
    print("FIDELITY SCORECARD — athena-python-docx 0.1.5 vs stock python-docx")
    print("=" * 88)

    by_name = {r["name"]: r for r in daytona_results}
    status_icons = {
        "OK": "✅",
        "STUB_HIT": "🟡",
        "UNEXPECTED_PASS": "🟠",
        "WRONG_EXC": "🔴",
        "SCRIPT_ERROR": "❌",
        "SETUP_ERROR": "❌",
        "EXPORT_ERROR": "❌",
        "DOWNLOAD_ERROR": "❌",
    }

    fails = 0
    for case in cases:
        r = by_name.get(case.name, {"status": "MISSING", "detail": "not run"})
        status = r["status"]
        icon = status_icons.get(status, "❓")
        diff_suffix = ""
        diffs: list[str] = []
        if status == "OK" and not case.skip_content_diff:
            diffs = diffs_by_name.get(case.name, [])
            if diffs:
                # An OK run whose exported content differs from the stock
                # output is downgraded to DIFF (counted as a failure below).
                status = "DIFF"
                icon = "⚠️"
                diff_suffix = f" ({len(diffs)} diff(s))"

        line = f"  {icon} [{status:14}] {case.name:32} — {case.description}{diff_suffix}"
        print(line)
        if status not in {"OK", "STUB_HIT"}:
            if status == "DIFF":
                # Show at most three diffs, then a summary of the remainder.
                for d in diffs[:3]:
                    print(f"      ↳ {d}")
                if len(diffs) > 3:
                    print(f"      ↳ …and {len(diffs)-3} more")
            else:
                detail = r.get("detail", "")
                if detail:
                    print(f"      ↳ {detail[:150]}")
            # Fix: the original re-tested `status not in {"STUB_HIT"}` here,
            # which is always true inside this branch — count unconditionally.
            fails += 1

    print()
    n_ok = sum(1 for r in daytona_results if r["status"] == "OK")
    n_stub = sum(1 for r in daytona_results if r["status"] == "STUB_HIT")
    n_diff = sum(
        1 for c in cases
        if by_name.get(c.name, {}).get("status") == "OK"
        and diffs_by_name.get(c.name)
    )
    total = len(cases)
    print(f"  {n_ok}/{total} cases ran successfully in Athena SDK")
    print(f"  {n_stub}/{total} cases hit expected NotImplementedError stubs (known Phase-2 gaps)")
    print(f"  {n_diff}/{n_ok} successful-run cases have content-level diffs vs stock python-docx")
    print(f"  exit code: {0 if fails == 0 else 1}")
    return 0 if fails == 0 else 1
401
+
402
+
403
+ # ── main ──────────────────────────────────────────────────────────────
404
+
405
+
406
async def main() -> int:
    """Drive the full fidelity comparison and return the shell exit code."""
    print(f"[fidelity] case count: {len(CASES)}")
    print(f"[fidelity] target asset: {DEFAULT_ASSET_ID}")
    ensure_stock_python_docx()
    ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

    # Phase 1 — baseline: render every case with stock python-docx locally.
    print("[fidelity] running stock python-docx side ...")
    stock_paths: dict[str, Path] = {}
    for case in CASES:
        baseline = ARTIFACT_DIR / f"stock_{case.name}.docx"
        try:
            run_stock_script(case.script, baseline)
        except Exception as e:
            print(f"  !! {case.name} stock subprocess error: {e}")
        else:
            stock_paths[case.name] = baseline

    # Phase 2 — candidate: run every case via athena-python-docx in Daytona.
    print("[fidelity] running athena-python-docx via Daytona ...")
    daytona_results, athena_artifacts = await run_daytona_batch(CASES, DEFAULT_ASSET_ID)

    # Phase 3 — persist candidate artifacts and diff features vs the baseline.
    diffs_by_name: dict[str, list[str]] = {}
    for case in CASES:
        if case.name not in athena_artifacts or case.skip_content_diff:
            continue
        candidate = ARTIFACT_DIR / f"athena_{case.name}.docx"
        candidate.write_bytes(athena_artifacts[case.name])
        baseline = stock_paths.get(case.name)
        if baseline is None:
            continue  # no stock baseline (subprocess failed in phase 1)
        try:
            base_feat = extract_features(baseline, str(STOCK_SITE_PACKAGES))
            cand_feat = extract_features(candidate, str(STOCK_SITE_PACKAGES))
            diffs_by_name[case.name] = compare(base_feat, cand_feat)
        except Exception as e:
            diffs_by_name[case.name] = [f"extractor error: {e}"]

    # Phase 4 — scorecard and exit code.
    code = print_scorecard(CASES, daytona_results, diffs_by_name)
    print(f"\n[fidelity] artifacts in {ARTIFACT_DIR}")
    return code
450
+
451
+
452
if __name__ == "__main__":
    # Propagate main()'s exit code to the shell (SystemExit == sys.exit).
    raise SystemExit(asyncio.run(main()))