athena-python-docx 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/PKG-INFO +1 -1
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/__init__.py +1 -1
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/pyproject.toml +1 -1
- athena_python_docx-0.1.6/tests/fidelity/README.md +71 -0
- athena_python_docx-0.1.6/tests/fidelity/__init__.py +14 -0
- athena_python_docx-0.1.6/tests/fidelity/cases.py +155 -0
- athena_python_docx-0.1.6/tests/fidelity/extract.py +259 -0
- athena_python_docx-0.1.6/tests/fidelity/runner.py +453 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/.gitignore +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/CLAUDE.md +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/README.md +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/_batching.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/api.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/client.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/document.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/__init__.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/table.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/enum/text.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/errors.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/shared.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/table.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/__init__.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/paragraph.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/text/run.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/docx/typing.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/scripts/publish.sh +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/__init__.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/conftest.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_commands.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_python_docx_api_parity.py +0 -0
- {athena_python_docx-0.1.5 → athena_python_docx-0.1.6}/tests/test_smoke_integration.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: athena-python-docx
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
|
|
5
5
|
Project-URL: Homepage, https://athenaintelligence.ai
|
|
6
6
|
Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "athena-python-docx"
|
|
7
|
-
version = "0.1.
|
|
7
|
+
version = "0.1.6"
|
|
8
8
|
description = "Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Fidelity tests
|
|
2
|
+
|
|
3
|
+
End-to-end comparison of `athena-python-docx` against stock `python-docx`.
|
|
4
|
+
For each test case (a small script using the python-docx API), the runner:
|
|
5
|
+
|
|
6
|
+
1. Executes the script via **stock `python-docx`** (installed to an
|
|
7
|
+
isolated site-packages at `/tmp/docx-fidelity-stock-site-packages`)
|
|
8
|
+
and saves the resulting `.docx` to `/tmp/docx-fidelity-artifacts/`.
|
|
9
|
+
2. Executes the same script via **`athena-python-docx`** inside a
|
|
10
|
+
Daytona sandbox (the `document-exec:v<N>` image). The resulting Y.Doc
|
|
11
|
+
is exported to `.docx` by Superdoc and downloaded back to
|
|
12
|
+
`/tmp/docx-fidelity-artifacts/`.
|
|
13
|
+
3. Reads both `.docx` files back with stock `python-docx` and extracts a
|
|
14
|
+
normalized feature set (paragraph text + style, runs + bold/italic/
|
|
15
|
+
underline/color, table row/col counts + cell texts).
|
|
16
|
+
4. Compares the feature sets and prints a scorecard.
|
|
17
|
+
|
|
18
|
+
Stylistic differences that stem from the two libraries' different default
|
|
19
|
+
templates (font, margins, theme colors) are intentionally ignored — the
|
|
20
|
+
focus is on the content-level operations an agent actually cares about.
|
|
21
|
+
|
|
22
|
+
## Running
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
cd agora && doppler run --project agora --config dev -- \
|
|
26
|
+
uv run python \
|
|
27
|
+
"$(git rev-parse --show-toplevel)/docx-studio/python-sdk/tests/fidelity/runner.py"
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Configuration via env vars:
|
|
31
|
+
|
|
32
|
+
| Var | Default | Purpose |
|
|
33
|
+
|-----|---------|---------|
|
|
34
|
+
| `ATHENA_DOCX_FIDELITY_ASSET_ID` | `asset_de593a96-…-2b16ad` (existing staging test doc) | SuperDoc asset to write against — contents are cleared via `doc.clear_content()` between cases |
|
|
35
|
+
| `DAYTONA_DOCUMENT_EXEC_SNAPSHOT` | `document-exec:v1` (from agora settings) | Snapshot to spin up |
|
|
36
|
+
|
|
37
|
+
## Interpreting results
|
|
38
|
+
|
|
39
|
+
| Status | Meaning |
|
|
40
|
+
|---|---|
|
|
41
|
+
| `✅ OK` | Athena script succeeded and content matches stock python-docx |
|
|
42
|
+
| `⚠️ DIFF` | Athena script succeeded but content differs — see listed diffs |
|
|
43
|
+
| `🟡 STUB_HIT` | Case hit an expected `NotImplementedError` stub (a known Phase-2 gap) |
|
|
44
|
+
| `🟠 UNEXPECTED_PASS` | Case was expected to raise a stub error, but didn't — probably means the Phase-2 gap has been filled; re-classify the case |
|
|
45
|
+
| `🔴 WRONG_EXC` | Case raised, but the wrong exception type |
|
|
46
|
+
| `❌ SCRIPT_ERROR` | Athena side crashed with an unexpected exception |
|
|
47
|
+
| `❌ SETUP_ERROR` | Pre-flight `doc.clear_content()` failed |
|
|
48
|
+
| `❌ EXPORT_ERROR` | Superdoc couldn't save the Y.Doc to `.docx` |
|
|
49
|
+
| `❌ DOWNLOAD_ERROR` | `fs.download_file` on the Daytona sandbox failed |
|
|
50
|
+
|
|
51
|
+
## Adding cases
|
|
52
|
+
|
|
53
|
+
Edit `cases.py` and add a `Case(...)` entry. Keep scripts minimal (one
|
|
54
|
+
surface per case where possible). If the operation is a known Phase-2
|
|
55
|
+
stub, set `expected_athena_exc="NotImplementedError"` so the case is
|
|
56
|
+
scored as `STUB_HIT` rather than as a failure.
|
|
57
|
+
|
|
58
|
+
## Known caveats
|
|
59
|
+
|
|
60
|
+
- The runner shares **one** SuperDoc asset across all cases, clearing
|
|
61
|
+
content between each via `doc.clear_content()`. If `clear_content`
|
|
62
|
+
ever regresses, every case after the first will be corrupted. The
|
|
63
|
+
runner reports that as `SETUP_ERROR`.
|
|
64
|
+
- Stock python-docx's default template pads the document with a trailing
|
|
65
|
+
empty paragraph. The comparator filters empty paragraphs before
|
|
66
|
+
counting, but paragraph indices in diff output are 0-based on the
|
|
67
|
+
non-empty subset only.
|
|
68
|
+
- `skip_content_diff=True` on a case tells the runner to only check that
|
|
69
|
+
the athena side didn't raise; useful for structural ops
|
|
70
|
+
(`add_page_break`) where the exported OOXML legitimately differs from
|
|
71
|
+
the stock output.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Fidelity tests for athena-python-docx vs stock python-docx.
|
|
2
|
+
|
|
3
|
+
Each test case is a small script that uses the python-docx `docx` namespace.
|
|
4
|
+
The same script runs against:
|
|
5
|
+
- stock `python-docx` (in an isolated venv) -> produces .docx A
|
|
6
|
+
- `athena-python-docx` via Daytona + Superdoc -> produces .docx B
|
|
7
|
+
|
|
8
|
+
Both .docx files are then read back through stock python-docx for feature
|
|
9
|
+
extraction, and the extracted structures are compared. Differences that
|
|
10
|
+
matter (missing text, wrong style, different cell count) are scored as
|
|
11
|
+
FAIL; stylistic differences (default font, theme color) are tolerated.
|
|
12
|
+
|
|
13
|
+
See `README.md` for running.
|
|
14
|
+
"""
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
"""Fidelity test cases.
|
|
2
|
+
|
|
3
|
+
Each case is a snippet that operates on a pre-bound ``doc`` (the Document
|
|
4
|
+
instance). The same snippet runs against stock python-docx and against
|
|
5
|
+
athena-python-docx via Daytona.
|
|
6
|
+
|
|
7
|
+
Conventions
|
|
8
|
+
-----------
|
|
9
|
+
- Use only the python-docx public API — no private `_element` access, no
|
|
10
|
+
imports beyond what's in the preamble.
|
|
11
|
+
- Keep cases minimal (one surface per case where possible) so a failure
|
|
12
|
+
pinpoints the SDK gap.
|
|
13
|
+
- Mark known-stubbed operations with ``expected_exc`` — the case passes
|
|
14
|
+
if the stub actually raises (regression detection).
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass(frozen=True)
|
|
23
|
+
class Case:
|
|
24
|
+
name: str
|
|
25
|
+
description: str
|
|
26
|
+
script: str
|
|
27
|
+
# When set, the athena-python-docx run is expected to raise this exception
|
|
28
|
+
# type. The local python-docx run is never expected to raise — if it does,
|
|
29
|
+
# the case is broken and reported as an infrastructure error.
|
|
30
|
+
expected_athena_exc: str | None = None
|
|
31
|
+
# If True, the athena side is not compared against stock — only asserted
|
|
32
|
+
# to succeed. Use when the op has no content-level effect worth diffing
|
|
33
|
+
# (e.g. add_page_break), or when the stock and Superdoc template diverge
|
|
34
|
+
# too much for a meaningful diff.
|
|
35
|
+
skip_content_diff: bool = False
|
|
36
|
+
# Optional tags for filtering.
|
|
37
|
+
tags: tuple[str, ...] = field(default_factory=tuple)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
CASES: list[Case] = [
|
|
41
|
+
# ---- Core append ops -----------------------------------------------------
|
|
42
|
+
Case(
|
|
43
|
+
name="add_paragraph_with_text",
|
|
44
|
+
description="One plain paragraph containing a short string.",
|
|
45
|
+
script='doc.add_paragraph("hello world")',
|
|
46
|
+
tags=("paragraph",),
|
|
47
|
+
),
|
|
48
|
+
Case(
|
|
49
|
+
name="add_paragraph_empty",
|
|
50
|
+
description="A paragraph with no text (docx.add_paragraph()).",
|
|
51
|
+
script="doc.add_paragraph()",
|
|
52
|
+
tags=("paragraph",),
|
|
53
|
+
),
|
|
54
|
+
Case(
|
|
55
|
+
name="add_heading_level_1",
|
|
56
|
+
description="Heading 1 with text.",
|
|
57
|
+
script='doc.add_heading("H1 heading", level=1)',
|
|
58
|
+
tags=("heading",),
|
|
59
|
+
),
|
|
60
|
+
Case(
|
|
61
|
+
name="add_heading_level_2",
|
|
62
|
+
description="Heading 2 with text.",
|
|
63
|
+
script='doc.add_heading("H2 heading", level=2)',
|
|
64
|
+
tags=("heading",),
|
|
65
|
+
),
|
|
66
|
+
Case(
|
|
67
|
+
name="add_heading_level_3",
|
|
68
|
+
description="Heading 3 with text.",
|
|
69
|
+
script='doc.add_heading("H3 heading", level=3)',
|
|
70
|
+
tags=("heading",),
|
|
71
|
+
),
|
|
72
|
+
# ---- Runs + inline formatting --------------------------------------------
|
|
73
|
+
Case(
|
|
74
|
+
name="add_run_bold_italic",
|
|
75
|
+
description="Paragraph with two runs, bold + italic.",
|
|
76
|
+
script=(
|
|
77
|
+
"p = doc.add_paragraph()\n"
|
|
78
|
+
'r1 = p.add_run("bold run")\n'
|
|
79
|
+
"r1.bold = True\n"
|
|
80
|
+
'r2 = p.add_run("italic run")\n'
|
|
81
|
+
"r2.italic = True"
|
|
82
|
+
),
|
|
83
|
+
tags=("run", "formatting"),
|
|
84
|
+
),
|
|
85
|
+
Case(
|
|
86
|
+
name="add_run_colored",
|
|
87
|
+
description="Paragraph with a run colored green via RGBColor.",
|
|
88
|
+
script=(
|
|
89
|
+
"from docx.shared import RGBColor\n"
|
|
90
|
+
"p = doc.add_paragraph()\n"
|
|
91
|
+
'r = p.add_run("green text")\n'
|
|
92
|
+
"r.font.color.rgb = RGBColor(0x00, 0x66, 0x00)"
|
|
93
|
+
),
|
|
94
|
+
tags=("run", "formatting", "color"),
|
|
95
|
+
),
|
|
96
|
+
Case(
|
|
97
|
+
name="add_run_font_size",
|
|
98
|
+
description="Paragraph with a run set to 18pt.",
|
|
99
|
+
script=(
|
|
100
|
+
"from docx.shared import Pt\n"
|
|
101
|
+
"p = doc.add_paragraph()\n"
|
|
102
|
+
'r = p.add_run("sized 18pt")\n'
|
|
103
|
+
"r.font.size = Pt(18)"
|
|
104
|
+
),
|
|
105
|
+
tags=("run", "formatting", "font"),
|
|
106
|
+
),
|
|
107
|
+
# ---- Tables --------------------------------------------------------------
|
|
108
|
+
Case(
|
|
109
|
+
name="add_table_2x2_empty",
|
|
110
|
+
description="Create an empty 2x2 table; verify row/col count.",
|
|
111
|
+
script="doc.add_table(rows=2, cols=2)",
|
|
112
|
+
tags=("table",),
|
|
113
|
+
),
|
|
114
|
+
Case(
|
|
115
|
+
name="add_table_3x3_with_style",
|
|
116
|
+
description="Create a 3x3 TableGrid table.",
|
|
117
|
+
script='doc.add_table(rows=3, cols=3, style="TableGrid")',
|
|
118
|
+
tags=("table", "style"),
|
|
119
|
+
),
|
|
120
|
+
Case(
|
|
121
|
+
name="cell_text_setter",
|
|
122
|
+
description="Set cell(0,0).text on a 2x2 table — known NotImplementedError in 0.1.5.",
|
|
123
|
+
script=(
|
|
124
|
+
"t = doc.add_table(rows=2, cols=2)\n"
|
|
125
|
+
't.cell(0, 0).text = "A1"'
|
|
126
|
+
),
|
|
127
|
+
expected_athena_exc="NotImplementedError",
|
|
128
|
+
tags=("table", "stub"),
|
|
129
|
+
),
|
|
130
|
+
# ---- Structural ops ------------------------------------------------------
|
|
131
|
+
Case(
|
|
132
|
+
name="add_page_break",
|
|
133
|
+
description="Append a page break; content-diff skipped (template differs).",
|
|
134
|
+
script="doc.add_page_break()",
|
|
135
|
+
skip_content_diff=True,
|
|
136
|
+
tags=("structural",),
|
|
137
|
+
),
|
|
138
|
+
# ---- Multi-op combo ------------------------------------------------------
|
|
139
|
+
Case(
|
|
140
|
+
name="combo_heading_paragraph_table",
|
|
141
|
+
description="Heading + paragraph with formatted runs + 2x2 table (no cell text).",
|
|
142
|
+
script=(
|
|
143
|
+
"from docx.shared import RGBColor\n"
|
|
144
|
+
'doc.add_heading("Report", level=1)\n'
|
|
145
|
+
"p = doc.add_paragraph()\n"
|
|
146
|
+
'r1 = p.add_run("Revenue grew ")\n'
|
|
147
|
+
'r2 = p.add_run("12%")\n'
|
|
148
|
+
"r2.bold = True\n"
|
|
149
|
+
"r2.font.color.rgb = RGBColor(0x00, 0x66, 0x00)\n"
|
|
150
|
+
'p.add_run(" YoY.")\n'
|
|
151
|
+
'doc.add_table(rows=2, cols=2, style="TableGrid")'
|
|
152
|
+
),
|
|
153
|
+
tags=("combo",),
|
|
154
|
+
),
|
|
155
|
+
]
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
"""Feature extractor — reads a .docx (produced by either SDK) with stock
|
|
2
|
+
python-docx and returns a normalized structure for comparison.
|
|
3
|
+
|
|
4
|
+
The extractor focuses on features that SHOULD round-trip regardless of
|
|
5
|
+
template differences between stock python-docx and Superdoc:
|
|
6
|
+
- paragraph text content and style name
|
|
7
|
+
- runs within each paragraph: text + bold + italic + underline + color
|
|
8
|
+
- table row/col counts + cell texts
|
|
9
|
+
|
|
10
|
+
It does NOT try to compare defaults, fonts, margins, or theme colors —
|
|
11
|
+
those legitimately differ between a blank python-docx template and the
|
|
12
|
+
Superdoc SuperDoc-default template, and would produce false positives.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import importlib
|
|
18
|
+
import sys
|
|
19
|
+
import types
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import TypedDict
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# ── isolated python-docx loader ────────────────────────────────────────
|
|
25
|
+
# We run this extractor in a process that has the athena-python-docx
|
|
26
|
+
# `docx` namespace installed. To read real .docx files we need STOCK
|
|
27
|
+
# python-docx. Load it from an isolated path and return a module handle.
|
|
28
|
+
|
|
29
|
+
_STOCK_CACHE: types.ModuleType | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _load_stock_python_docx(stock_site_packages: str) -> types.ModuleType:
|
|
33
|
+
"""Import the stock python-docx module from an isolated site-packages.
|
|
34
|
+
|
|
35
|
+
Mirrors the pattern from pptx-studio's
|
|
36
|
+
test_python_pptx_api_parity.py::_load_standard_module.
|
|
37
|
+
"""
|
|
38
|
+
global _STOCK_CACHE
|
|
39
|
+
if _STOCK_CACHE is not None:
|
|
40
|
+
return _STOCK_CACHE
|
|
41
|
+
|
|
42
|
+
original_path = sys.path.copy()
|
|
43
|
+
original_modules = {k: v for k, v in sys.modules.items() if k.startswith("docx")}
|
|
44
|
+
for key in list(sys.modules):
|
|
45
|
+
if key.startswith("docx"):
|
|
46
|
+
del sys.modules[key]
|
|
47
|
+
|
|
48
|
+
sys.path.insert(0, stock_site_packages)
|
|
49
|
+
try:
|
|
50
|
+
mod = importlib.import_module("docx")
|
|
51
|
+
_STOCK_CACHE = mod
|
|
52
|
+
return mod
|
|
53
|
+
finally:
|
|
54
|
+
sys.path[:] = original_path
|
|
55
|
+
# Restore our own `docx.*` modules so later imports of the Athena
|
|
56
|
+
# SDK work. (The cached stock module stays importable as long as
|
|
57
|
+
# _STOCK_CACHE holds a reference.)
|
|
58
|
+
for key in list(sys.modules):
|
|
59
|
+
if key.startswith("docx"):
|
|
60
|
+
del sys.modules[key]
|
|
61
|
+
sys.modules.update(original_modules)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ── feature types ──────────────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
class RunFeature(TypedDict, total=False):
|
|
67
|
+
text: str
|
|
68
|
+
bold: bool
|
|
69
|
+
italic: bool
|
|
70
|
+
underline: bool
|
|
71
|
+
color: str | None # hex string, e.g. "006600", or None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class ParaFeature(TypedDict, total=False):
|
|
75
|
+
text: str
|
|
76
|
+
style: str
|
|
77
|
+
runs: list[RunFeature]
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class CellFeature(TypedDict):
|
|
81
|
+
text: str
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class TableFeature(TypedDict):
|
|
85
|
+
rows: int
|
|
86
|
+
cols: int
|
|
87
|
+
cells: list[list[CellFeature]] # rows × cols
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class DocxFeatures(TypedDict):
|
|
91
|
+
paragraphs: list[ParaFeature]
|
|
92
|
+
tables: list[TableFeature]
|
|
93
|
+
paragraph_count: int
|
|
94
|
+
table_count: int
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ── the extractor ──────────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _ensure_styles_with_effects(docx_path: Path) -> Path:
|
|
101
|
+
"""Patch a .docx to contain a stub `word/stylesWithEffects.xml`.
|
|
102
|
+
|
|
103
|
+
New python-docx (≥0.8.10) requires that part when `word/_rels/
|
|
104
|
+
document.xml.rels` references it (older Office template legacy).
|
|
105
|
+
Superdoc's export doesn't include it, which makes python-docx raise
|
|
106
|
+
"There is no item named 'word/stylesWithEffects.xml' in the archive"
|
|
107
|
+
the first time a style is dereferenced.
|
|
108
|
+
|
|
109
|
+
We add a minimal valid-but-empty styles document. Mutation happens in
|
|
110
|
+
a sibling tmpfile so the original export stays intact for inspection.
|
|
111
|
+
"""
|
|
112
|
+
import shutil
|
|
113
|
+
import zipfile
|
|
114
|
+
|
|
115
|
+
import tempfile
|
|
116
|
+
with zipfile.ZipFile(docx_path, "r") as z:
|
|
117
|
+
names = set(z.namelist())
|
|
118
|
+
if "word/stylesWithEffects.xml" in names:
|
|
119
|
+
return docx_path
|
|
120
|
+
patched = Path(tempfile.mkstemp(prefix="patched_", suffix=".docx")[1])
|
|
121
|
+
shutil.copy2(docx_path, patched)
|
|
122
|
+
stub = (
|
|
123
|
+
b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
|
|
124
|
+
b'<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">'
|
|
125
|
+
b'</w:styles>'
|
|
126
|
+
)
|
|
127
|
+
with zipfile.ZipFile(patched, "a") as z:
|
|
128
|
+
if "word/stylesWithEffects.xml" not in set(z.namelist()):
|
|
129
|
+
z.writestr("word/stylesWithEffects.xml", stub)
|
|
130
|
+
return patched
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def extract_features(docx_path: Path, stock_site_packages: str) -> DocxFeatures:
|
|
134
|
+
"""Extract normalized features from a .docx file."""
|
|
135
|
+
docx_path = _ensure_styles_with_effects(docx_path)
|
|
136
|
+
docx_mod = _load_stock_python_docx(stock_site_packages)
|
|
137
|
+
# Open the file. We re-import via the isolated stock docx to avoid
|
|
138
|
+
# confusion with our SDK's `docx` namespace.
|
|
139
|
+
orig_sys_path = sys.path.copy()
|
|
140
|
+
orig_mods = {k: v for k, v in sys.modules.items() if k.startswith("docx")}
|
|
141
|
+
for key in list(sys.modules):
|
|
142
|
+
if key.startswith("docx"):
|
|
143
|
+
del sys.modules[key]
|
|
144
|
+
sys.path.insert(0, stock_site_packages)
|
|
145
|
+
try:
|
|
146
|
+
docx_mod = importlib.import_module("docx")
|
|
147
|
+
stock_Document = docx_mod.Document # type: ignore[attr-defined]
|
|
148
|
+
|
|
149
|
+
doc = stock_Document(str(docx_path))
|
|
150
|
+
|
|
151
|
+
paragraphs: list[ParaFeature] = []
|
|
152
|
+
for p in doc.paragraphs:
|
|
153
|
+
runs: list[RunFeature] = []
|
|
154
|
+
for r in p.runs:
|
|
155
|
+
rf: RunFeature = {
|
|
156
|
+
"text": r.text,
|
|
157
|
+
"bold": bool(r.bold) if r.bold is not None else False,
|
|
158
|
+
"italic": bool(r.italic) if r.italic is not None else False,
|
|
159
|
+
"underline": bool(r.underline) if r.underline is not None else False,
|
|
160
|
+
"color": None,
|
|
161
|
+
}
|
|
162
|
+
try:
|
|
163
|
+
rgb = r.font.color.rgb
|
|
164
|
+
if rgb is not None:
|
|
165
|
+
rf["color"] = str(rgb)
|
|
166
|
+
except Exception:
|
|
167
|
+
pass
|
|
168
|
+
runs.append(rf)
|
|
169
|
+
style_name = ""
|
|
170
|
+
try:
|
|
171
|
+
style_name = p.style.name if p.style is not None else ""
|
|
172
|
+
except Exception:
|
|
173
|
+
style_name = ""
|
|
174
|
+
paragraphs.append({
|
|
175
|
+
"text": p.text,
|
|
176
|
+
"style": style_name,
|
|
177
|
+
"runs": runs,
|
|
178
|
+
})
|
|
179
|
+
|
|
180
|
+
tables: list[TableFeature] = []
|
|
181
|
+
for t in doc.tables:
|
|
182
|
+
rows = len(t.rows)
|
|
183
|
+
cols = len(t.rows[0].cells) if rows > 0 else 0
|
|
184
|
+
cells: list[list[CellFeature]] = []
|
|
185
|
+
for row in t.rows:
|
|
186
|
+
cells.append([{"text": c.text} for c in row.cells])
|
|
187
|
+
tables.append({
|
|
188
|
+
"rows": rows,
|
|
189
|
+
"cols": cols,
|
|
190
|
+
"cells": cells,
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
return {
|
|
194
|
+
"paragraphs": paragraphs,
|
|
195
|
+
"tables": tables,
|
|
196
|
+
"paragraph_count": len(paragraphs),
|
|
197
|
+
"table_count": len(tables),
|
|
198
|
+
}
|
|
199
|
+
finally:
|
|
200
|
+
sys.path[:] = orig_sys_path
|
|
201
|
+
for key in list(sys.modules):
|
|
202
|
+
if key.startswith("docx"):
|
|
203
|
+
del sys.modules[key]
|
|
204
|
+
sys.modules.update(orig_mods)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
# ── comparator ─────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def compare(stock: DocxFeatures, athena: DocxFeatures) -> list[str]:
|
|
211
|
+
"""Return a list of semantic differences. Empty list = pass."""
|
|
212
|
+
diffs: list[str] = []
|
|
213
|
+
|
|
214
|
+
# Paragraph count. Superdoc's Word-default template often inserts an
|
|
215
|
+
# extra empty paragraph at the end (no text, no runs). We tolerate
|
|
216
|
+
# a +/- 1 gap as long as the non-empty content matches.
|
|
217
|
+
stock_nonempty = [p for p in stock["paragraphs"] if p["text"] or p["runs"]]
|
|
218
|
+
athena_nonempty = [p for p in athena["paragraphs"] if p["text"] or p["runs"]]
|
|
219
|
+
if len(stock_nonempty) != len(athena_nonempty):
|
|
220
|
+
diffs.append(
|
|
221
|
+
f"non-empty paragraph count: stock={len(stock_nonempty)} "
|
|
222
|
+
f"athena={len(athena_nonempty)}",
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
for i, (s, a) in enumerate(zip(stock_nonempty, athena_nonempty)):
|
|
226
|
+
if s["text"] != a["text"]:
|
|
227
|
+
diffs.append(
|
|
228
|
+
f"paragraph[{i}] text: stock={s['text']!r} athena={a['text']!r}",
|
|
229
|
+
)
|
|
230
|
+
# Style name: stock uses "Heading 1" style names; Superdoc may
|
|
231
|
+
# use "Heading1" (no space) or the same. Normalize whitespace.
|
|
232
|
+
s_style = (s.get("style") or "").replace(" ", "").lower()
|
|
233
|
+
a_style = (a.get("style") or "").replace(" ", "").lower()
|
|
234
|
+
if s_style != a_style:
|
|
235
|
+
diffs.append(
|
|
236
|
+
f"paragraph[{i}] style: stock={s['style']!r} athena={a['style']!r}",
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
# Table structure
|
|
240
|
+
if stock["table_count"] != athena["table_count"]:
|
|
241
|
+
diffs.append(
|
|
242
|
+
f"table count: stock={stock['table_count']} athena={athena['table_count']}",
|
|
243
|
+
)
|
|
244
|
+
for i, (st, at) in enumerate(zip(stock["tables"], athena["tables"])):
|
|
245
|
+
if st["rows"] != at["rows"] or st["cols"] != at["cols"]:
|
|
246
|
+
diffs.append(
|
|
247
|
+
f"table[{i}] shape: stock={st['rows']}x{st['cols']} "
|
|
248
|
+
f"athena={at['rows']}x{at['cols']}",
|
|
249
|
+
)
|
|
250
|
+
# Cell content
|
|
251
|
+
for r, (s_row, a_row) in enumerate(zip(st["cells"], at["cells"])):
|
|
252
|
+
for c, (s_cell, a_cell) in enumerate(zip(s_row, a_row)):
|
|
253
|
+
if s_cell["text"] != a_cell["text"]:
|
|
254
|
+
diffs.append(
|
|
255
|
+
f"table[{i}].cell({r},{c}).text: "
|
|
256
|
+
f"stock={s_cell['text']!r} athena={a_cell['text']!r}",
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
return diffs
|
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
"""Fidelity runner — orchestrates stock python-docx + athena-python-docx
|
|
2
|
+
across all cases in ``cases.CASES`` and prints a scorecard.
|
|
3
|
+
|
|
4
|
+
Usage
|
|
5
|
+
-----
|
|
6
|
+
cd agora && doppler run --project agora --config dev -- \
|
|
7
|
+
uv run python ../docx-studio/python-sdk/tests/fidelity/runner.py
|
|
8
|
+
|
|
9
|
+
The runner needs:
|
|
10
|
+
- a Daytona API key (from agora settings) — used to spin up a sandbox
|
|
11
|
+
and execute the athena-python-docx side
|
|
12
|
+
- a Keryx private key + WS URL (from agora settings) — to sign a token
|
|
13
|
+
for the sandbox
|
|
14
|
+
- a reusable SuperDoc asset to write against (default asset on staging
|
|
15
|
+
is hard-coded; override with ATHENA_DOCX_FIDELITY_ASSET_ID)
|
|
16
|
+
- stock python-docx installed somewhere (we install it on-demand into
|
|
17
|
+
``/tmp/docx-fidelity-stock-site-packages`` if missing)
|
|
18
|
+
|
|
19
|
+
Output is a table listing each case with status (PASS / FAIL / STUB-HIT /
|
|
20
|
+
ERROR), plus any diffs.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import asyncio
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import subprocess
|
|
29
|
+
import sys
|
|
30
|
+
import time
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
# Allow running this file as a script: add the sdk root so "docx.*" and
|
|
35
|
+
# the tests/fidelity package both resolve.
|
|
36
|
+
_SDK_ROOT = Path(__file__).resolve().parents[2]
|
|
37
|
+
sys.path.insert(0, str(_SDK_ROOT))
|
|
38
|
+
|
|
39
|
+
from tests.fidelity.cases import CASES, Case # noqa: E402
|
|
40
|
+
from tests.fidelity.extract import compare, extract_features # noqa: E402
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# ── configuration ─────────────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
DEFAULT_ASSET_ID = os.environ.get(
|
|
46
|
+
"ATHENA_DOCX_FIDELITY_ASSET_ID",
|
|
47
|
+
"asset_de593a96-3031-45ea-bc52-60cb4b2b16ad", # "PR18540 SuperDoc Test"
|
|
48
|
+
)
|
|
49
|
+
STOCK_SITE_PACKAGES = Path("/tmp/docx-fidelity-stock-site-packages")
|
|
50
|
+
ARTIFACT_DIR = Path("/tmp/docx-fidelity-artifacts")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ── stock python-docx setup ───────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def ensure_stock_python_docx() -> Path:
|
|
57
|
+
"""Install stock python-docx into an isolated directory if not present."""
|
|
58
|
+
marker = STOCK_SITE_PACKAGES / "docx" / "__init__.py"
|
|
59
|
+
if marker.exists():
|
|
60
|
+
return STOCK_SITE_PACKAGES
|
|
61
|
+
print(f"[fidelity] installing stock python-docx -> {STOCK_SITE_PACKAGES}")
|
|
62
|
+
STOCK_SITE_PACKAGES.mkdir(parents=True, exist_ok=True)
|
|
63
|
+
# Use `uv pip install --target` — agora's .venv is uv-managed and
|
|
64
|
+
# doesn't include stdlib `pip`. Fall back to `python -m pip` only if
|
|
65
|
+
# `uv` isn't on PATH (shouldn't happen inside `doppler run -- uv run`).
|
|
66
|
+
uv = _which("uv")
|
|
67
|
+
if uv:
|
|
68
|
+
cmd = [uv, "pip", "install", "--quiet", "--target", str(STOCK_SITE_PACKAGES), "python-docx>=1.1"]
|
|
69
|
+
else:
|
|
70
|
+
cmd = [sys.executable, "-m", "pip", "install", "--quiet", "--target", str(STOCK_SITE_PACKAGES), "python-docx>=1.1"]
|
|
71
|
+
subprocess.run(cmd, check=True)
|
|
72
|
+
return STOCK_SITE_PACKAGES
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _which(name: str) -> str | None:
|
|
76
|
+
import shutil
|
|
77
|
+
return shutil.which(name)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def run_stock_script(script: str, out_path: Path) -> None:
|
|
81
|
+
"""Run a script with stock python-docx, writing output to `out_path`.
|
|
82
|
+
|
|
83
|
+
The subprocess is invoked with PYTHONPATH pointed at our isolated
|
|
84
|
+
stock site-packages so we don't collide with our SDK's `docx` ns.
|
|
85
|
+
"""
|
|
86
|
+
stock = ensure_stock_python_docx()
|
|
87
|
+
wrapped = (
|
|
88
|
+
"import sys\n"
|
|
89
|
+
f"sys.path.insert(0, {str(stock)!r})\n"
|
|
90
|
+
"from docx import Document\n"
|
|
91
|
+
"doc = Document()\n"
|
|
92
|
+
f"{script}\n"
|
|
93
|
+
f"doc.save({str(out_path)!r})\n"
|
|
94
|
+
)
|
|
95
|
+
result = subprocess.run(
|
|
96
|
+
[sys.executable, "-c", wrapped],
|
|
97
|
+
capture_output=True,
|
|
98
|
+
text=True,
|
|
99
|
+
timeout=30,
|
|
100
|
+
)
|
|
101
|
+
if result.returncode != 0:
|
|
102
|
+
raise RuntimeError(
|
|
103
|
+
f"stock python-docx subprocess failed:\n"
|
|
104
|
+
f" stdout: {result.stdout}\n"
|
|
105
|
+
f" stderr: {result.stderr}",
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ── athena-python-docx via Daytona ────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def build_sandbox_script(cases_batch: list[Case], asset_id: str) -> str:
    """Return the source of a Python program to run inside the Daytona sandbox.

    The generated program iterates the serialized cases and, for each:
    - clears the doc via superdoc-sdk.doc.clear_content()
    - executes the case's script with `doc` pre-bound
    - exports the resulting .docx to /tmp/fidelity_out/<case_name>.docx
    - records success/exception into /tmp/fidelity_results.json

    Args:
        cases_batch: Cases to embed; only name/script/expected exception travel.
        asset_id: Collaboration document id the sandbox session opens.

    Returns:
        The program text, ready to be uploaded and run with ``python3``.
    """
    # Serialize the cases to JSON so they can be embedded as a single literal
    # ({cases_json!r} below) and parsed back inside the sandbox.
    cases_json = json.dumps(
        [{"name": c.name, "script": c.script, "expected_exc": c.expected_athena_exc} for c in cases_batch],
    )
    # NOTE: the template is an f-string, so every literal brace in the generated
    # program is doubled ({{ }}); only {asset_id!r} and {cases_json!r} interpolate.
    return f'''
import asyncio
import json
import os
import traceback
from pathlib import Path

OUT_DIR = Path("/tmp/fidelity_out")
OUT_DIR.mkdir(parents=True, exist_ok=True)
ASSET_ID = {asset_id!r}
CASES = json.loads({cases_json!r})

from superdoc import AsyncSuperDocClient
from superdoc.generated.client import DocOpenParams, DocOpenParamsCollaborationVariant1

async def with_session(callback):
    token = os.environ["SUPERDOC_COLLAB_TOKEN"]
    ws_url = os.environ["KERYX_WS_URL"]
    ws_id = os.environ["ATHENA_WORKSPACE_ID"]
    client = AsyncSuperDocClient(
        user={{"name":"fidelity-runner","email":"fidelity@athenaintel.com"}},
        env={{"SUPERDOC_COLLAB_TOKEN": token}},
        request_timeout_ms=30_000,
        watchdog_timeout_ms=60_000,
    )
    try:
        collab: DocOpenParamsCollaborationVariant1 = {{
            "url": f"{{ws_url}}/ws/{{ws_id}}",
            "documentId": ASSET_ID,
            "tokenEnv": "SUPERDOC_COLLAB_TOKEN",
            "providerType": "y-websocket",
        }}
        h = await client.open({{"collaboration": collab}})
        try:
            return await callback(h)
        finally:
            await asyncio.sleep(0.5)
            await h.close({{"discard": True}})
    finally:
        await client.dispose()


async def clear_asset():
    async def _clear(h):
        try:
            await h.clear_content({{}})
        except Exception as e:
            # If "already empty" fine
            if "already empty" not in str(e).lower():
                raise
    await with_session(_clear)


async def export_to(path):
    async def _export(h):
        await h.save({{"out": str(path)}})
    await with_session(_export)


def run_case_locally(script):
    """Run script against athena-python-docx via sync facade."""
    from docx import Document
    with Document(ASSET_ID) as doc:
        # RGBColor and Pt are intentionally not pre-bound — each script that
        # needs them imports them explicitly (matching how agents write code).
        from docx.shared import RGBColor, Pt
        local_ns = dict(doc=doc, RGBColor=RGBColor, Pt=Pt)
        exec(compile(script, "<case>", "exec"), local_ns, local_ns)
        doc.save()


results = []
for case in CASES:
    name = case["name"]
    script = case["script"]
    expected = case["expected_exc"]

    # Step 1: clear the asset
    try:
        asyncio.run(clear_asset())
    except Exception as e:
        results.append({{
            "name": name,
            "status": "SETUP_ERROR",
            "detail": f"clear_content failed: {{e!r}}",
        }})
        continue

    # Step 2: run the case's script via athena-python-docx
    script_error = None
    try:
        run_case_locally(script)
    except Exception as e:
        script_error = (type(e).__name__, str(e)[:400])

    # Step 3: compare error vs expectation
    if expected is not None:
        if script_error is not None and script_error[0] == expected:
            results.append({{"name": name, "status": "STUB_HIT", "detail": script_error[1][:200]}})
            continue
        if script_error is None:
            results.append({{"name": name, "status": "UNEXPECTED_PASS", "detail": f"expected {{expected}} but succeeded"}})
            continue
        results.append({{
            "name": name,
            "status": "WRONG_EXC",
            "detail": f"expected {{expected}}, got {{script_error[0]}}: {{script_error[1]}}",
        }})
        continue

    if script_error is not None:
        results.append({{
            "name": name,
            "status": "SCRIPT_ERROR",
            "detail": f"{{script_error[0]}}: {{script_error[1]}}",
        }})
        continue

    # Step 4: export and record artifact
    out_path = OUT_DIR / f"{{name}}.docx"
    try:
        asyncio.run(export_to(out_path))
    except Exception as e:
        results.append({{"name": name, "status": "EXPORT_ERROR", "detail": f"{{type(e).__name__}}: {{e}}"}})
        continue

    size = out_path.stat().st_size if out_path.exists() else 0
    results.append({{"name": name, "status": "OK", "detail": f"exported {{size}} bytes", "export_path": str(out_path)}})

Path("/tmp/fidelity_results.json").write_text(json.dumps(results, indent=2))
print(json.dumps(results, indent=2))
'''
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
async def run_daytona_batch(cases: list[Case], asset_id: str) -> tuple[list[dict], dict[str, bytes]]:
    """Spin up a sandbox, run all cases, download every exported .docx.

    Returns:
        ``(results, artifacts)`` — the per-case result dicts parsed from
        /tmp/fidelity_results.json, and the exported .docx bytes keyed by case
        name (only for cases that reported status "OK").
    """
    # Deferred imports — presumably so this module can be loaded without
    # agora/daytona_sdk installed; TODO confirm that is the intent.
    from agora.services.keryx.client import get_keryx_client
    from agora.services.auth.docx_auth import build_docx_collab_bundle  # noqa: F401 ensures agora is loadable
    from agora.settings import settings
    from daytona_sdk import AsyncDaytona, CreateSandboxFromSnapshotParams, DaytonaConfig

    routing_bundle = await _build_collab_bundle(asset_id)
    kc = get_keryx_client()
    # Fresh token for the sandbox.
    # NOTE(review): _sign_token is a private keryx API (hence SLF001) — confirm
    # there is no public token-minting entry point before relying on it further.
    token = kc._sign_token(user_id="fidelity-runner")  # noqa: SLF001

    daytona = AsyncDaytona(
        config=DaytonaConfig(
            api_key=settings.daytona_full_api_key,
            api_url=settings.daytona_api_url,
            organization_id=settings.daytona_organization_id,
            target=settings.daytona_target,
        ),
    )
    # Fall back to a default snapshot name when settings leave it unset/empty.
    snapshot = settings.daytona_document_exec_snapshot or "document-exec:v1"
    print(f"[fidelity] daytona snapshot: {snapshot}")

    sandbox = await daytona.create(
        CreateSandboxFromSnapshotParams(
            name=f"docx-fidelity-{int(time.time())}",
            snapshot=snapshot,
            # The generated runner script reads these three env vars to open
            # its collaboration session (see build_sandbox_script).
            env_vars={
                "SUPERDOC_COLLAB_TOKEN": token,
                "KERYX_WS_URL": settings.keryx_ws_url,
                "ATHENA_WORKSPACE_ID": routing_bundle["ATHENA_WORKSPACE_ID"],
            },
            auto_stop_interval=30,
            auto_archive_interval=60,
        ),
        timeout=120,
    )
    print(f"[fidelity] sandbox {sandbox.id} started")

    try:
        script = build_sandbox_script(cases, asset_id)
        await sandbox.fs.upload_file(script.encode(), "/tmp/fidelity_runner.py")
        exec_result = await sandbox.process.exec(
            "python3 /tmp/fidelity_runner.py",
            timeout=600,
        )
        print(f"[fidelity] sandbox exit_code={exec_result.exit_code}")
        # Load the results JSON written by the runner script.
        raw = await sandbox.fs.download_file("/tmp/fidelity_results.json")
        results: list[dict] = json.loads(raw.decode())

        # Download every successfully-exported .docx artifact; a failed
        # download demotes that case's status in place to DOWNLOAD_ERROR.
        artifacts: dict[str, bytes] = {}
        for r in results:
            if r["status"] == "OK" and "export_path" in r:
                try:
                    data = await sandbox.fs.download_file(r["export_path"])
                    artifacts[r["name"]] = data
                except Exception as e:
                    r["status"] = "DOWNLOAD_ERROR"
                    r["detail"] = f"fs.download_file: {e}"
        return results, artifacts
    finally:
        # Best-effort cleanup: never let a delete failure mask the real result.
        try:
            await sandbox.delete()
        except Exception as e:
            print(f"[fidelity] sandbox delete failed: {e}")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
async def _build_collab_bundle(asset_id: str) -> dict[str, str]:
    """Fetch the collaboration routing bundle for *asset_id* as a plain dict."""
    from agora.services.auth.docx_auth import build_docx_collab_bundle

    bundle = await build_docx_collab_bundle(asset_id=asset_id, user_id="fidelity-runner")
    return {**bundle}
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
# ── scorecard ─────────────────────────────────────────────────────────
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def print_scorecard(
    cases: list[Case],
    daytona_results: list[dict],
    diffs_by_name: dict[str, list[str]],
) -> int:
    """Print a per-case fidelity scorecard. Return exit code (0 = all green).

    Args:
        cases: Every scheduled fidelity case (order defines print order).
        daytona_results: Sandbox result dicts with ``name``/``status``/``detail``.
        diffs_by_name: Content-level diffs per case name; an empty/missing list
            means the athena output matched stock python-docx.

    Returns:
        0 when every case is OK with no diffs or an expected stub hit; 1 otherwise.
    """
    print()
    print("=" * 88)
    # Keep the banner version-neutral: a hard-coded package version here went
    # stale as soon as the version was bumped.
    print("FIDELITY SCORECARD — athena-python-docx vs stock python-docx")
    print("=" * 88)

    by_name = {r["name"]: r for r in daytona_results}
    status_icons = {
        "OK": "✅",
        "STUB_HIT": "🟡",
        "UNEXPECTED_PASS": "🟠",
        "WRONG_EXC": "🔴",
        "SCRIPT_ERROR": "❌",
        "SETUP_ERROR": "❌",
        "EXPORT_ERROR": "❌",
        "DOWNLOAD_ERROR": "❌",
    }

    fails = 0
    for case in cases:
        r = by_name.get(case.name, {"status": "MISSING", "detail": "not run"})
        status = r["status"]
        icon = status_icons.get(status, "❓")
        diff_suffix = ""
        # Hoist the diff list once; an OK run with content diffs is demoted to
        # the synthetic DIFF status (unless the case opts out of diffing).
        diffs = diffs_by_name.get(case.name, [])
        if status == "OK" and not case.skip_content_diff and diffs:
            status = "DIFF"
            icon = "⚠️"
            diff_suffix = f" ({len(diffs)} diff(s))"

        line = f" {icon} [{status:14}] {case.name:32} — {case.description}{diff_suffix}"
        print(line)
        if status not in {"OK", "STUB_HIT"}:
            if status == "DIFF":
                for d in diffs[:3]:
                    print(f" ↳ {d}")
                if len(diffs) > 3:
                    print(f" ↳ …and {len(diffs)-3} more")
            else:
                detail = r.get("detail", "")
                if detail:
                    print(f" ↳ {detail[:150]}")
            # status is already known not to be STUB_HIT here (outer guard), so
            # every branch reaching this point counts as a failure.
            fails += 1

    print()
    n_ok = sum(1 for r in daytona_results if r["status"] == "OK")
    n_stub = sum(1 for r in daytona_results if r["status"] == "STUB_HIT")
    n_diff = sum(
        1 for c in cases
        if by_name.get(c.name, {}).get("status") == "OK"
        and diffs_by_name.get(c.name)
    )
    total = len(cases)
    print(f" {n_ok}/{total} cases ran successfully in Athena SDK")
    print(f" {n_stub}/{total} cases hit expected NotImplementedError stubs (known Phase-2 gaps)")
    print(f" {n_diff}/{n_ok} successful-run cases have content-level diffs vs stock python-docx")
    print(f" exit code: {0 if fails == 0 else 1}")
    return 0 if fails == 0 else 1
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
# ── main ──────────────────────────────────────────────────────────────
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
async def main() -> int:
    """Drive the full fidelity comparison and return the process exit code."""
    print(f"[fidelity] case count: {len(CASES)}")
    print(f"[fidelity] target asset: {DEFAULT_ASSET_ID}")
    ensure_stock_python_docx()
    ARTIFACT_DIR.mkdir(parents=True, exist_ok=True)

    # Phase 1: produce the reference outputs with stock python-docx locally.
    print("[fidelity] running stock python-docx side ...")
    stock_paths: dict[str, Path] = {}
    for case in CASES:
        stock_out = ARTIFACT_DIR / f"stock_{case.name}.docx"
        try:
            run_stock_script(case.script, stock_out)
        except Exception as exc:
            print(f" !! {case.name} stock subprocess error: {exc}")
        else:
            stock_paths[case.name] = stock_out

    # Phase 2: run every case through athena-python-docx in one Daytona batch.
    print("[fidelity] running athena-python-docx via Daytona ...")
    daytona_results, athena_artifacts = await run_daytona_batch(CASES, DEFAULT_ASSET_ID)

    # Phase 3: persist athena artifacts and diff features against stock output.
    diffs_by_name: dict[str, list[str]] = {}
    for case in CASES:
        if case.name not in athena_artifacts or case.skip_content_diff:
            continue
        athena_path = ARTIFACT_DIR / f"athena_{case.name}.docx"
        athena_path.write_bytes(athena_artifacts[case.name])
        stock_path = stock_paths.get(case.name)
        if stock_path is None:
            continue
        try:
            expected_feat = extract_features(stock_path, str(STOCK_SITE_PACKAGES))
            actual_feat = extract_features(athena_path, str(STOCK_SITE_PACKAGES))
            diffs_by_name[case.name] = compare(expected_feat, actual_feat)
        except Exception as exc:
            diffs_by_name[case.name] = [f"extractor error: {exc}"]

    # Phase 4: summarize and surface the exit code.
    exit_code = print_scorecard(CASES, daytona_results, diffs_by_name)
    print(f"\n[fidelity] artifacts in {ARTIFACT_DIR}")
    return exit_code
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
if __name__ == "__main__":
    # Run the async entry point and propagate its exit status to the shell.
    raise SystemExit(asyncio.run(main()))
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|