athena-python-docx 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,23 @@
1
+ node_modules/
2
+ dist/
3
+ .env
4
+ .env.local
5
+ .env.*.local
6
+
7
+ # Python
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+ .venv/
12
+ venv/
13
+ env/
14
+ *.egg-info/
15
+ build/
16
+ dist/
17
+ .pytest_cache/
18
+ .mypy_cache/
19
+ .ruff_cache/
20
+
21
+ # Editors
22
+ .vscode/
23
+ .idea/
@@ -0,0 +1,63 @@
1
+ # athena-python-docx SDK — Claude Instructions
2
+
3
+ ## API Parity Rule (MANDATORY)
4
+
5
+ **This SDK MUST be a 100% exact replica of the standard [python-docx](https://python-docx.readthedocs.io/) API.**
6
+
7
+ Every class, method, property, and parameter name must match python-docx exactly. The goal is that any code written for python-docx works identically with this SDK — no surprises, no differences.
8
+
9
+ ### What this means in practice
10
+
11
+ - **Do NOT add new methods** that don't exist in python-docx
12
+ - **Do NOT add new properties** that don't exist in python-docx
13
+ - **Do NOT rename parameters** — use the exact same parameter names as python-docx
14
+ - **Do NOT change method signatures** — if python-docx's `add_heading()` takes `(text, level=1)`, ours must too
15
+ - **Do NOT change return types** — if python-docx's `paragraph.runs` returns `list[Run]`, ours must too
16
+
17
+ ### How to verify parity
18
+
19
+ Before adding or modifying any API surface:
20
+
21
+ 1. Check the [python-docx documentation](https://python-docx.readthedocs.io/)
22
+ 2. Check the [python-docx source code](https://github.com/python-openxml/python-docx)
23
+ 3. Confirm the method/property/parameter exists with the same name and signature
24
+ 4. If it doesn't exist in python-docx, **do not add it** without explicit user approval
25
+
26
+ Run `uv run pytest tests/test_python_docx_api_parity.py -v -s` to verify parity.
27
+
28
+ ### Intentionally omitted (Superdoc SDK limitations)
29
+
30
+ These standard python-docx members don't apply to a Superdoc-backed SDK:
31
+
32
+ - `Paragraph._p`, `Run._r` — XML element access (no local XML)
33
+ - `Document.part`, `Paragraph.part`, `Run.part` — package part access
34
+ - `Document.core_properties.last_modified_by` — we use Keryx attribution instead
35
+ - `Document.settings` — Word app settings (not surfaced by Superdoc)
36
+ - `InlineShape.chart` — charts (Phase 2+)
37
+
38
+ ### If you need a deviation
39
+
40
+ If there is a genuine technical reason why a deviation from python-docx is necessary:
41
+
42
+ 1. **Stop and ask the user** before implementing
43
+ 2. Explain what the deviation is and why it's needed
44
+ 3. Get explicit confirmation that the deviation is acceptable
45
+ 4. Document the deviation in the "Intentionally omitted" list above
46
+
47
+ ## Architecture
48
+
49
+ This is an **async-Superdoc-SDK-backed client** that mimics the sync python-docx API.
50
+
51
+ - Sync façade (matches python-docx) — `doc.save()`, `paragraph.add_run()`
52
+ - Under the hood, a persistent event-loop thread in `_batching.py` runs `AsyncSuperDocClient` coroutines
53
+ - No XML manipulation — calls translate to Superdoc SDK ops (insert, find, replace, tables.*, format.apply, create.image, hyperlinks.wrap)
54
+ - Mutations write directly to Keryx Y.Doc; users and other agents see them live
55
+
56
+ ## Development
57
+
58
+ ```bash
59
+ uv venv
60
+ uv pip install -e ".[dev]"
61
+ uv run pytest tests/ -x -q
62
+ uv run pytest tests/test_python_docx_api_parity.py -v
63
+ ```
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: athena-python-docx
3
+ Version: 0.1.0
4
+ Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
5
+ Project-URL: Homepage, https://athenaintelligence.ai
6
+ Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
7
+ License-Expression: MIT
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Python: >=3.11
14
+ Requires-Dist: httpx>=0.27
15
+ Requires-Dist: superdoc-sdk>=1.6.0.dev6
16
+ Provides-Extra: dev
17
+ Requires-Dist: mypy>=1.8; extra == 'dev'
18
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
19
+ Requires-Dist: pytest>=8.0; extra == 'dev'
20
+ Requires-Dist: python-docx>=1.1; extra == 'dev'
21
+ Requires-Dist: ruff>=0.3; extra == 'dev'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # athena-python-docx
25
+
26
+ Drop-in replacement for [python-docx](https://python-docx.readthedocs.io/) that connects to Athena's Superdoc + Keryx collaborative document stack.
27
+
28
+ ## Quick start
29
+
30
+ ```python
31
+ from docx import Document
32
+ from docx.shared import Inches, Pt, RGBColor
33
+
34
+ # Open an existing SuperDoc asset by its Athena asset_id
35
+ with Document("asset_abc123") as doc:
36
+ doc.add_heading("Market Analysis", level=1)
37
+
38
+ p = doc.add_paragraph()
39
+ r = p.add_run("Revenue grew ")
40
+ highlight = p.add_run("12.3% year-over-year")
41
+ highlight.bold = True
42
+ highlight.font.color.rgb = RGBColor(0x00, 0x80, 0x00)
43
+ p.add_run(".")
44
+
45
+ t = doc.add_table(rows=2, cols=2, style="TableGrid")
46
+ t.cell(0, 0).text = "Segment"
47
+ t.cell(0, 1).text = "Revenue"
48
+ t.cell(1, 0).text = "Enterprise"
49
+ t.cell(1, 1).text = "$4.3M"
50
+
51
+ doc.save()
52
+ ```
53
+
54
+ ## API parity rule
55
+
56
+ This SDK mirrors python-docx's public API **exactly**. See `CLAUDE.md` for the full contract.
57
+
58
+ ## Development
59
+
60
+ ```bash
61
+ uv venv
62
+ uv pip install -e ".[dev]"
63
+ uv run pytest tests/ -x
64
+ ```
65
+
66
+ ## Environment variables
67
+
68
+ Required when connecting to Keryx (set by the Athena backend when executing in Daytona):
69
+
70
+ - `SUPERDOC_COLLAB_TOKEN` — short-lived Keryx JWT
71
+ - `KERYX_WS_URL` — Keryx WebSocket base URL
72
+ - `ATHENA_WORKSPACE_ID` — workspace routing segment
73
+
74
+ ## License
75
+
76
+ MIT
@@ -0,0 +1,53 @@
1
+ # athena-python-docx
2
+
3
+ Drop-in replacement for [python-docx](https://python-docx.readthedocs.io/) that connects to Athena's Superdoc + Keryx collaborative document stack.
4
+
5
+ ## Quick start
6
+
7
+ ```python
8
+ from docx import Document
9
+ from docx.shared import Inches, Pt, RGBColor
10
+
11
+ # Open an existing SuperDoc asset by its Athena asset_id
12
+ with Document("asset_abc123") as doc:
13
+ doc.add_heading("Market Analysis", level=1)
14
+
15
+ p = doc.add_paragraph()
16
+ r = p.add_run("Revenue grew ")
17
+ highlight = p.add_run("12.3% year-over-year")
18
+ highlight.bold = True
19
+ highlight.font.color.rgb = RGBColor(0x00, 0x80, 0x00)
20
+ p.add_run(".")
21
+
22
+ t = doc.add_table(rows=2, cols=2, style="TableGrid")
23
+ t.cell(0, 0).text = "Segment"
24
+ t.cell(0, 1).text = "Revenue"
25
+ t.cell(1, 0).text = "Enterprise"
26
+ t.cell(1, 1).text = "$4.3M"
27
+
28
+ doc.save()
29
+ ```
30
+
31
+ ## API parity rule
32
+
33
+ This SDK mirrors python-docx's public API **exactly**. See `CLAUDE.md` for the full contract.
34
+
35
+ ## Development
36
+
37
+ ```bash
38
+ uv venv
39
+ uv pip install -e ".[dev]"
40
+ uv run pytest tests/ -x
41
+ ```
42
+
43
+ ## Environment variables
44
+
45
+ Required when connecting to Keryx (set by the Athena backend when executing in Daytona):
46
+
47
+ - `SUPERDOC_COLLAB_TOKEN` — short-lived Keryx JWT
48
+ - `KERYX_WS_URL` — Keryx WebSocket base URL
49
+ - `ATHENA_WORKSPACE_ID` — workspace routing segment
50
+
51
+ ## License
52
+
53
+ MIT
@@ -0,0 +1,16 @@
1
+ """athena-python-docx — drop-in replacement for python-docx.
2
+
3
+ Calls translate into Superdoc SDK operations against a Keryx Y.Doc.
4
+ See CLAUDE.md for the API parity contract.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ __version__ = "0.1.0"
10
+
11
+ from docx.api import Document
12
+
13
+ __all__ = [
14
+ "Document",
15
+ "__version__",
16
+ ]
@@ -0,0 +1,86 @@
1
+ """Sync→async bridge. Runs a single event loop in a background thread
2
+ and dispatches coroutines from sync callers.
3
+
4
+ This is the only place in the SDK that uses threading.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import threading
11
+ from typing import Any, Coroutine, TypeVar
12
+
13
+ T = TypeVar("T")
14
+
15
+ # Default per-op timeout. Individual Superdoc SDK calls should complete well
16
+ # under this — the value is a safety net so a WebSocket stall or Keryx
17
+ # downtime surfaces a TimeoutError instead of wedging the calling thread
18
+ # until the outer Daytona INSTRUCTION_TIMEOUT_SECONDS kill.
19
+ DEFAULT_OP_TIMEOUT_SECONDS: float = 60.0
20
+
21
+ _loop: asyncio.AbstractEventLoop | None = None
22
+ _thread: threading.Thread | None = None
23
+ _lock = threading.Lock()
24
+
25
+
26
+ def _ensure_loop() -> asyncio.AbstractEventLoop:
27
+ """Return the persistent background event loop. Starts it lazily."""
28
+ global _loop, _thread
29
+
30
+ with _lock:
31
+ if _loop is not None and _loop.is_running():
32
+ return _loop
33
+
34
+ loop = asyncio.new_event_loop()
35
+ loop_ready = threading.Event()
36
+
37
+ def _run() -> None:
38
+ asyncio.set_event_loop(loop)
39
+ loop_ready.set()
40
+ loop.run_forever()
41
+
42
+ thread = threading.Thread(
43
+ target=_run,
44
+ name="docx-sdk-loop",
45
+ daemon=True,
46
+ )
47
+ thread.start()
48
+ loop_ready.wait(timeout=5)
49
+ _loop = loop
50
+ _thread = thread
51
+ return loop
52
+
53
+
54
+ def run_sync(
55
+ coro: Coroutine[Any, Any, T],
56
+ *,
57
+ timeout: float | None = DEFAULT_OP_TIMEOUT_SECONDS,
58
+ ) -> T:
59
+ """Run a coroutine on the background loop and block until it returns.
60
+
61
+ Args:
62
+ coro: The coroutine to execute.
63
+ timeout: Maximum seconds to wait. ``None`` blocks indefinitely
64
+ (use sparingly — prefer the default so stalls surface early).
65
+
66
+ Raises:
67
+ concurrent.futures.TimeoutError: if the coroutine does not
68
+ complete within `timeout` seconds.
69
+ The coroutine's exception, re-raised in the calling thread.
70
+ """
71
+ loop = _ensure_loop()
72
+ future = asyncio.run_coroutine_threadsafe(coro, loop)
73
+ return future.result(timeout=timeout)
74
+
75
+
76
+ def shutdown() -> None:
77
+ """Stop the background event loop. For test teardown."""
78
+ global _loop, _thread
79
+ with _lock:
80
+ if _loop is None:
81
+ return
82
+ _loop.call_soon_threadsafe(_loop.stop)
83
+ if _thread is not None:
84
+ _thread.join(timeout=5)
85
+ _loop = None
86
+ _thread = None
@@ -0,0 +1,41 @@
1
+ """The `Document` factory — matches python-docx's `docx.Document(path)`.
2
+
3
+ python-docx signature:
4
+ Document(docx=None) -> Document
5
+
6
+ Our signature is identical, but the argument's meaning deviates from the
7
+ path-based one: we don't open .docx files from disk — we open Y.Doc
8
+ assets from Keryx. The parameter is reused: you pass an asset_id string
9
+ where python-docx would take a file path.
10
+
11
+ If the caller truly needs to load a local .docx (rare inside Daytona),
12
+ they can upload it first via the apps/api upload route (Phase 2+) or
13
+ use the existing `replaceFile` flow from the Olympus side.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from docx.document import Document as _Document
19
+
20
+
21
+ def Document(docx: str | None = None) -> _Document:
22
+ """Open a Word document for editing.
23
+
24
+ Args:
25
+ docx: Athena asset_id of the SuperDoc document to open.
26
+ In python-docx this takes a file path; here it takes
27
+ an asset_id. None is not supported in Phase 1.
28
+
29
+ Returns:
30
+ A Document instance bound to the asset.
31
+
32
+ Raises:
33
+ ValueError: if `docx` is None or empty.
34
+ """
35
+ if not docx:
36
+ raise ValueError(
37
+ "athena-python-docx requires an asset_id. "
38
+ "Pass the SuperDoc asset ID as the first argument: "
39
+ "Document('asset_xxx...')",
40
+ )
41
+ return _Document(asset_id=docx)
@@ -0,0 +1,230 @@
1
+ """Superdoc SDK session wrapper — auth, open/close lifecycle, error mapping.
2
+
3
+ This module is the only place in the SDK that talks to the Superdoc SDK
4
+ (the `superdoc` package) directly. All other classes (Document, Paragraph,
5
+ Run, Table) go through a `Session` instance.
6
+
7
+ Environment variables expected inside Daytona:
8
+ SUPERDOC_COLLAB_TOKEN: short-lived Keryx JWT (signed by Athena backend)
9
+ KERYX_WS_URL: Keryx WebSocket base URL (e.g. "wss://keryx.athena...")
10
+ ATHENA_WORKSPACE_ID: workspace routing path segment
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import os
17
+ from contextlib import asynccontextmanager
18
+ from typing import TYPE_CHECKING, Any
19
+
20
+ from docx.errors import (
21
+ AuthenticationError,
22
+ DocumentClosedError,
23
+ SessionError,
24
+ )
25
+
26
+ if TYPE_CHECKING:
27
+ from collections.abc import AsyncIterator
28
+
29
+ _TOKEN_ENV = "SUPERDOC_COLLAB_TOKEN" # noqa: S105
30
+ _WS_URL_ENV = "KERYX_WS_URL"
31
+ _WORKSPACE_ENV = "ATHENA_WORKSPACE_ID"
32
+
33
+
34
def _log_info(msg: str) -> None:
    """Write an informational line, tagged ``[docx-sdk]``, to stderr.

    A plain ``print`` is used instead of loguru to keep the SDK's runtime
    dependency tree small — this code runs inside Daytona sandboxes.
    """
    import sys

    line = "[docx-sdk] {}".format(msg)
    print(line, file=sys.stderr)
41
+
42
+
43
def _log_warning(msg: str) -> None:
    """Write a warning line, tagged ``[docx-sdk] WARN:``, to stderr."""
    import sys

    line = "[docx-sdk] WARN: {}".format(msg)
    print(line, file=sys.stderr)
47
+
48
+
49
+ class Session:
50
+ """Manages the lifecycle of an AsyncSuperDocClient session.
51
+
52
+ Lifecycle:
53
+ create Session (does NOT open yet) → first op triggers open() →
54
+ subsequent ops reuse the same client → save() flushes → close()
55
+ disposes.
56
+
57
+ Thread safety: NOT thread-safe. Each Document should have its own
58
+ Session. The sync facade serializes all calls through a single
59
+ event-loop thread (see _batching.py).
60
+ """
61
+
62
+ def __init__(
63
+ self,
64
+ *,
65
+ asset_id: str,
66
+ user_info: dict[str, str] | None = None,
67
+ ) -> None:
68
+ self._asset_id: str = asset_id
69
+ self._user_info: dict[str, str] = user_info or {
70
+ "name": "Athena Agent",
71
+ "email": "agent@athenaintel.com",
72
+ }
73
+ self._client: Any | None = None
74
+ self._doc_handle: Any | None = None
75
+ self._opened: bool = False
76
+ self._closed: bool = False
77
+
78
+ @property
79
+ def asset_id(self) -> str:
80
+ return self._asset_id
81
+
82
+ @property
83
+ def is_open(self) -> bool:
84
+ return self._opened and not self._closed
85
+
86
+ async def open(self) -> None:
87
+ """Open the Superdoc SDK session against Keryx.
88
+
89
+ Raises:
90
+ AuthenticationError: if SUPERDOC_COLLAB_TOKEN is missing/invalid.
91
+ SessionError: on any other open-time failure.
92
+ """
93
+ if self._closed:
94
+ raise DocumentClosedError(
95
+ f"Session for asset {self._asset_id} was already closed.",
96
+ )
97
+ if self._opened:
98
+ return
99
+
100
+ token: str | None = os.environ.get(_TOKEN_ENV)
101
+ ws_url: str | None = os.environ.get(_WS_URL_ENV)
102
+ workspace_id: str | None = os.environ.get(_WORKSPACE_ENV)
103
+
104
+ if not token:
105
+ raise AuthenticationError(
106
+ f"Missing environment variable {_TOKEN_ENV}. "
107
+ "The Athena backend must mint a short-lived Keryx JWT "
108
+ "and inject it into the Daytona sandbox.",
109
+ )
110
+ if not ws_url:
111
+ raise SessionError(
112
+ f"Missing environment variable {_WS_URL_ENV}.",
113
+ )
114
+ if not workspace_id:
115
+ raise SessionError(
116
+ f"Missing environment variable {_WORKSPACE_ENV}.",
117
+ )
118
+
119
+ # Lazy import so `import docx` doesn't pay for superdoc_sdk init cost
120
+ from superdoc import AsyncSuperDocClient
121
+ from superdoc.generated.client import (
122
+ DocOpenParams,
123
+ DocOpenParamsCollaborationVariant1,
124
+ )
125
+
126
+ collab_base_url: str = f"{ws_url}/ws/{workspace_id}"
127
+ self._client = AsyncSuperDocClient(
128
+ user=self._user_info,
129
+ env={_TOKEN_ENV: token},
130
+ request_timeout_ms=30_000,
131
+ watchdog_timeout_ms=60_000,
132
+ )
133
+
134
+ collab_config: DocOpenParamsCollaborationVariant1 = {
135
+ "url": collab_base_url,
136
+ "documentId": self._asset_id,
137
+ "tokenEnv": _TOKEN_ENV,
138
+ "providerType": "y-websocket",
139
+ }
140
+ open_params: DocOpenParams = {"collaboration": collab_config}
141
+
142
+ try:
143
+ self._doc_handle = await self._client.open(open_params)
144
+ except Exception as e:
145
+ msg: str = str(e).lower()
146
+ if "401" in msg or "unauthorized" in msg or "forbidden" in msg:
147
+ raise AuthenticationError(
148
+ f"Keryx rejected the collab token: {e}",
149
+ ) from e
150
+ raise SessionError(
151
+ f"Failed to open Superdoc session for {self._asset_id}: {e}",
152
+ ) from e
153
+
154
+ self._opened = True
155
+ _log_info(f"Opened {self._asset_id}")
156
+
157
+ @property
158
+ def doc(self) -> Any:
159
+ """Return the opened Superdoc doc handle.
160
+
161
+ Lazy-opens on first access. Blocks on the event-loop thread
162
+ via the sync bridge; do not call this from inside a coroutine.
163
+ """
164
+ if not self._opened:
165
+ raise SessionError(
166
+ "Session not yet opened; call await session.open() first "
167
+ "or use the sync facade via Document().",
168
+ )
169
+ if self._closed:
170
+ raise DocumentClosedError(
171
+ f"Session for asset {self._asset_id} is closed.",
172
+ )
173
+ return self._doc_handle
174
+
175
+ async def save(self, *, in_place: bool = True) -> None:
176
+ """Flush pending mutations to Keryx and persist.
177
+
178
+ The AsyncSuperDocClient writes over WebSocket; we add a short
179
+ flush delay (matches superdoc_write_utils.py:227-237) to
180
+ ensure updates land before dispose().
181
+ """
182
+ if not self._opened:
183
+ raise SessionError("Cannot save a session that was never opened.")
184
+ if self._closed:
185
+ raise DocumentClosedError(f"Session {self._asset_id} is closed.")
186
+
187
+ await self._doc_handle.save({"inPlace": in_place})
188
+ _log_info(f"Saved {self._asset_id}")
189
+
190
+ async def close(self) -> None:
191
+ """Close the Superdoc session. Idempotent."""
192
+ if self._closed:
193
+ return
194
+
195
+ if self._opened and self._doc_handle is not None:
196
+ try:
197
+ # Match the 1-second flush delay from superdoc_write_utils
198
+ # before tearing down the WebSocket
199
+ await asyncio.sleep(1)
200
+ await self._doc_handle.close({"discard": True})
201
+ except Exception as close_err:
202
+ _log_warning(
203
+ f"Doc close failed for {self._asset_id}: {close_err}",
204
+ )
205
+
206
+ if self._client is not None:
207
+ try:
208
+ await self._client.dispose()
209
+ except Exception as dispose_err:
210
+ _log_warning(
211
+ f"Client dispose failed for {self._asset_id}: {dispose_err}",
212
+ )
213
+
214
+ self._closed = True
215
+ _log_info(f"Closed {self._asset_id}")
216
+
217
+
218
@asynccontextmanager
async def open_session(
    *,
    asset_id: str,
    user_info: dict[str, str] | None = None,
) -> "AsyncIterator[Session]":
    """Yield an opened ``Session`` and always close it on exit.

    The session is closed whether the ``async with`` body finishes
    normally, raises, or ``open()`` itself fails.
    """
    managed = Session(asset_id=asset_id, user_info=user_info)
    try:
        await managed.open()
        yield managed
    finally:
        await managed.close()