athena-python-docx 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- athena_python_docx-0.1.0/.gitignore +23 -0
- athena_python_docx-0.1.0/CLAUDE.md +63 -0
- athena_python_docx-0.1.0/PKG-INFO +76 -0
- athena_python_docx-0.1.0/README.md +53 -0
- athena_python_docx-0.1.0/docx/__init__.py +16 -0
- athena_python_docx-0.1.0/docx/_batching.py +86 -0
- athena_python_docx-0.1.0/docx/api.py +41 -0
- athena_python_docx-0.1.0/docx/client.py +230 -0
- athena_python_docx-0.1.0/docx/document.py +335 -0
- athena_python_docx-0.1.0/docx/enum/__init__.py +1 -0
- athena_python_docx-0.1.0/docx/enum/table.py +15 -0
- athena_python_docx-0.1.0/docx/enum/text.py +29 -0
- athena_python_docx-0.1.0/docx/errors.py +30 -0
- athena_python_docx-0.1.0/docx/shared.py +81 -0
- athena_python_docx-0.1.0/docx/table.py +213 -0
- athena_python_docx-0.1.0/docx/text/__init__.py +8 -0
- athena_python_docx-0.1.0/docx/text/paragraph.py +141 -0
- athena_python_docx-0.1.0/docx/text/run.py +187 -0
- athena_python_docx-0.1.0/docx/typing.py +30 -0
- athena_python_docx-0.1.0/pyproject.toml +52 -0
- athena_python_docx-0.1.0/scripts/publish.sh +37 -0
- athena_python_docx-0.1.0/tests/__init__.py +0 -0
- athena_python_docx-0.1.0/tests/conftest.py +86 -0
- athena_python_docx-0.1.0/tests/test_commands.py +300 -0
- athena_python_docx-0.1.0/tests/test_python_docx_api_parity.py +161 -0
- athena_python_docx-0.1.0/tests/test_smoke_integration.py +56 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
node_modules/
|
|
2
|
+
dist/
|
|
3
|
+
.env
|
|
4
|
+
.env.local
|
|
5
|
+
.env.*.local
|
|
6
|
+
|
|
7
|
+
# Python
|
|
8
|
+
__pycache__/
|
|
9
|
+
*.py[cod]
|
|
10
|
+
*$py.class
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
*.egg-info/
|
|
15
|
+
build/
|
|
16
|
+
dist/
|
|
17
|
+
.pytest_cache/
|
|
18
|
+
.mypy_cache/
|
|
19
|
+
.ruff_cache/
|
|
20
|
+
|
|
21
|
+
# Editors
|
|
22
|
+
.vscode/
|
|
23
|
+
.idea/
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# athena-python-docx SDK — Claude Instructions
|
|
2
|
+
|
|
3
|
+
## API Parity Rule (MANDATORY)
|
|
4
|
+
|
|
5
|
+
**This SDK MUST be a 100% exact replica of the standard [python-docx](https://python-docx.readthedocs.io/) API.**
|
|
6
|
+
|
|
7
|
+
Every class, method, property, and parameter name must match python-docx exactly. The goal is that any code written for python-docx works identically with this SDK — no surprises, no differences.
|
|
8
|
+
|
|
9
|
+
### What this means in practice
|
|
10
|
+
|
|
11
|
+
- **Do NOT add new methods** that don't exist in python-docx
|
|
12
|
+
- **Do NOT add new properties** that don't exist in python-docx
|
|
13
|
+
- **Do NOT rename parameters** — use the exact same parameter names as python-docx
|
|
14
|
+
- **Do NOT change method signatures** — if python-docx's `add_heading()` takes `(text, level=1)`, ours must too
|
|
15
|
+
- **Do NOT change return types** — if python-docx's `paragraph.runs` returns `list[Run]`, ours must too
|
|
16
|
+
|
|
17
|
+
### How to verify parity
|
|
18
|
+
|
|
19
|
+
Before adding or modifying any API surface:
|
|
20
|
+
|
|
21
|
+
1. Check the [python-docx documentation](https://python-docx.readthedocs.io/)
|
|
22
|
+
2. Check the [python-docx source code](https://github.com/python-openxml/python-docx)
|
|
23
|
+
3. Confirm the method/property/parameter exists with the same name and signature
|
|
24
|
+
4. If it doesn't exist in python-docx, **do not add it** without explicit user approval
|
|
25
|
+
|
|
26
|
+
Run `uv run pytest tests/test_python_docx_api_parity.py -v -s` to verify parity.
|
|
27
|
+
|
|
28
|
+
### Intentionally omitted (Superdoc SDK limitations)
|
|
29
|
+
|
|
30
|
+
These standard python-docx members don't apply to a Superdoc-backed SDK:
|
|
31
|
+
|
|
32
|
+
- `Paragraph._p`, `Run._r` — XML element access (no local XML)
|
|
33
|
+
- `Document.part`, `Paragraph.part`, `Run.part` — package part access
|
|
34
|
+
- `Document.core_properties.last_modified_by` — we use Keryx attribution instead
|
|
35
|
+
- `Document.settings` — Word app settings (not surfaced by Superdoc)
|
|
36
|
+
- `InlineShape.chart` — charts (Phase 2+)
|
|
37
|
+
|
|
38
|
+
### If you need a deviation
|
|
39
|
+
|
|
40
|
+
If there is a genuine technical reason why a deviation from python-docx is necessary:
|
|
41
|
+
|
|
42
|
+
1. **Stop and ask the user** before implementing
|
|
43
|
+
2. Explain what the deviation is and why it's needed
|
|
44
|
+
3. Get explicit confirmation that the deviation is acceptable
|
|
45
|
+
4. Document the deviation in the "Intentionally omitted" list above
|
|
46
|
+
|
|
47
|
+
## Architecture
|
|
48
|
+
|
|
49
|
+
This is an **async-Superdoc-SDK-backed client** that mimics the sync python-docx API.
|
|
50
|
+
|
|
51
|
+
- Sync façade (matches python-docx) — `doc.save()`, `paragraph.add_run()`
|
|
52
|
+
- Under the hood, a persistent event-loop thread in `_batching.py` runs `AsyncSuperDocClient` coroutines
|
|
53
|
+
- No XML manipulation — calls translate to Superdoc SDK ops (insert, find, replace, tables.*, format.apply, create.image, hyperlinks.wrap)
|
|
54
|
+
- Mutations write directly to Keryx Y.Doc; users and other agents see them live
|
|
55
|
+
|
|
56
|
+
## Development
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
uv venv
|
|
60
|
+
uv pip install -e ".[dev]"
|
|
61
|
+
uv run pytest tests/ -x -q
|
|
62
|
+
uv run pytest tests/test_python_docx_api_parity.py -v
|
|
63
|
+
```
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: athena-python-docx
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Drop-in replacement for python-docx that connects to Athena's Superdoc/Keryx collaborative document stack
|
|
5
|
+
Project-URL: Homepage, https://athenaintelligence.ai
|
|
6
|
+
Author-email: Athena Intelligence <engineering@athenaintelligence.ai>
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Python: >=3.11
|
|
14
|
+
Requires-Dist: httpx>=0.27
|
|
15
|
+
Requires-Dist: superdoc-sdk>=1.6.0.dev6
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: mypy>=1.8; extra == 'dev'
|
|
18
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
19
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
20
|
+
Requires-Dist: python-docx>=1.1; extra == 'dev'
|
|
21
|
+
Requires-Dist: ruff>=0.3; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# athena-python-docx
|
|
25
|
+
|
|
26
|
+
Drop-in replacement for [python-docx](https://python-docx.readthedocs.io/) that connects to Athena's Superdoc + Keryx collaborative document stack.
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from docx import Document
|
|
32
|
+
from docx.shared import Inches, Pt, RGBColor
|
|
33
|
+
|
|
34
|
+
# Open an existing SuperDoc asset by its Athena asset_id
|
|
35
|
+
with Document("asset_abc123") as doc:
|
|
36
|
+
doc.add_heading("Market Analysis", level=1)
|
|
37
|
+
|
|
38
|
+
p = doc.add_paragraph()
|
|
39
|
+
r = p.add_run("Revenue grew ")
|
|
40
|
+
highlight = p.add_run("12.3% year-over-year")
|
|
41
|
+
highlight.bold = True
|
|
42
|
+
highlight.font.color.rgb = RGBColor(0x00, 0x80, 0x00)
|
|
43
|
+
p.add_run(".")
|
|
44
|
+
|
|
45
|
+
t = doc.add_table(rows=2, cols=2, style="TableGrid")
|
|
46
|
+
t.cell(0, 0).text = "Segment"
|
|
47
|
+
t.cell(0, 1).text = "Revenue"
|
|
48
|
+
t.cell(1, 0).text = "Enterprise"
|
|
49
|
+
t.cell(1, 1).text = "$4.3M"
|
|
50
|
+
|
|
51
|
+
doc.save()
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## API parity rule
|
|
55
|
+
|
|
56
|
+
This SDK mirrors python-docx's public API **exactly**. See `CLAUDE.md` for the full contract.
|
|
57
|
+
|
|
58
|
+
## Development
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
uv venv
|
|
62
|
+
uv pip install -e ".[dev]"
|
|
63
|
+
uv run pytest tests/ -x
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Environment variables
|
|
67
|
+
|
|
68
|
+
Required when connecting to Keryx (set by Athena backend when executing in Daytona):
|
|
69
|
+
|
|
70
|
+
- `SUPERDOC_COLLAB_TOKEN` — short-lived Keryx JWT
|
|
71
|
+
- `KERYX_WS_URL` — Keryx WebSocket base URL
|
|
72
|
+
- `ATHENA_WORKSPACE_ID` — workspace routing segment
|
|
73
|
+
|
|
74
|
+
## License
|
|
75
|
+
|
|
76
|
+
MIT
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# athena-python-docx
|
|
2
|
+
|
|
3
|
+
Drop-in replacement for [python-docx](https://python-docx.readthedocs.io/) that connects to Athena's Superdoc + Keryx collaborative document stack.
|
|
4
|
+
|
|
5
|
+
## Quick start
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from docx import Document
|
|
9
|
+
from docx.shared import Inches, Pt, RGBColor
|
|
10
|
+
|
|
11
|
+
# Open an existing SuperDoc asset by its Athena asset_id
|
|
12
|
+
with Document("asset_abc123") as doc:
|
|
13
|
+
doc.add_heading("Market Analysis", level=1)
|
|
14
|
+
|
|
15
|
+
p = doc.add_paragraph()
|
|
16
|
+
r = p.add_run("Revenue grew ")
|
|
17
|
+
highlight = p.add_run("12.3% year-over-year")
|
|
18
|
+
highlight.bold = True
|
|
19
|
+
highlight.font.color.rgb = RGBColor(0x00, 0x80, 0x00)
|
|
20
|
+
p.add_run(".")
|
|
21
|
+
|
|
22
|
+
t = doc.add_table(rows=2, cols=2, style="TableGrid")
|
|
23
|
+
t.cell(0, 0).text = "Segment"
|
|
24
|
+
t.cell(0, 1).text = "Revenue"
|
|
25
|
+
t.cell(1, 0).text = "Enterprise"
|
|
26
|
+
t.cell(1, 1).text = "$4.3M"
|
|
27
|
+
|
|
28
|
+
doc.save()
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## API parity rule
|
|
32
|
+
|
|
33
|
+
This SDK mirrors python-docx's public API **exactly**. See `CLAUDE.md` for the full contract.
|
|
34
|
+
|
|
35
|
+
## Development
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
uv venv
|
|
39
|
+
uv pip install -e ".[dev]"
|
|
40
|
+
uv run pytest tests/ -x
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Environment variables
|
|
44
|
+
|
|
45
|
+
Required when connecting to Keryx (set by Athena backend when executing in Daytona):
|
|
46
|
+
|
|
47
|
+
- `SUPERDOC_COLLAB_TOKEN` — short-lived Keryx JWT
|
|
48
|
+
- `KERYX_WS_URL` — Keryx WebSocket base URL
|
|
49
|
+
- `ATHENA_WORKSPACE_ID` — workspace routing segment
|
|
50
|
+
|
|
51
|
+
## License
|
|
52
|
+
|
|
53
|
+
MIT
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""athena-python-docx — drop-in replacement for python-docx.
|
|
2
|
+
|
|
3
|
+
Calls translate into Superdoc SDK operations against a Keryx Y.Doc.
|
|
4
|
+
See CLAUDE.md for the API parity contract.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
from docx.api import Document
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"Document",
|
|
15
|
+
"__version__",
|
|
16
|
+
]
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Sync→async bridge. Runs a single event loop in a background thread
|
|
2
|
+
and dispatches coroutines from sync callers.
|
|
3
|
+
|
|
4
|
+
This is the only place in the SDK that uses threading.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import threading
|
|
11
|
+
from typing import Any, Coroutine, TypeVar
|
|
12
|
+
|
|
13
|
+
T = TypeVar("T")
|
|
14
|
+
|
|
15
|
+
# Default per-op timeout. Individual Superdoc SDK calls should complete well
|
|
16
|
+
# under this — the value is a safety net so a WebSocket stall or Keryx
|
|
17
|
+
# downtime surfaces a TimeoutError instead of wedging the calling thread
|
|
18
|
+
# until the outer Daytona INSTRUCTION_TIMEOUT_SECONDS kill.
|
|
19
|
+
DEFAULT_OP_TIMEOUT_SECONDS: float = 60.0
|
|
20
|
+
|
|
21
|
+
_loop: asyncio.AbstractEventLoop | None = None
|
|
22
|
+
_thread: threading.Thread | None = None
|
|
23
|
+
_lock = threading.Lock()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _ensure_loop() -> asyncio.AbstractEventLoop:
    """Return the persistent background event loop, starting it lazily.

    The loop runs ``run_forever()`` on a daemon thread named
    ``docx-sdk-loop``. Creation and reuse both happen under ``_lock`` so
    concurrent callers end up sharing one loop.

    Returns:
        The background event loop (newly started or reused).
    """
    global _loop, _thread

    with _lock:
        # Reuse is keyed on the worker thread being alive rather than on
        # loop.is_running(): is_running() is still False for a short window
        # after thread.start(), and a second caller arriving in that window
        # would otherwise spin up a duplicate loop and thread.
        if (
            _loop is not None
            and _thread is not None
            and _thread.is_alive()
            and not _loop.is_closed()
        ):
            return _loop

        loop = asyncio.new_event_loop()
        loop_ready = threading.Event()

        def _run() -> None:
            asyncio.set_event_loop(loop)
            # Signal readiness from inside the loop: this callback only
            # fires once run_forever() is actually executing.
            loop.call_soon(loop_ready.set)
            loop.run_forever()

        thread = threading.Thread(
            target=_run,
            name="docx-sdk-loop",
            daemon=True,
        )
        thread.start()
        # Best-effort wait; coroutines submitted before the loop is running
        # are queued and picked up once it starts.
        loop_ready.wait(timeout=5)
        _loop = loop
        _thread = thread
        return loop
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def run_sync(
    coro: Coroutine[Any, Any, T],
    *,
    timeout: float | None = DEFAULT_OP_TIMEOUT_SECONDS,
) -> T:
    """Run a coroutine on the background loop and block until it returns.

    Args:
        coro: The coroutine to execute.
        timeout: Maximum seconds to wait. ``None`` blocks indefinitely
            (use sparingly — prefer the default so stalls surface early).

    Returns:
        Whatever the coroutine returns.

    Raises:
        concurrent.futures.TimeoutError: if the coroutine does not
            complete within `timeout` seconds. The coroutine is cancelled
            before the error propagates.
        The coroutine's exception, re-raised in the calling thread.
    """
    # Imported locally so the block does not depend on a new module-level
    # import; the alias avoids shadowing the builtin TimeoutError.
    from concurrent.futures import TimeoutError as FutureTimeoutError

    loop = _ensure_loop()
    future = asyncio.run_coroutine_threadsafe(coro, loop)
    try:
        return future.result(timeout=timeout)
    except FutureTimeoutError:
        # Without this the coroutine would keep running on the background
        # loop (and could still mutate the document) after the caller has
        # been told the operation timed out. cancel() is a no-op if the
        # coroutine itself finished by raising a timeout error.
        future.cancel()
        raise
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def shutdown() -> None:
    """Stop the background event loop. For test teardown.

    Asks the loop to stop, waits up to 5 seconds for the worker thread to
    exit, closes the loop if the thread did exit, and clears the module
    state so the next ``_ensure_loop()`` call starts a fresh loop. Does
    nothing when no loop was ever started.
    """
    global _loop, _thread
    with _lock:
        if _loop is None:
            return

        loop, thread = _loop, _thread

        # call_soon_threadsafe raises RuntimeError on a closed loop.
        if not loop.is_closed():
            loop.call_soon_threadsafe(loop.stop)
        if thread is not None:
            thread.join(timeout=5)

        # Release the loop's selector and self-pipe. Only safe once the
        # worker thread is gone: closing a running loop raises RuntimeError.
        thread_exited = thread is None or not thread.is_alive()
        if thread_exited and not loop.is_running() and not loop.is_closed():
            loop.close()

        _loop = None
        _thread = None
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""The `Document` factory — matches python-docx's `docx.Document(path)`.
|
|
2
|
+
|
|
3
|
+
python-docx signature:
|
|
4
|
+
Document(docx=None) -> Document
|
|
5
|
+
|
|
6
|
+
Our signature deviates from the path-based one because we don't open
|
|
7
|
+
.docx files from disk — we open Y.Doc assets from Keryx. The parameter
|
|
8
|
+
is reused: you pass an asset_id string where python-docx would take a
|
|
9
|
+
file path.
|
|
10
|
+
|
|
11
|
+
If the caller truly needs to load a local .docx (rare inside Daytona),
|
|
12
|
+
they can upload it first via the apps/api upload route (Phase 2+) or
|
|
13
|
+
use the existing `replaceFile` flow from the Olympus side.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from docx.document import Document as _Document
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def Document(docx: str | None = None) -> _Document:
    """Bind to an existing SuperDoc asset and return it for editing.

    Args:
        docx: Athena asset_id of the SuperDoc document to open.
            python-docx takes a file path in this position; this SDK
            takes an asset_id. None is not supported in Phase 1.

    Returns:
        A Document instance bound to the asset.

    Raises:
        ValueError: if `docx` is None or empty.
    """
    if docx:
        return _Document(asset_id=docx)

    # Falsy input (None or "") — explain how the factory must be called.
    message = (
        "athena-python-docx requires an asset_id. "
        "Pass the SuperDoc asset ID as the first argument: "
        "Document('asset_xxx...')"
    )
    raise ValueError(message)
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Superdoc SDK session wrapper — auth, open/close lifecycle, error mapping.
|
|
2
|
+
|
|
3
|
+
This module is the only place in this package that imports `superdoc`
|
|
4
|
+
directly. All other classes (Document, Paragraph, Run, Table) go
|
|
5
|
+
through a `Session` instance.
|
|
6
|
+
|
|
7
|
+
Environment variables expected inside Daytona:
|
|
8
|
+
SUPERDOC_COLLAB_TOKEN: short-lived Keryx JWT (signed by Athena backend)
|
|
9
|
+
KERYX_WS_URL: Keryx WebSocket base URL (e.g. "wss://keryx.athena...")
|
|
10
|
+
ATHENA_WORKSPACE_ID: workspace routing path segment
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import os
|
|
17
|
+
from contextlib import asynccontextmanager
|
|
18
|
+
from typing import TYPE_CHECKING, Any
|
|
19
|
+
|
|
20
|
+
from docx.errors import (
|
|
21
|
+
AuthenticationError,
|
|
22
|
+
DocumentClosedError,
|
|
23
|
+
SessionError,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from collections.abc import AsyncIterator
|
|
28
|
+
|
|
29
|
+
_TOKEN_ENV = "SUPERDOC_COLLAB_TOKEN" # noqa: S105
|
|
30
|
+
_WS_URL_ENV = "KERYX_WS_URL"
|
|
31
|
+
_WORKSPACE_ENV = "ATHENA_WORKSPACE_ID"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _log_info(msg: str) -> None:
    """Emit an informational line on stderr, prefixed with ``[docx-sdk]``.

    Plain ``print`` is used instead of loguru so the SDK's runtime
    dependency tree stays small — this code runs inside Daytona sandboxes.
    """
    import sys

    line = f"[docx-sdk] {msg}"
    print(line, file=sys.stderr)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _log_warning(msg: str) -> None:
    """Emit a warning line on stderr, prefixed with ``[docx-sdk] WARN:``."""
    import sys

    line = f"[docx-sdk] WARN: {msg}"
    print(line, file=sys.stderr)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class Session:
|
|
50
|
+
"""Manages the lifecycle of an AsyncSuperDocClient session.
|
|
51
|
+
|
|
52
|
+
Lifecycle:
|
|
53
|
+
create Session (does NOT open yet) → first op triggers open() →
|
|
54
|
+
subsequent ops reuse the same client → save() flushes → close()
|
|
55
|
+
disposes.
|
|
56
|
+
|
|
57
|
+
Thread safety: NOT thread-safe. Each Document should have its own
|
|
58
|
+
Session. The sync facade serializes all calls through a single
|
|
59
|
+
event-loop thread (see _batching.py).
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
def __init__(
|
|
63
|
+
self,
|
|
64
|
+
*,
|
|
65
|
+
asset_id: str,
|
|
66
|
+
user_info: dict[str, str] | None = None,
|
|
67
|
+
) -> None:
|
|
68
|
+
self._asset_id: str = asset_id
|
|
69
|
+
self._user_info: dict[str, str] = user_info or {
|
|
70
|
+
"name": "Athena Agent",
|
|
71
|
+
"email": "agent@athenaintel.com",
|
|
72
|
+
}
|
|
73
|
+
self._client: Any | None = None
|
|
74
|
+
self._doc_handle: Any | None = None
|
|
75
|
+
self._opened: bool = False
|
|
76
|
+
self._closed: bool = False
|
|
77
|
+
|
|
78
|
+
@property
|
|
79
|
+
def asset_id(self) -> str:
|
|
80
|
+
return self._asset_id
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def is_open(self) -> bool:
|
|
84
|
+
return self._opened and not self._closed
|
|
85
|
+
|
|
86
|
+
async def open(self) -> None:
|
|
87
|
+
"""Open the Superdoc SDK session against Keryx.
|
|
88
|
+
|
|
89
|
+
Raises:
|
|
90
|
+
AuthenticationError: if SUPERDOC_COLLAB_TOKEN is missing/invalid.
|
|
91
|
+
SessionError: on any other open-time failure.
|
|
92
|
+
"""
|
|
93
|
+
if self._closed:
|
|
94
|
+
raise DocumentClosedError(
|
|
95
|
+
f"Session for asset {self._asset_id} was already closed.",
|
|
96
|
+
)
|
|
97
|
+
if self._opened:
|
|
98
|
+
return
|
|
99
|
+
|
|
100
|
+
token: str | None = os.environ.get(_TOKEN_ENV)
|
|
101
|
+
ws_url: str | None = os.environ.get(_WS_URL_ENV)
|
|
102
|
+
workspace_id: str | None = os.environ.get(_WORKSPACE_ENV)
|
|
103
|
+
|
|
104
|
+
if not token:
|
|
105
|
+
raise AuthenticationError(
|
|
106
|
+
f"Missing environment variable {_TOKEN_ENV}. "
|
|
107
|
+
"The Athena backend must mint a short-lived Keryx JWT "
|
|
108
|
+
"and inject it into the Daytona sandbox.",
|
|
109
|
+
)
|
|
110
|
+
if not ws_url:
|
|
111
|
+
raise SessionError(
|
|
112
|
+
f"Missing environment variable {_WS_URL_ENV}.",
|
|
113
|
+
)
|
|
114
|
+
if not workspace_id:
|
|
115
|
+
raise SessionError(
|
|
116
|
+
f"Missing environment variable {_WORKSPACE_ENV}.",
|
|
117
|
+
)
|
|
118
|
+
|
|
119
|
+
# Lazy import so `import docx` doesn't pay for superdoc_sdk init cost
|
|
120
|
+
from superdoc import AsyncSuperDocClient
|
|
121
|
+
from superdoc.generated.client import (
|
|
122
|
+
DocOpenParams,
|
|
123
|
+
DocOpenParamsCollaborationVariant1,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
collab_base_url: str = f"{ws_url}/ws/{workspace_id}"
|
|
127
|
+
self._client = AsyncSuperDocClient(
|
|
128
|
+
user=self._user_info,
|
|
129
|
+
env={_TOKEN_ENV: token},
|
|
130
|
+
request_timeout_ms=30_000,
|
|
131
|
+
watchdog_timeout_ms=60_000,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
collab_config: DocOpenParamsCollaborationVariant1 = {
|
|
135
|
+
"url": collab_base_url,
|
|
136
|
+
"documentId": self._asset_id,
|
|
137
|
+
"tokenEnv": _TOKEN_ENV,
|
|
138
|
+
"providerType": "y-websocket",
|
|
139
|
+
}
|
|
140
|
+
open_params: DocOpenParams = {"collaboration": collab_config}
|
|
141
|
+
|
|
142
|
+
try:
|
|
143
|
+
self._doc_handle = await self._client.open(open_params)
|
|
144
|
+
except Exception as e:
|
|
145
|
+
msg: str = str(e).lower()
|
|
146
|
+
if "401" in msg or "unauthorized" in msg or "forbidden" in msg:
|
|
147
|
+
raise AuthenticationError(
|
|
148
|
+
f"Keryx rejected the collab token: {e}",
|
|
149
|
+
) from e
|
|
150
|
+
raise SessionError(
|
|
151
|
+
f"Failed to open Superdoc session for {self._asset_id}: {e}",
|
|
152
|
+
) from e
|
|
153
|
+
|
|
154
|
+
self._opened = True
|
|
155
|
+
_log_info(f"Opened {self._asset_id}")
|
|
156
|
+
|
|
157
|
+
@property
|
|
158
|
+
def doc(self) -> Any:
|
|
159
|
+
"""Return the opened Superdoc doc handle.
|
|
160
|
+
|
|
161
|
+
Lazy-opens on first access. Blocks on the event-loop thread
|
|
162
|
+
via the sync bridge; do not call this from inside a coroutine.
|
|
163
|
+
"""
|
|
164
|
+
if not self._opened:
|
|
165
|
+
raise SessionError(
|
|
166
|
+
"Session not yet opened; call await session.open() first "
|
|
167
|
+
"or use the sync facade via Document().",
|
|
168
|
+
)
|
|
169
|
+
if self._closed:
|
|
170
|
+
raise DocumentClosedError(
|
|
171
|
+
f"Session for asset {self._asset_id} is closed.",
|
|
172
|
+
)
|
|
173
|
+
return self._doc_handle
|
|
174
|
+
|
|
175
|
+
async def save(self, *, in_place: bool = True) -> None:
|
|
176
|
+
"""Flush pending mutations to Keryx and persist.
|
|
177
|
+
|
|
178
|
+
The AsyncSuperDocClient writes over WebSocket; we add a short
|
|
179
|
+
flush delay (matches superdoc_write_utils.py:227-237) to
|
|
180
|
+
ensure updates land before dispose().
|
|
181
|
+
"""
|
|
182
|
+
if not self._opened:
|
|
183
|
+
raise SessionError("Cannot save a session that was never opened.")
|
|
184
|
+
if self._closed:
|
|
185
|
+
raise DocumentClosedError(f"Session {self._asset_id} is closed.")
|
|
186
|
+
|
|
187
|
+
await self._doc_handle.save({"inPlace": in_place})
|
|
188
|
+
_log_info(f"Saved {self._asset_id}")
|
|
189
|
+
|
|
190
|
+
async def close(self) -> None:
|
|
191
|
+
"""Close the Superdoc session. Idempotent."""
|
|
192
|
+
if self._closed:
|
|
193
|
+
return
|
|
194
|
+
|
|
195
|
+
if self._opened and self._doc_handle is not None:
|
|
196
|
+
try:
|
|
197
|
+
# Match the 1-second flush delay from superdoc_write_utils
|
|
198
|
+
# before tearing down the WebSocket
|
|
199
|
+
await asyncio.sleep(1)
|
|
200
|
+
await self._doc_handle.close({"discard": True})
|
|
201
|
+
except Exception as close_err:
|
|
202
|
+
_log_warning(
|
|
203
|
+
f"Doc close failed for {self._asset_id}: {close_err}",
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
if self._client is not None:
|
|
207
|
+
try:
|
|
208
|
+
await self._client.dispose()
|
|
209
|
+
except Exception as dispose_err:
|
|
210
|
+
_log_warning(
|
|
211
|
+
f"Client dispose failed for {self._asset_id}: {dispose_err}",
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
self._closed = True
|
|
215
|
+
_log_info(f"Closed {self._asset_id}")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
@asynccontextmanager
async def open_session(
    *,
    asset_id: str,
    user_info: dict[str, str] | None = None,
) -> "AsyncIterator[Session]":
    """Yield an opened Session and close it when the ``async with`` exits.

    ``close()`` runs in a ``finally`` clause, so it is also awaited when
    ``open()`` itself fails or the body raises.

    Args:
        asset_id: Asset to bind the session to.
        user_info: Optional attribution mapping forwarded to ``Session``.
    """
    managed = Session(asset_id=asset_id, user_info=user_info)
    try:
        await managed.open()
        yield managed
    finally:
        await managed.close()
|