athena-python-docx 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
docx/document.py ADDED
@@ -0,0 +1,335 @@
1
+ """Document class — mirrors python-docx's Document.
2
+
3
+ Phase 1 supported methods:
4
+ .paragraphs -> list[Paragraph] (snapshot on access)
5
+ .tables -> list[Table]
6
+ .add_paragraph(text, style=None) -> Paragraph
7
+ .add_heading(text, level=1) -> Paragraph
8
+ .add_table(rows, cols, style=None) -> Table
9
+ .add_picture(image_path_or_stream, width=None, height=None) -> None
10
+ .add_page_break() -> None
11
+ .save(path=None) -> None (path ignored; always in-place)
12
+ .close() -> None
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import base64
18
+ import io
19
+ import sys
20
+ from typing import TYPE_CHECKING, BinaryIO
21
+
22
+ from docx._batching import run_sync
23
+ from docx.client import Session
24
+ from docx.errors import DocumentClosedError, ValidationError
25
+
26
+ if TYPE_CHECKING:
27
+ from docx.shared import Emu
28
+ from docx.table import Table
29
+ from docx.text.paragraph import Paragraph
30
+
31
+
32
+ def _log_warn(msg: str) -> None:
33
+ print(f"[docx-sdk] WARN: {msg}", file=sys.stderr)
34
+
35
+
36
class Document:
    """A Word document backed by a Superdoc/Keryx Y.Doc.

    Construct via :func:`docx.api.Document` (recommended) — do not
    instantiate this class directly in user code.

    The object is a context manager: leaving the ``with`` block without an
    exception autosaves when there are edits made through this object that
    have not been saved yet, and the session is closed in every case.
    """

    # Markdown ATX headings stop at six '#' characters; more than that is
    # treated as literal text, so heading levels are clamped to this range.
    _MAX_MARKDOWN_HEADING_LEVEL = 6

    def __init__(self, *, asset_id: str) -> None:
        self._session: Session = Session(asset_id=asset_id)
        # True only while no edit has been issued through this object since
        # the last successful save().
        self._saved: bool = False
        self._closed: bool = False

    # ---- Public properties ----

    @property
    def paragraphs(self) -> list["Paragraph"]:
        """Return all paragraphs in document order.

        python-docx returns a live list; we return a snapshot.
        For a live-ish list, call this again.
        """
        from docx.text.paragraph import Paragraph

        self._ensure_open()
        blocks: list[dict] = run_sync(
            self._session.doc.blocks.list({"type": "paragraph"}),
        )
        return [
            Paragraph(session=self._session, node_id=b["nodeId"])
            for b in blocks
        ]

    @property
    def tables(self) -> list["Table"]:
        """Return all tables in document order (snapshot on access)."""
        from docx.table import Table

        self._ensure_open()
        find_result: dict = run_sync(
            self._session.doc.find({"type": "table"}),
        )
        items: list[dict] = find_result.get("items", [])
        return [
            Table(session=self._session, node_id=item["address"]["nodeId"])
            for item in items
        ]

    # ---- Append operations ----

    def add_paragraph(
        self,
        text: str = "",
        style: str | None = None,
    ) -> "Paragraph":
        """Append a new paragraph at the end of the document.

        The content is sent to Superdoc as markdown, so markdown syntax
        inside *text* is not escaped here.

        Args:
            text: The paragraph text.
            style: The paragraph style name. Only names starting with
                "heading" (case-insensitive, e.g. "Heading 2") change the
                output; they become a markdown heading whose level is
                clamped to 1..6. Any other style, or None, yields a plain
                paragraph.

        Returns:
            The newly-created Paragraph.

        Raises:
            DocumentClosedError: If the document has been closed.
            RuntimeError: If the insert response carries no usable nodeId.
        """
        from docx.text.paragraph import Paragraph

        self._ensure_open()
        md: str = self._markdown_for(text, style)

        self._mark_dirty()
        result: dict = run_sync(
            self._session.doc.insert(
                {"value": md, "type": "markdown"},
            ),
        )
        node_id: str = _extract_inserted_node_id(result, expected_type="paragraph")
        if not node_id:
            raise RuntimeError(
                f"Superdoc did not return a nodeId for add_paragraph: {result!r}",
            )
        return Paragraph(session=self._session, node_id=node_id)

    def add_heading(
        self,
        text: str = "",
        level: int = 1,
    ) -> "Paragraph":
        """Append a heading (convenience over add_paragraph).

        Level 0 maps to the "Title" style, which add_paragraph currently
        inserts as a plain paragraph; levels above 6 are inserted as
        level-6 markdown headings.

        Raises:
            ValidationError: If *level* is outside 0..9.
        """
        if not 0 <= level <= 9:
            raise ValidationError(
                f"level must be in 0..9; got {level}",
            )
        style: str = "Title" if level == 0 else f"Heading {level}"
        return self.add_paragraph(text=text, style=style)

    def add_table(
        self,
        rows: int,
        cols: int,
        style: str | None = None,
    ) -> "Table":
        """Append a table with the given dimensions.

        Args:
            rows: Number of rows, at least 1.
            cols: Number of columns, at least 1.
            style: Optional table style id applied after creation.

        Returns:
            A Table handle for the created table.

        Raises:
            DocumentClosedError: If the document has been closed.
            ValidationError: If *rows* or *cols* is below 1.
            RuntimeError: If the create response carries no usable nodeId.
        """
        from docx.table import Table

        self._ensure_open()
        if rows < 1 or cols < 1:
            raise ValidationError(
                f"rows and cols must be >= 1; got rows={rows} cols={cols}",
            )

        self._mark_dirty()
        result: dict = run_sync(
            self._session.doc.create.table(
                {"rows": rows, "cols": cols},
            ),
        )
        result_data: object = (
            result.get("result", {}) if isinstance(result, dict) else {}
        )
        table_info: object = (
            result_data.get("table", {}) if isinstance(result_data, dict) else {}
        )
        raw_id: object = (
            table_info.get("nodeId") if isinstance(table_info, dict) else None
        )
        # A missing/None id must stay falsy; str(None) would be "None".
        node_id: str = "" if raw_id is None else str(raw_id)
        if not node_id:
            raise RuntimeError(
                f"Superdoc did not return a nodeId for add_table: {result!r}",
            )

        if style:
            run_sync(
                self._session.doc.tables.set_style(
                    {"nodeId": node_id, "styleId": style},
                ),
            )
            # Re-fetch nodeId — set_style may rotate it. Prefer matching the
            # original id so concurrent table insertions from other editors
            # don't cause us to pick up the wrong table. Only fall back to
            # last-in-list (with a warning) when the original id is gone.
            refetch: object = run_sync(
                self._session.doc.find({"type": "table"}),
            )
            items: object = (
                refetch.get("items", []) if isinstance(refetch, dict) else []
            )
            table_ids: list[str] = [
                self._address_node_id(item)
                for item in (items if isinstance(items, list) else [])
            ]
            if table_ids and node_id not in table_ids:
                new_id: str = table_ids[-1]
                if new_id:
                    _log_warn(
                        f"add_table nodeId rotated after "
                        f"set_style: {node_id} -> {new_id}",
                    )
                    node_id = new_id

        return Table(session=self._session, node_id=node_id)

    def add_picture(
        self,
        image_path_or_stream: str | BinaryIO,
        width: "Emu | int | None" = None,
        height: "Emu | int | None" = None,
    ) -> None:
        """Append an inline image at the end of the document.

        Args:
            image_path_or_stream: A filesystem path (``str`` or any
                ``os.PathLike``) or an object with a ``read()`` method
                returning bytes.
            width: Target width in EMU; defaults to 576 px when omitted.
            height: Target height in EMU; defaults to 432 px when omitted.

        The MIME type is taken from the image's leading bytes when they
        are recognised (PNG, JPEG, GIF, WebP), then from the file
        extension, and finally falls back to ``image/png``.

        Raises:
            DocumentClosedError: If the document has been closed.
            TypeError: If the argument is neither a path nor a binary
                stream, or the stream's ``read()`` does not return bytes.
        """
        import os

        self._ensure_open()

        image_bytes: bytes
        extension_type: str | None = None
        if isinstance(image_path_or_stream, (str, os.PathLike)):
            path = os.fspath(image_path_or_stream)
            with open(path, "rb") as f:
                image_bytes = f.read()
            lower: str = os.fsdecode(path).lower()
            if lower.endswith((".jpg", ".jpeg")):
                extension_type = "image/jpeg"
            elif lower.endswith(".gif"):
                extension_type = "image/gif"
            elif lower.endswith(".webp"):
                extension_type = "image/webp"
        elif hasattr(image_path_or_stream, "read"):
            raw = image_path_or_stream.read()
            if not isinstance(raw, (bytes, bytearray, memoryview)):
                raise TypeError(
                    "Expected a binary stream; read() returned "
                    f"{type(raw).__name__}",
                )
            image_bytes = bytes(raw)
        else:
            raise TypeError(
                "Expected path str or binary stream, "
                f"got {type(image_path_or_stream).__name__}",
            )

        content_type: str = (
            self._sniff_image_content_type(image_bytes)
            or extension_type
            or "image/png"
        )
        b64: str = base64.b64encode(image_bytes).decode("ascii")
        data_uri: str = f"data:{content_type};base64,{b64}"

        # Default 6-inch wide, proportional 4.5-inch tall at 96 DPI
        # (python-docx uses image's intrinsic size; we don't have that here,
        # so we default to a reasonable 6x4.5 inch rectangle)
        w_px: float = _emu_to_px(width) if width is not None else 576.0
        h_px: float = _emu_to_px(height) if height is not None else 432.0

        self._mark_dirty()
        run_sync(
            self._session.doc.create.image(
                {
                    "src": data_uri,
                    "size": {"width": w_px, "height": h_px},
                    "at": {"kind": "documentEnd"},
                },
            ),
        )

    def add_page_break(self) -> None:
        """Append a page break at the end of the document."""
        self._ensure_open()
        # A form-feed character is inserted as plain text; Superdoc is
        # expected to export it as <w:br w:type="page"/> — TODO confirm.
        self._mark_dirty()
        run_sync(
            self._session.doc.insert(
                {"value": "\f", "type": "text"},
            ),
        )

    # ---- Lifecycle ----

    def save(self, path: str | None = None) -> None:  # noqa: ARG002
        """Flush pending edits to Keryx.

        `path` is accepted for python-docx parity but ignored — writes
        are always in-place against the Y.Doc. Saving to a local file
        is not supported in Phase 1 (use the Olympus UI's Export DOCX).
        """
        self._ensure_open()
        run_sync(self._session.save(in_place=True))
        self._saved = True

    def close(self) -> None:
        """Close the session. Not strictly needed (context-managed).

        Calling this more than once is a no-op; the document is marked
        closed even when closing the session raises.
        """
        if self._closed:
            return
        try:
            run_sync(self._session.close())
        finally:
            self._closed = True

    def __enter__(self) -> "Document":
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # noqa: ANN001
        # Autosave only on a clean exit with unsaved edits; a failed
        # autosave is logged rather than raised so close() still runs.
        if exc_type is None and not self._saved and self._session.is_open:
            try:
                self.save()
            except Exception as e:
                _log_warn(f"autosave failed for {self._session.asset_id}: {e}")
        self.close()

    # ---- Internals ----

    def _ensure_open(self) -> None:
        """Raise if the document is closed; open the session if needed."""
        if self._closed:
            raise DocumentClosedError(
                f"Document {self._session.asset_id} is closed.",
            )
        if not self._session.is_open:
            run_sync(self._session.open())

    def _mark_dirty(self) -> None:
        """Record that an edit was issued after the last save.

        Without this, edits made after an explicit save() would be skipped
        by the autosave in __exit__. Edits made through Paragraph/Table
        handles are not visible here and are not tracked.
        """
        self._saved = False

    @staticmethod
    def _markdown_for(text: str, style: str | None) -> str:
        """Build the markdown snippet for a paragraph with *style*.

        Heading styles become ATX headings with the level clamped to 1..6;
        an unparsable level falls back to 1. Everything else is a plain
        paragraph.
        """
        if not (style and style.lower().startswith("heading")):
            return f"{text}\n\n"
        try:
            level: int = int(style.rsplit(" ", 1)[-1])
        except ValueError:
            level = 1
        level = min(max(level, 1), Document._MAX_MARKDOWN_HEADING_LEVEL)
        return f"{'#' * level} {text}\n\n"

    @staticmethod
    def _sniff_image_content_type(data: bytes) -> str | None:
        """Return the MIME type implied by *data*'s magic bytes, or None."""
        if data.startswith(b"\x89PNG\r\n\x1a\n"):
            return "image/png"
        if data.startswith(b"\xff\xd8\xff"):
            return "image/jpeg"
        if data.startswith((b"GIF87a", b"GIF89a")):
            return "image/gif"
        if data[:4] == b"RIFF" and data[8:12] == b"WEBP":
            return "image/webp"
        return None

    @staticmethod
    def _address_node_id(item: object) -> str:
        """Return ``item["address"]["nodeId"]`` as str, or "" if absent."""
        if not isinstance(item, dict):
            return ""
        address = item.get("address")
        if not isinstance(address, dict):
            return ""
        raw_id = address.get("nodeId")
        return "" if raw_id is None else str(raw_id)
304
+
305
+
306
+ # ---- Module-level helpers ----
307
+
308
+ def _extract_inserted_node_id(result: dict, *, expected_type: str) -> str:
309
+ """Parse Superdoc's insert() response to find the new nodeId.
310
+
311
+ Superdoc rotates response shape across versions. Defensively probe.
312
+ """
313
+ if not isinstance(result, dict):
314
+ return ""
315
+ data_obj: object = result.get("result", {})
316
+ data: dict = data_obj if isinstance(data_obj, dict) else {}
317
+ nodes_obj: object = data.get("insertedNodes", data.get("nodes", []))
318
+ nodes: list = nodes_obj if isinstance(nodes_obj, list) else []
319
+ for node in nodes:
320
+ if not isinstance(node, dict):
321
+ continue
322
+ if node.get("type") == expected_type:
323
+ return str(node.get("nodeId", ""))
324
+ if nodes and isinstance(nodes[-1], dict):
325
+ return str(nodes[-1].get("nodeId", ""))
326
+ return ""
327
+
328
+
329
+ def _emu_to_px(emu: object) -> float:
330
+ """EMU → px at 96 DPI. 914400 EMU = 1 inch.
331
+
332
+ Accepts Emu/Inches/Pt subclasses (all of which are int) or plain int.
333
+ """
334
+ val: float = float(emu) if not hasattr(emu, "emu") else float(emu.emu) # type: ignore[union-attr]
335
+ return val / 914400.0 * 96.0
docx/enum/__init__.py ADDED
@@ -0,0 +1 @@
1
+ """Enum submodule — mirrors python-docx.enum."""
docx/enum/table.py ADDED
@@ -0,0 +1,15 @@
1
+ """Table-related enums — python-docx parity (docx.enum.table).
2
+
3
+ Phase 1 includes only WD_ROW_HEIGHT_RULE as a placeholder; Phase 2 will
4
+ expand to WD_CELL_VERTICAL_ALIGNMENT, WD_TABLE_ALIGNMENT, etc.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from enum import Enum
10
+
11
+
12
class WD_ROW_HEIGHT_RULE(Enum):
    """Row-height rule names with their string values.

    NOTE(review): the values look like the OOXML height-rule tokens;
    confirm against the consumer before relying on that.
    """

    AUTO = "auto"
    AT_LEAST = "atLeast"
    # The member is named EXACTLY, but its value is "exact".
    EXACTLY = "exact"
docx/enum/text.py ADDED
@@ -0,0 +1,29 @@
1
+ """Paragraph alignment — python-docx parity (docx.enum.text).
2
+
3
+ python-docx exposes a richer set (DISTRIBUTE, THAI_JUSTIFY, etc.). Phase 1
4
+ implements the top 4 (LEFT, CENTER, RIGHT, JUSTIFY) — the others return
5
+ None from the getter and accept-but-no-op on the setter.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from enum import Enum
11
+
12
+
13
class WD_ALIGN_PARAGRAPH(Enum):
    """Paragraph alignment. python-docx exposes these exact names."""

    LEFT = "left"
    CENTER = "center"
    RIGHT = "right"
    JUSTIFY = "justify"

    def to_superdoc(self) -> str:
        """Return the string value used for this alignment."""
        return self.value

    @classmethod
    def from_superdoc(cls, s: str | None) -> "WD_ALIGN_PARAGRAPH | None":
        """Map an alignment string back to a member.

        Matching is case-insensitive and ignores surrounding whitespace.

        Returns:
            The matching member, or None when *s* is not a string (for
            example None for an unset alignment) or names an alignment
            this enum does not define.
        """
        if not isinstance(s, str):
            return None
        try:
            return cls(s.strip().lower())
        except ValueError:
            return None
docx/errors.py ADDED
@@ -0,0 +1,30 @@
1
+ """Exception types for athena-python-docx."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class DocxError(Exception):
    """Root of the athena-python-docx exception hierarchy."""


class SessionError(DocxError):
    """The Superdoc SDK session could not be established or used."""


class AuthenticationError(SessionError):
    """Keryx rejected the collab token."""


class UnsupportedProviderError(SessionError):
    """The asset is routed to ysweet rather than yhub.

    Only yhub is supported in Phase 1; ysweet-routed assets have to be
    migrated before they can be opened.
    """


class DocumentClosedError(DocxError):
    """An operation was attempted on a Document that is already closed."""


class ValidationError(DocxError):
    """SDK-level validation failed (bad args, out-of-range indices)."""
docx/shared.py ADDED
@@ -0,0 +1,81 @@
1
+ """Length and color types — python-docx parity.
2
+
3
+ python-docx defines these in docx.shared. We mirror the class surface,
4
+ but delegate underlying math (Inches→Emu, Pt→Emu) to simple formulas.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+
10
class Emu(int):
    """A length stored as an integer count of English Metric Units.

    914400 EMU = 1 inch. The read-only properties convert the stored
    value to other units.
    """

    def __new__(cls, value: int | float) -> "Emu":
        whole = int(value)  # fractional EMUs are truncated, not rounded
        return int.__new__(cls, whole)

    @property
    def emu(self) -> int:
        """The length as a plain ``int`` of EMUs."""
        return int(self)

    @property
    def inches(self) -> float:
        """The length in inches."""
        return self.emu / 914400

    @property
    def cm(self) -> float:
        """The length in centimetres."""
        return self.emu / 360000

    @property
    def mm(self) -> float:
        """The length in millimetres."""
        return self.emu / 36000

    @property
    def pt(self) -> float:
        """The length in points."""
        return self.emu / 12700


class Inches(Emu):
    """An Emu constructed from a value in inches."""

    def __new__(cls, value: float) -> "Inches":
        emu_count = int(value * 914400)
        return Emu.__new__(cls, emu_count)  # type: ignore[return-value]


class Pt(Emu):
    """An Emu constructed from a value in points."""

    def __new__(cls, value: float) -> "Pt":
        emu_count = int(value * 12700)
        return Emu.__new__(cls, emu_count)  # type: ignore[return-value]


class Cm(Emu):
    """An Emu constructed from a value in centimetres."""

    def __new__(cls, value: float) -> "Cm":
        emu_count = int(value * 360000)
        return Emu.__new__(cls, emu_count)  # type: ignore[return-value]


class Mm(Emu):
    """An Emu constructed from a value in millimetres."""

    def __new__(cls, value: float) -> "Mm":
        emu_count = int(value * 36000)
        return Emu.__new__(cls, emu_count)  # type: ignore[return-value]
55
+
56
+
57
class RGBColor(tuple):
    """24-bit RGB color stored as an ``(r, g, b)`` tuple. python-docx parity."""

    def __new__(cls, r: int, g: int, b: int) -> "RGBColor":
        """Create a color from three integer components.

        Raises:
            ValueError: If any component is not an int in 0..255. Floats
                are rejected here because the hex formatting in __str__
                and __repr__ only works for integers.
        """
        if not all(isinstance(v, int) and 0 <= v <= 255 for v in (r, g, b)):
            raise ValueError(
                "RGBColor components must be integers in 0..255; "
                f"got ({r!r}, {g!r}, {b!r})",
            )
        return super().__new__(cls, (r, g, b))  # type: ignore[arg-type]

    def __str__(self) -> str:
        """Return the color as six uppercase hex digits, e.g. ``"12ABFF"``."""
        r, g, b = self
        return f"{r:02X}{g:02X}{b:02X}"

    def __repr__(self) -> str:
        return f"RGBColor(0x{self[0]:02X}, 0x{self[1]:02X}, 0x{self[2]:02X})"

    @classmethod
    def from_string(cls, rgb_hex_str: str) -> "RGBColor":
        """Parse ``"RRGGBB"`` (leading ``#`` characters allowed) into a color.

        Raises:
            ValueError: If the remainder is not exactly six hex digits.
        """
        s: str = rgb_hex_str.lstrip("#")
        # Check characters explicitly: int(x, 16) would also accept signs
        # and whitespace such as "+1" or " f".
        if len(s) != 6 or any(c not in "0123456789abcdefABCDEF" for c in s):
            raise ValueError(
                f"RGBColor.from_string expects 6 hex chars; got {rgb_hex_str!r}",
            )
        return cls(int(s[0:2], 16), int(s[2:4], 16), int(s[4:6], 16))