docx-editor 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ """docx_editor - Pure Python Track Changes Library for Word Documents.
2
+
3
+ A standalone library for Word document track changes and comments,
4
+ without requiring Microsoft Word installed.
5
+
6
+ Example:
7
+ from docx_editor import Document
8
+
9
+ # Open and edit
10
+ doc = Document.open("contract.docx")
11
+ doc.replace("30 days", "60 days") # Tracked replacement
12
+ doc.insert_after("Section 5", "New clause") # Tracked insertion
13
+ doc.delete("obsolete text") # Tracked deletion
14
+
15
+ # Comments
16
+ doc.add_comment("Section 5", "Please review")
17
+ doc.reply_to_comment(0, "Approved")
18
+
19
+ # Revision management
20
+ revisions = doc.list_revisions()
21
+ doc.accept_revision(revision_id=1)
22
+ doc.reject_all(author="OtherUser")
23
+
24
+ # Save and close
25
+ doc.save()
26
+ doc.close()
27
+ """
28
+
29
+ __version__ = "0.0.1"
30
+
31
+ from .comments import Comment
32
+ from .document import Document
33
+ from .exceptions import (
34
+ CommentError,
35
+ DocumentNotFoundError,
36
+ DocxEditError,
37
+ InvalidDocumentError,
38
+ MultipleNodesFoundError,
39
+ NodeNotFoundError,
40
+ RevisionError,
41
+ TextNotFoundError,
42
+ WorkspaceError,
43
+ WorkspaceExistsError,
44
+ WorkspaceSyncError,
45
+ XMLError,
46
+ )
47
+ from .track_changes import Revision
48
+
49
+ __all__ = [
50
+ # Main classes
51
+ "Document",
52
+ "Revision",
53
+ "Comment",
54
+ # Exceptions
55
+ "DocxEditError",
56
+ "DocumentNotFoundError",
57
+ "InvalidDocumentError",
58
+ "WorkspaceError",
59
+ "WorkspaceExistsError",
60
+ "WorkspaceSyncError",
61
+ "XMLError",
62
+ "NodeNotFoundError",
63
+ "MultipleNodesFoundError",
64
+ "RevisionError",
65
+ "CommentError",
66
+ "TextNotFoundError",
67
+ ]
@@ -0,0 +1,503 @@
1
+ """Comment management for docx_editor.
2
+
3
+ Provides CommentManager for creating and managing document comments.
4
+ """
5
+
6
+ import html
7
+ import shutil
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+
12
+ from .exceptions import CommentError, TextNotFoundError
13
+ from .xml_editor import DocxXMLEditor, _generate_hex_id
14
+
15
+ # Path to template files
16
+ TEMPLATE_DIR = Path(__file__).parent / "ooxml" / "templates"
17
+
18
+
19
+ @dataclass
20
+ class Comment:
21
+ """Represents a document comment."""
22
+
23
+ id: int
24
+ text: str
25
+ author: str
26
+ date: datetime | None
27
+ resolved: bool = False
28
+ replies: list["Comment"] = field(default_factory=list)
29
+
30
+ def __repr__(self) -> str:
31
+ status = "[RESOLVED] " if self.resolved else ""
32
+ reply_count = f" ({len(self.replies)} replies)" if self.replies else ""
33
+ return f"Comment({self.id}: {status}'{self.text[:30]}...' by {self.author}{reply_count})"
34
+
35
+
36
+ class CommentManager:
37
+ """Manages comments in a Word document.
38
+
39
+ Handles the complex task of managing comments across 5 related XML files:
40
+ - comments.xml: Main comment content
41
+ - commentsExtended.xml: Threading information
42
+ - commentsIds.xml: Durable IDs
43
+ - commentsExtensible.xml: Extended properties
44
+ - document.xml: Comment range markers
45
+ """
46
+
47
def __init__(
    self,
    workspace_path: Path,
    document_editor: DocxXMLEditor,
    author: str,
    initials: str,
):
    """Set up comment management for one unpacked document workspace.

    Args:
        workspace_path: Root folder of the unpacked workspace; the comment
            files are looked up in its ``word`` subfolder.
        document_editor: DocxXMLEditor for word/document.xml. Its ``rsid``
            is reused for every editor this manager opens.
        author: Author name for new comments.
        initials: Author initials for new comments.
    """
    word_dir = workspace_path / "word"

    # Identity handed to every editor opened through this manager.
    self.author, self.initials = author, initials

    self.workspace_path = workspace_path
    self.word_path = word_dir
    self.document_editor = document_editor

    # The four comment files that sit next to document.xml.
    self.comments_path = word_dir / "comments.xml"
    self.comments_extended_path = word_dir / "commentsExtended.xml"
    self.comments_ids_path = word_dir / "commentsIds.xml"
    self.comments_extensible_path = word_dir / "commentsExtensible.xml"

    # Editors are created on first use and then reused (see _get_editor).
    self._editors: dict[str, DocxXMLEditor] = {}

    # Index the comments already present so replies can find their parent,
    # then pick the first unused comment id.
    self.existing_comments = self._load_existing_comments()
    self.next_comment_id = self._get_next_comment_id()
80
+
81
def _get_editor(self, xml_path: Path) -> DocxXMLEditor:
    """Return the editor for ``xml_path``, creating and caching it on first use.

    Editors are cached under ``str(xml_path)`` and are built with the
    document editor's ``rsid`` plus this manager's author and initials.
    """
    cache_key = str(xml_path)
    try:
        return self._editors[cache_key]
    except KeyError:
        pass

    editor = DocxXMLEditor(
        xml_path,
        rsid=self.document_editor.rsid,
        author=self.author,
        initials=self.initials,
    )
    self._editors[cache_key] = editor
    return editor
92
+
93
def add_comment(self, anchor_text: str, comment_text: str) -> int:
    """Attach a new top-level comment to the text matching ``anchor_text``.

    The range-start marker is inserted before the run that holds the
    anchor text; the range-end marker and reference run are added to the
    enclosing paragraph through ``append_to``.

    Args:
        anchor_text: Text to attach the comment to.
        comment_text: The comment content.

    Returns:
        The id assigned to the new comment.

    Raises:
        TextNotFoundError: If looking up the anchor text fails for any reason.
        CommentError: If the anchor has no enclosing ``w:r`` / ``w:p``.
    """
    doc = self.document_editor

    def nearest(node, wanted):
        # Walk up from ``node`` (inclusive) to the first ancestor named ``wanted``.
        while node and node.nodeName != wanted:
            node = node.parentNode
        return node

    # Every lookup failure is reported uniformly as "anchor not found".
    try:
        text_node = doc.get_node(tag="w:t", contains=anchor_text)
    except Exception:
        raise TextNotFoundError(f"Anchor text not found: '{anchor_text}'") from None

    run = nearest(text_node.parentNode, "w:r")
    para = nearest(run, "w:p")
    if not (run and para):
        raise CommentError("Could not find parent run/paragraph")

    new_id = self.next_comment_id
    para_id = _generate_hex_id()
    durable_id = _generate_hex_id()
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Markers in document.xml first ...
    doc.insert_before(run, self._comment_range_start_xml(new_id))
    doc.append_to(para, self._comment_range_end_xml(new_id))

    # ... then one entry in each of the four comment files.
    self._add_to_comments_xml(new_id, para_id, comment_text, timestamp)
    self._add_to_comments_extended_xml(para_id, parent_para_id=None)
    self._add_to_comments_ids_xml(para_id, durable_id)
    self._add_to_comments_extensible_xml(durable_id)

    # Remember the paragraph id so later replies can reference this comment.
    self.existing_comments[new_id] = {"para_id": para_id}
    self.next_comment_id += 1

    return new_id
144
+
145
def reply_to_comment(self, parent_comment_id: int, reply_text: str) -> int:
    """Create a reply threaded under an existing comment.

    The reply's markers are placed next to the parent's markers in
    document.xml, and its commentsExtended.xml entry points at the
    parent's paragraph id.

    Args:
        parent_comment_id: The ID of the comment to reply to.
        reply_text: The reply content.

    Returns:
        The id assigned to the reply.

    Raises:
        CommentError: If ``parent_comment_id`` is not a known comment.
    """
    known = self.existing_comments
    if parent_comment_id not in known:
        raise CommentError(f"Parent comment with id={parent_comment_id} not found")
    parent_info = known[parent_comment_id]

    reply_id = self.next_comment_id
    para_id = _generate_hex_id()
    durable_id = _generate_hex_id()
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    doc = self.document_editor
    parent_key = {"w:id": str(parent_comment_id)}

    # Locate the parent's markers in document.xml.
    parent_start = doc.get_node(tag="w:commentRangeStart", attrs=dict(parent_key))
    parent_ref = doc.get_node(tag="w:commentReference", attrs=dict(parent_key))

    # The reply's range start goes right after the parent's range start.
    doc.insert_after(parent_start, self._comment_range_start_xml(reply_id))

    # Both inserts target the parent's reference run, in this order.
    parent_ref_run = parent_ref.parentNode
    doc.insert_after(parent_ref_run, f'<w:commentRangeEnd w:id="{reply_id}"/>')
    doc.insert_after(parent_ref_run, self._comment_ref_run_xml(reply_id))

    # One entry in each comment file; the extended entry links to the parent.
    self._add_to_comments_xml(reply_id, para_id, reply_text, timestamp)
    self._add_to_comments_extended_xml(para_id, parent_para_id=parent_info["para_id"])
    self._add_to_comments_ids_xml(para_id, durable_id)
    self._add_to_comments_extensible_xml(durable_id)

    # Replies can themselves be replied to.
    known[reply_id] = {"para_id": para_id}
    self.next_comment_id += 1

    return reply_id
193
+
194
+ def list_comments(self, author: str | None = None) -> list[Comment]:
195
+ """List all comments in the document.
196
+
197
+ Args:
198
+ author: If provided, filter by author name
199
+
200
+ Returns:
201
+ List of Comment objects (with replies nested)
202
+ """
203
+ if not self.comments_path.exists():
204
+ return []
205
+
206
+ editor = self._get_editor(self.comments_path)
207
+ comments_dict: dict[int, Comment] = {}
208
+ parent_map: dict[str, str] = {} # para_id -> parent_para_id
209
+
210
+ # Build parent map from commentsExtended.xml
211
+ if self.comments_extended_path.exists():
212
+ ext_editor = self._get_editor(self.comments_extended_path)
213
+ for ex_elem in ext_editor.dom.getElementsByTagName("w15:commentEx"):
214
+ para_id = ex_elem.getAttribute("w15:paraId")
215
+ parent_para_id = ex_elem.getAttribute("w15:paraIdParent")
216
+ if para_id:
217
+ parent_map[para_id] = parent_para_id
218
+
219
+ # Parse all comments
220
+ for comment_elem in editor.dom.getElementsByTagName("w:comment"):
221
+ comment = self._parse_comment(comment_elem)
222
+ if comment and (author is None or comment.author == author):
223
+ # Check if resolved
224
+ para_id = self._get_comment_para_id(comment_elem)
225
+ if para_id and self.comments_extended_path.exists():
226
+ ext_editor = self._get_editor(self.comments_extended_path)
227
+ for ex_elem in ext_editor.dom.getElementsByTagName("w15:commentEx"):
228
+ if ex_elem.getAttribute("w15:paraId") == para_id:
229
+ comment.resolved = ex_elem.getAttribute("w15:done") == "1"
230
+ break
231
+
232
+ comments_dict[comment.id] = comment
233
+
234
+ # Build reply tree
235
+ para_to_id: dict[str, int] = {}
236
+ for comment_id, info in self.existing_comments.items():
237
+ para_to_id[info["para_id"]] = comment_id
238
+
239
+ # Nest replies
240
+ root_comments = []
241
+ for comment_id, comment in comments_dict.items():
242
+ para_id = self.existing_comments.get(comment_id, {}).get("para_id")
243
+ if para_id:
244
+ parent_para = parent_map.get(para_id)
245
+ if parent_para and parent_para in para_to_id:
246
+ parent_id = para_to_id[parent_para]
247
+ if parent_id in comments_dict:
248
+ comments_dict[parent_id].replies.append(comment)
249
+ continue
250
+ root_comments.append(comment)
251
+
252
+ return sorted(root_comments, key=lambda c: c.id)
253
+
254
def resolve_comment(self, comment_id: int) -> bool:
    """Flag a comment as resolved by setting ``w15:done="1"`` on its entry.

    Args:
        comment_id: The comment ID to resolve.

    Returns:
        True if the comment's commentsExtended.xml entry was found and
        updated; False if the comment is unknown, the file does not
        exist, or no entry matches the comment's paragraph id.
    """
    known = self.existing_comments
    if comment_id not in known:
        return False
    target_para = known[comment_id]["para_id"]

    ext_path = self.comments_extended_path
    if not ext_path.exists():
        return False

    entries = self._get_editor(ext_path).dom.getElementsByTagName("w15:commentEx")
    # Only the first entry with a matching paragraph id is updated.
    match = next((e for e in entries if e.getAttribute("w15:paraId") == target_para), None)
    if match is None:
        return False

    match.setAttribute("w15:done", "1")
    return True
278
+
279
def delete_comment(self, comment_id: int) -> bool:
    """Delete a comment from document.xml and from every comment file.

    Removes the comment's range markers and reference run from
    document.xml, then its entries in comments.xml, commentsExtended.xml,
    commentsIds.xml and commentsExtensible.xml. The durable id is read
    from the commentsIds.xml entry before that entry is dropped, so the
    matching commentsExtensible.xml entry is removed as well instead of
    being left orphaned. Replies to the deleted comment are not removed.

    Args:
        comment_id: The comment ID to delete.

    Returns:
        True if deleted, False if the comment is not known.
    """
    if comment_id not in self.existing_comments:
        return False

    para_id = self.existing_comments[comment_id]["para_id"]
    id_attrs = {"w:id": str(comment_id)}

    def detach(node) -> None:
        # Unlink ``node`` from its parent.
        node.parentNode.removeChild(node)

    def remove_first(path, tag, attr, value):
        # Remove and return the first ``tag`` element in the file at
        # ``path`` whose ``attr`` equals ``value``; None if nothing matched
        # or the file does not exist.
        if not path.exists():
            return None
        for elem in self._get_editor(path).dom.getElementsByTagName(tag):
            if elem.getAttribute(attr) == value:
                detach(elem)
                return elem
        return None

    # document.xml cleanup is best effort: a marker that cannot be looked
    # up must not prevent the remaining cleanup from running.
    for marker_tag in ("w:commentRangeStart", "w:commentRangeEnd"):
        try:
            detach(self.document_editor.get_node(tag=marker_tag, attrs=dict(id_attrs)))
        except Exception:
            pass

    try:
        ref = self.document_editor.get_node(tag="w:commentReference", attrs=dict(id_attrs))
        holder = ref.parentNode
        if holder and holder.nodeName == "w:r":
            # Drop the whole run that exists only to carry the reference.
            detach(holder)
        else:
            detach(ref)
    except Exception:
        pass

    remove_first(self.comments_path, "w:comment", "w:id", str(comment_id))
    remove_first(self.comments_extended_path, "w15:commentEx", "w15:paraId", para_id)

    # The commentsIds.xml entry is the only place that links the paragraph
    # id to the durable id, so capture it while removing the entry.
    id_entry = remove_first(self.comments_ids_path, "w16cid:commentId", "w16cid:paraId", para_id)
    durable_id = id_entry.getAttribute("w16cid:durableId") if id_entry is not None else ""
    if durable_id:
        remove_first(
            self.comments_extensible_path,
            "w16cex:commentExtensible",
            "w16cex:durableId",
            durable_id,
        )

    del self.existing_comments[comment_id]
    return True
348
+
349
def save_all(self) -> None:
    """Call ``save()`` on every editor this manager has opened so far."""
    open_editors = self._editors
    for cache_key in open_editors:
        open_editors[cache_key].save()
353
+
354
+ # ==================== Private: Loading ====================
355
+
356
def _get_next_comment_id(self) -> int:
    """Return one more than the highest numeric ``w:id`` in comments.xml.

    Returns 0 when comments.xml does not exist or holds no comment with a
    numeric id; missing and non-numeric ids are ignored.
    """
    if not self.comments_path.exists():
        return 0

    dom = self._get_editor(self.comments_path).dom
    highest = -1
    for node in dom.getElementsByTagName("w:comment"):
        raw_id = node.getAttribute("w:id")
        if not raw_id:
            continue
        try:
            value = int(raw_id)
        except ValueError:
            continue
        if value > highest:
            highest = value
    return highest + 1
371
+
372
def _load_existing_comments(self) -> dict[int, dict]:
    """Index the comments already in comments.xml for reply support.

    Comments with a missing or non-numeric ``w:id``, or without a
    paragraph id, are skipped. Non-numeric ids are ignored here in the
    same way ``_get_next_comment_id`` ignores them, so one malformed
    entry cannot make construction of the manager fail.

    Returns:
        Mapping of comment id to ``{"para_id": <paragraph id>}``; empty
        when comments.xml does not exist.
    """
    if not self.comments_path.exists():
        return {}

    editor = self._get_editor(self.comments_path)
    existing: dict[int, dict] = {}

    for comment_elem in editor.dom.getElementsByTagName("w:comment"):
        try:
            # int("") also raises ValueError, which covers a missing id.
            comment_id = int(comment_elem.getAttribute("w:id"))
        except ValueError:
            continue

        para_id = self._get_comment_para_id(comment_elem)
        if para_id:
            existing[comment_id] = {"para_id": para_id}

    return existing
392
+
393
+ def _get_comment_para_id(self, comment_elem) -> str | None:
394
+ """Get the para_id from a comment element."""
395
+ for p_elem in comment_elem.getElementsByTagName("w:p"):
396
+ para_id = p_elem.getAttribute("w14:paraId")
397
+ if para_id:
398
+ return para_id
399
+ return None
400
+
401
def _parse_comment(self, comment_elem) -> "Comment | None":
    """Parse a w:comment element into a Comment object.

    Returns:
        A Comment built from the element's id, author, date and text, or
        None when ``w:id`` is missing or not an integer. Non-numeric ids
        are skipped rather than raised so that one malformed entry does
        not break listing.
    """
    try:
        # int("") also raises ValueError, which covers a missing id.
        comment_id = int(comment_elem.getAttribute("w:id"))
    except ValueError:
        return None

    author = comment_elem.getAttribute("w:author") or "Unknown"
    date_str = comment_elem.getAttribute("w:date")

    # A trailing "Z" is rewritten to an explicit offset before parsing;
    # unparseable dates are reported as None.
    try:
        date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) if date_str else None
    except ValueError:
        date = None

    # Concatenate the text of every w:t element, in document order.
    text_parts = [
        t_elem.firstChild.data
        for t_elem in comment_elem.getElementsByTagName("w:t")
        if t_elem.firstChild
    ]

    return Comment(
        id=comment_id,
        text="".join(text_parts),
        author=author,
        date=date,
    )
427
+
428
+ # ==================== Private: XML File Creation ====================
429
+
430
def _ensure_comment_file(self, path: Path, template_name: str) -> None:
    """Create ``path`` from the named template unless it already exists.

    The template is copied from ``TEMPLATE_DIR / template_name``.
    """
    if path.exists():
        return
    shutil.copy(TEMPLATE_DIR / template_name, path)
434
+
435
def _add_to_comments_xml(self, comment_id: int, para_id: str, text: str, timestamp: str) -> None:
    """Append one ``w:comment`` element to comments.xml.

    comments.xml is created from its template first if it does not exist.
    ``text`` is escaped with ``html.escape`` before being placed inside
    ``w:t``. ``timestamp`` is accepted but is not written into the
    fragment built here.
    """
    self._ensure_comment_file(self.comments_path, "comments.xml")

    comments_editor = self._get_editor(self.comments_path)
    container = comments_editor.get_node(tag="w:comments")

    safe_text = html.escape(text)
    fragment = f"""<w:comment w:id="{comment_id}">
<w:p w14:paraId="{para_id}" w14:textId="77777777">
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:annotationRef/></w:r>
<w:r><w:rPr><w:color w:val="000000"/><w:sz w:val="20"/><w:szCs w:val="20"/></w:rPr><w:t>{safe_text}</w:t></w:r>
</w:p>
</w:comment>"""
    comments_editor.append_to(container, fragment)
450
+
451
+ def _add_to_comments_extended_xml(self, para_id: str, parent_para_id: str | None) -> None:
452
+ """Add a single comment to commentsExtended.xml."""
453
+ self._ensure_comment_file(self.comments_extended_path, "commentsExtended.xml")
454
+
455
+ editor = self._get_editor(self.comments_extended_path)
456
+ root = editor.get_node(tag="w15:commentsEx")
457
+
458
+ if parent_para_id:
459
+ xml = f'<w15:commentEx w15:paraId="{para_id}" w15:paraIdParent="{parent_para_id}" w15:done="0"/>'
460
+ else:
461
+ xml = f'<w15:commentEx w15:paraId="{para_id}" w15:done="0"/>'
462
+ editor.append_to(root, xml)
463
+
464
def _add_to_comments_ids_xml(self, para_id: str, durable_id: str) -> None:
    """Append one ``w16cid:commentId`` entry to commentsIds.xml.

    The file is created from its template first if it does not exist. The
    entry pairs the comment's paragraph id with its durable id.
    """
    ids_path = self.comments_ids_path
    self._ensure_comment_file(ids_path, "commentsIds.xml")

    ids_editor = self._get_editor(ids_path)
    ids_editor.append_to(
        ids_editor.get_node(tag="w16cid:commentsIds"),
        f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
    )
473
+
474
def _add_to_comments_extensible_xml(self, durable_id: str) -> None:
    """Append one ``w16cex:commentExtensible`` entry to commentsExtensible.xml.

    The file is created from its template first if it does not exist. The
    entry carries only the comment's durable id.
    """
    cex_path = self.comments_extensible_path
    self._ensure_comment_file(cex_path, "commentsExtensible.xml")

    cex_editor = self._get_editor(cex_path)
    cex_editor.append_to(
        cex_editor.get_node(tag="w16cex:commentsExtensible"),
        f'<w16cex:commentExtensible w16cex:durableId="{durable_id}"/>',
    )
483
+
484
+ # ==================== Private: XML Fragments ====================
485
+
486
def _comment_range_start_xml(self, comment_id: int) -> str:
    """Return the ``w:commentRangeStart`` marker for ``comment_id``."""
    return '<w:commentRangeStart w:id="{}"/>'.format(comment_id)
489
+
490
def _comment_range_end_xml(self, comment_id: int) -> str:
    """Return the ``w:commentRangeEnd`` marker followed by the reference run.

    The run holds a ``CommentReference`` run style and a
    ``w:commentReference`` carrying the same id as the marker.
    """
    template = """<w:commentRangeEnd w:id="{cid}"/>
<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:commentReference w:id="{cid}"/>
</w:r>"""
    return template.format(cid=comment_id)
497
+
498
def _comment_ref_run_xml(self, comment_id: int) -> str:
    """Return a run holding the ``w:commentReference`` for ``comment_id``.

    The run carries the ``CommentReference`` run style.
    """
    template = """<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:commentReference w:id="{cid}"/>
</w:r>"""
    return template.format(cid=comment_id)