docx-editor 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docx_editor/__init__.py +67 -0
- docx_editor/comments.py +503 -0
- docx_editor/document.py +678 -0
- docx_editor/exceptions.py +73 -0
- docx_editor/ooxml/__init__.py +6 -0
- docx_editor/ooxml/pack.py +126 -0
- docx_editor/ooxml/templates/comments.xml +3 -0
- docx_editor/ooxml/templates/commentsExtended.xml +3 -0
- docx_editor/ooxml/templates/commentsExtensible.xml +3 -0
- docx_editor/ooxml/templates/commentsIds.xml +3 -0
- docx_editor/ooxml/templates/people.xml +3 -0
- docx_editor/ooxml/unpack.py +43 -0
- docx_editor/track_changes.py +486 -0
- docx_editor/workspace.py +282 -0
- docx_editor/xml_editor.py +811 -0
- docx_editor-0.0.1.dist-info/METADATA +76 -0
- docx_editor-0.0.1.dist-info/RECORD +19 -0
- docx_editor-0.0.1.dist-info/WHEEL +4 -0
- docx_editor-0.0.1.dist-info/licenses/LICENSE +21 -0
docx_editor/__init__.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""docx_editor - Pure Python Track Changes Library for Word Documents.
|
|
2
|
+
|
|
3
|
+
A standalone library for Word document track changes and comments,
|
|
4
|
+
without requiring Microsoft Word installed.
|
|
5
|
+
|
|
6
|
+
Example:
|
|
7
|
+
from docx_editor import Document
|
|
8
|
+
|
|
9
|
+
# Open and edit
|
|
10
|
+
doc = Document.open("contract.docx")
|
|
11
|
+
doc.replace("30 days", "60 days") # Tracked replacement
|
|
12
|
+
doc.insert_after("Section 5", "New clause") # Tracked insertion
|
|
13
|
+
doc.delete("obsolete text") # Tracked deletion
|
|
14
|
+
|
|
15
|
+
# Comments
|
|
16
|
+
doc.add_comment("Section 5", "Please review")
|
|
17
|
+
doc.reply_to_comment(0, "Approved")
|
|
18
|
+
|
|
19
|
+
# Revision management
|
|
20
|
+
revisions = doc.list_revisions()
|
|
21
|
+
doc.accept_revision(revision_id=1)
|
|
22
|
+
doc.reject_all(author="OtherUser")
|
|
23
|
+
|
|
24
|
+
# Save and close
|
|
25
|
+
doc.save()
|
|
26
|
+
doc.close()
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
__version__ = "0.0.1"
|
|
30
|
+
|
|
31
|
+
from .comments import Comment
|
|
32
|
+
from .document import Document
|
|
33
|
+
from .exceptions import (
|
|
34
|
+
CommentError,
|
|
35
|
+
DocumentNotFoundError,
|
|
36
|
+
DocxEditError,
|
|
37
|
+
InvalidDocumentError,
|
|
38
|
+
MultipleNodesFoundError,
|
|
39
|
+
NodeNotFoundError,
|
|
40
|
+
RevisionError,
|
|
41
|
+
TextNotFoundError,
|
|
42
|
+
WorkspaceError,
|
|
43
|
+
WorkspaceExistsError,
|
|
44
|
+
WorkspaceSyncError,
|
|
45
|
+
XMLError,
|
|
46
|
+
)
|
|
47
|
+
from .track_changes import Revision
|
|
48
|
+
|
|
49
|
+
# Names exported by the package; mirrors the imports above
# (main classes first, then the exception types).
__all__ = [
    # Main classes
    "Document",
    "Revision",
    "Comment",
    # Exceptions
    "DocxEditError",
    "DocumentNotFoundError",
    "InvalidDocumentError",
    "WorkspaceError",
    "WorkspaceExistsError",
    "WorkspaceSyncError",
    "XMLError",
    "NodeNotFoundError",
    "MultipleNodesFoundError",
    "RevisionError",
    "CommentError",
    "TextNotFoundError",
]
|
docx_editor/comments.py
ADDED
|
@@ -0,0 +1,503 @@
|
|
|
1
|
+
"""Comment management for docx_editor.
|
|
2
|
+
|
|
3
|
+
Provides CommentManager for creating and managing document comments.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import html
|
|
7
|
+
import shutil
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from .exceptions import CommentError, TextNotFoundError
|
|
13
|
+
from .xml_editor import DocxXMLEditor, _generate_hex_id
|
|
14
|
+
|
|
15
|
+
# Path to template files
|
|
16
|
+
TEMPLATE_DIR = Path(__file__).parent / "ooxml" / "templates"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
|
|
20
|
+
class Comment:
|
|
21
|
+
"""Represents a document comment."""
|
|
22
|
+
|
|
23
|
+
id: int
|
|
24
|
+
text: str
|
|
25
|
+
author: str
|
|
26
|
+
date: datetime | None
|
|
27
|
+
resolved: bool = False
|
|
28
|
+
replies: list["Comment"] = field(default_factory=list)
|
|
29
|
+
|
|
30
|
+
def __repr__(self) -> str:
|
|
31
|
+
status = "[RESOLVED] " if self.resolved else ""
|
|
32
|
+
reply_count = f" ({len(self.replies)} replies)" if self.replies else ""
|
|
33
|
+
return f"Comment({self.id}: {status}'{self.text[:30]}...' by {self.author}{reply_count})"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class CommentManager:
|
|
37
|
+
"""Manages comments in a Word document.
|
|
38
|
+
|
|
39
|
+
Handles the complex task of managing comments across 5 related XML files:
|
|
40
|
+
- comments.xml: Main comment content
|
|
41
|
+
- commentsExtended.xml: Threading information
|
|
42
|
+
- commentsIds.xml: Durable IDs
|
|
43
|
+
- commentsExtensible.xml: Extended properties
|
|
44
|
+
- document.xml: Comment range markers
|
|
45
|
+
"""
|
|
46
|
+
|
|
47
|
+
def __init__(
    self,
    workspace_path: Path,
    document_editor: DocxXMLEditor,
    author: str,
    initials: str,
):
    """Set up the manager for one unpacked document workspace.

    Args:
        workspace_path: Path to the unpacked workspace folder
        document_editor: DocxXMLEditor for word/document.xml
        author: Author name for new comments
        initials: Author initials for new comments
    """
    word_dir = workspace_path / "word"

    self.workspace_path = workspace_path
    self.word_path = word_dir
    self.document_editor = document_editor
    self.author = author
    self.initials = initials

    # Locations of the comment-related parts inside the "word" folder
    self.comments_path = word_dir / "comments.xml"
    self.comments_extended_path = word_dir / "commentsExtended.xml"
    self.comments_ids_path = word_dir / "commentsIds.xml"
    self.comments_extensible_path = word_dir / "commentsExtensible.xml"

    # Editors are created on demand and cached by path string; the cache
    # must exist before the two loader calls below, which go through it.
    self._editors: dict[str, DocxXMLEditor] = {}

    # Index the comments already present so replies can find their parent,
    # and so new comments get an unused ID.
    self.existing_comments = self._load_existing_comments()
    self.next_comment_id = self._get_next_comment_id()
|
|
80
|
+
|
|
81
|
+
def _get_editor(self, xml_path: Path) -> DocxXMLEditor:
    """Return the cached editor for *xml_path*, creating it on first use.

    New editors share the document editor's rsid and use this manager's
    author and initials.
    """
    cache_key = str(xml_path)
    cached = self._editors.get(cache_key)
    if cached is None:
        cached = DocxXMLEditor(
            xml_path,
            rsid=self.document_editor.rsid,
            author=self.author,
            initials=self.initials,
        )
        self._editors[cache_key] = cached
    return cached
|
|
92
|
+
|
|
93
|
+
def add_comment(self, anchor_text: str, comment_text: str) -> int:
    """Attach a new top-level comment to the text matching *anchor_text*.

    A range-start marker is inserted before the run containing the anchor,
    and the range-end marker plus reference run are appended to the
    enclosing paragraph. The comment is then registered in the four
    comment XML parts.

    Args:
        anchor_text: Text to attach the comment to
        comment_text: The comment content

    Returns:
        The comment ID

    Raises:
        TextNotFoundError: If the anchor text lookup fails
        CommentError: If the anchor has no enclosing run or paragraph
    """

    def _nearest(node, tag_name):
        # Walk upwards from *node* (inclusive) until a node named tag_name
        # is found; yields a falsy value when the chain runs out.
        while node and node.nodeName != tag_name:
            node = node.parentNode
        return node

    try:
        anchor = self.document_editor.get_node(tag="w:t", contains=anchor_text)
    except Exception:
        # Any lookup failure is reported to the caller as "not found".
        raise TextNotFoundError(f"Anchor text not found: '{anchor_text}'") from None

    anchor_run = _nearest(anchor.parentNode, "w:r")
    anchor_para = _nearest(anchor_run, "w:p")
    if not (anchor_run and anchor_para):
        raise CommentError("Could not find parent run/paragraph")

    new_id = self.next_comment_id
    para_id = _generate_hex_id()
    durable_id = _generate_hex_id()
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Range markers in document.xml
    doc = self.document_editor
    doc.insert_before(anchor_run, self._comment_range_start_xml(new_id))
    doc.append_to(anchor_para, self._comment_range_end_xml(new_id))

    # Matching entries in every comment part
    self._add_to_comments_xml(new_id, para_id, comment_text, stamp)
    self._add_to_comments_extended_xml(para_id, parent_para_id=None)
    self._add_to_comments_ids_xml(para_id, durable_id)
    self._add_to_comments_extensible_xml(durable_id)

    # Remember the paragraph ID so this comment can be replied to later
    self.existing_comments[new_id] = {"para_id": para_id}
    self.next_comment_id += 1
    return new_id
|
|
144
|
+
|
|
145
|
+
def reply_to_comment(self, parent_comment_id: int, reply_text: str) -> int:
    """Create a reply threaded under an existing comment.

    The reply's range markers are inserted next to the parent's markers in
    document.xml, and the reply is linked to the parent through its
    paragraph ID in commentsExtended.xml.

    Args:
        parent_comment_id: The ID of the comment to reply to
        reply_text: The reply content

    Returns:
        The new comment ID for the reply

    Raises:
        CommentError: If the parent comment is not found
    """
    parent_info = self.existing_comments.get(parent_comment_id)
    if parent_info is None:
        raise CommentError(f"Parent comment with id={parent_comment_id} not found")

    reply_id = self.next_comment_id
    para_id = _generate_hex_id()
    durable_id = _generate_hex_id()
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    doc = self.document_editor
    parent_key = str(parent_comment_id)

    # Both parent markers are looked up before anything is inserted.
    # NOTE(review): a failed lookup here propagates whatever get_node
    # raises, not necessarily CommentError - confirm against xml_editor.
    parent_start = doc.get_node(tag="w:commentRangeStart", attrs={"w:id": parent_key})
    parent_ref = doc.get_node(tag="w:commentReference", attrs={"w:id": parent_key})

    # Reply range starts right after the parent's range start
    doc.insert_after(parent_start, self._comment_range_start_xml(reply_id))

    # Both insertions are anchored on the run holding the parent's reference
    parent_ref_run = parent_ref.parentNode
    doc.insert_after(parent_ref_run, f'<w:commentRangeEnd w:id="{reply_id}"/>')
    doc.insert_after(parent_ref_run, self._comment_ref_run_xml(reply_id))

    # Matching entries in every comment part
    self._add_to_comments_xml(reply_id, para_id, reply_text, stamp)
    self._add_to_comments_extended_xml(para_id, parent_para_id=parent_info["para_id"])
    self._add_to_comments_ids_xml(para_id, durable_id)
    self._add_to_comments_extensible_xml(durable_id)

    # The reply can itself be replied to
    self.existing_comments[reply_id] = {"para_id": para_id}
    self.next_comment_id += 1
    return reply_id
|
|
193
|
+
|
|
194
|
+
def list_comments(self, author: str | None = None) -> list[Comment]:
    """List all comments in the document.

    Comments are parsed from comments.xml. Threading and resolved state
    come from commentsExtended.xml when that file exists.

    Args:
        author: If provided, only comments by exactly this author are kept.
            A reply whose parent is filtered out is returned as a root.

    Returns:
        Root-level Comment objects sorted by ID, with replies nested in
        each comment's ``replies`` list. Empty if comments.xml is missing.
    """
    if not self.comments_path.exists():
        return []

    editor = self._get_editor(self.comments_path)

    # Read commentsExtended.xml once: parent links and resolved flags.
    parent_map: dict[str, str] = {}  # para_id -> parent_para_id
    done_map: dict[str, bool] = {}  # para_id -> resolved
    if self.comments_extended_path.exists():
        ext_editor = self._get_editor(self.comments_extended_path)
        for ex_elem in ext_editor.dom.getElementsByTagName("w15:commentEx"):
            para_id = ex_elem.getAttribute("w15:paraId")
            if not para_id:
                continue
            parent_map[para_id] = ex_elem.getAttribute("w15:paraIdParent")
            # Keep the first entry per paragraph ID for the resolved flag.
            done_map.setdefault(para_id, ex_elem.getAttribute("w15:done") == "1")

    # Parse all comments, applying the author filter and resolved state.
    comments_dict: dict[int, Comment] = {}
    for comment_elem in editor.dom.getElementsByTagName("w:comment"):
        comment = self._parse_comment(comment_elem)
        if comment is None or (author is not None and comment.author != author):
            continue
        para_id = self._get_comment_para_id(comment_elem)
        if para_id in done_map:
            comment.resolved = done_map[para_id]
        comments_dict[comment.id] = comment

    # Reverse index: paragraph ID -> comment ID
    para_to_id: dict[str, int] = {
        info["para_id"]: comment_id for comment_id, info in self.existing_comments.items()
    }

    # Nest each reply under its parent; everything else is a root comment.
    root_comments: list[Comment] = []
    for comment_id, comment in comments_dict.items():
        para_id = self.existing_comments.get(comment_id, {}).get("para_id")
        parent_para = parent_map.get(para_id) if para_id else None
        parent_id = para_to_id.get(parent_para) if parent_para else None
        # parent_id may legitimately be 0, so compare against None.
        if parent_id is not None and parent_id in comments_dict:
            comments_dict[parent_id].replies.append(comment)
        else:
            root_comments.append(comment)

    return sorted(root_comments, key=lambda c: c.id)
|
|
253
|
+
|
|
254
|
+
def resolve_comment(self, comment_id: int) -> bool:
    """Flag a comment as resolved in commentsExtended.xml.

    Args:
        comment_id: The comment ID to resolve

    Returns:
        True if the comment's entry was found and marked done; False if
        the comment is unknown, commentsExtended.xml does not exist, or
        no entry matches the comment's paragraph ID
    """
    if comment_id not in self.existing_comments:
        return False

    target_para = self.existing_comments[comment_id]["para_id"]
    if not self.comments_extended_path.exists():
        return False

    entries = self._get_editor(self.comments_extended_path).dom.getElementsByTagName("w15:commentEx")
    match = next((e for e in entries if e.getAttribute("w15:paraId") == target_para), None)
    if match is None:
        return False

    match.setAttribute("w15:done", "1")
    return True
|
|
278
|
+
|
|
279
|
+
def delete_comment(self, comment_id: int) -> bool:
    """Delete a comment from the document.

    Removes the comment's markers from document.xml and its entries from
    comments.xml, commentsExtended.xml, commentsIds.xml and
    commentsExtensible.xml, then forgets it in ``existing_comments``.
    Replies to the deleted comment are not touched.

    Args:
        comment_id: The comment ID to delete

    Returns:
        True if deleted, False if the comment ID is not known
    """
    if comment_id not in self.existing_comments:
        return False

    para_id = self.existing_comments[comment_id]["para_id"]
    id_text = str(comment_id)

    # Remove the range markers from document.xml. This is deliberately
    # best-effort: a missing marker must not block removal of the rest.
    for marker_tag in ("w:commentRangeStart", "w:commentRangeEnd"):
        try:
            marker = self.document_editor.get_node(tag=marker_tag, attrs={"w:id": id_text})
            marker.parentNode.removeChild(marker)
        except Exception:
            pass

    try:
        ref = self.document_editor.get_node(tag="w:commentReference", attrs={"w:id": id_text})
        # Remove the parent run containing the reference
        if ref.parentNode and ref.parentNode.nodeName == "w:r":
            ref.parentNode.parentNode.removeChild(ref.parentNode)
        else:
            ref.parentNode.removeChild(ref)
    except Exception:
        pass

    # Remove from comments.xml
    if self.comments_path.exists():
        editor = self._get_editor(self.comments_path)
        for comment_elem in editor.dom.getElementsByTagName("w:comment"):
            if comment_elem.getAttribute("w:id") == id_text:
                comment_elem.parentNode.removeChild(comment_elem)
                break

    # Remove from commentsExtended.xml
    if self.comments_extended_path.exists():
        editor = self._get_editor(self.comments_extended_path)
        for ex_elem in editor.dom.getElementsByTagName("w15:commentEx"):
            if ex_elem.getAttribute("w15:paraId") == para_id:
                ex_elem.parentNode.removeChild(ex_elem)
                break

    # Remove from commentsIds.xml, keeping the durable ID: it is the only
    # key that links this comment to its commentsExtensible.xml entry.
    durable_id = None
    if self.comments_ids_path.exists():
        editor = self._get_editor(self.comments_ids_path)
        for id_elem in editor.dom.getElementsByTagName("w16cid:commentId"):
            if id_elem.getAttribute("w16cid:paraId") == para_id:
                durable_id = id_elem.getAttribute("w16cid:durableId") or None
                id_elem.parentNode.removeChild(id_elem)
                break

    # Remove from commentsExtensible.xml so no orphaned entry is left behind
    if durable_id and self.comments_extensible_path.exists():
        editor = self._get_editor(self.comments_extensible_path)
        for cex_elem in editor.dom.getElementsByTagName("w16cex:commentExtensible"):
            if cex_elem.getAttribute("w16cex:durableId") == durable_id:
                cex_elem.parentNode.removeChild(cex_elem)
                break

    del self.existing_comments[comment_id]
    return True
|
|
348
|
+
|
|
349
|
+
def save_all(self) -> None:
    """Write out every comment XML editor opened through this manager."""
    for cache_key in self._editors:
        self._editors[cache_key].save()
|
|
353
|
+
|
|
354
|
+
# ==================== Private: Loading ====================
|
|
355
|
+
|
|
356
|
+
def _get_next_comment_id(self) -> int:
    """Return one more than the largest integer ``w:id`` in comments.xml.

    Returns 0 when comments.xml does not exist or holds no numeric IDs.
    """
    if not self.comments_path.exists():
        return 0

    highest = -1
    nodes = self._get_editor(self.comments_path).dom.getElementsByTagName("w:comment")
    for node in nodes:
        raw_id = node.getAttribute("w:id")
        if not raw_id:
            continue
        try:
            value = int(raw_id)
        except ValueError:
            # Non-numeric IDs do not take part in numbering
            continue
        if value > highest:
            highest = value
    return highest + 1
|
|
371
|
+
|
|
372
|
+
def _load_existing_comments(self) -> dict[int, dict]:
    """Load existing comments for reply support.

    Returns:
        Mapping of comment ID to ``{"para_id": <paragraph ID>}`` for every
        comment in comments.xml that has an integer ``w:id`` and a
        paragraph carrying ``w14:paraId``. Empty if comments.xml is missing.
    """
    if not self.comments_path.exists():
        return {}

    editor = self._get_editor(self.comments_path)
    existing: dict[int, dict] = {}

    for comment_elem in editor.dom.getElementsByTagName("w:comment"):
        raw_id = comment_elem.getAttribute("w:id")
        if not raw_id:
            continue

        # Skip malformed IDs instead of failing the whole load; this
        # matches how _get_next_comment_id treats the same input.
        try:
            comment_id = int(raw_id)
        except ValueError:
            continue

        para_id = self._get_comment_para_id(comment_elem)
        if not para_id:
            continue

        existing[comment_id] = {"para_id": para_id}

    return existing
|
|
392
|
+
|
|
393
|
+
def _get_comment_para_id(self, comment_elem) -> str | None:
|
|
394
|
+
"""Get the para_id from a comment element."""
|
|
395
|
+
for p_elem in comment_elem.getElementsByTagName("w:p"):
|
|
396
|
+
para_id = p_elem.getAttribute("w14:paraId")
|
|
397
|
+
if para_id:
|
|
398
|
+
return para_id
|
|
399
|
+
return None
|
|
400
|
+
|
|
401
|
+
def _parse_comment(self, comment_elem) -> Comment | None:
    """Parse a w:comment element into a Comment object.

    Args:
        comment_elem: A ``w:comment`` DOM element from comments.xml

    Returns:
        The parsed Comment, or None when the element has no ``w:id`` or
        the ID is not an integer. ``author`` falls back to "Unknown" and
        ``date`` to None when missing or unparseable.
    """
    raw_id = comment_elem.getAttribute("w:id")
    if not raw_id:
        return None

    # A malformed ID makes the element unusable; report that as None
    # (already part of the return contract) instead of raising.
    try:
        comment_id = int(raw_id)
    except ValueError:
        return None

    author = comment_elem.getAttribute("w:author") or "Unknown"
    date_str = comment_elem.getAttribute("w:date")

    try:
        # Rewrite a "Z" suffix as an explicit offset before parsing
        date = datetime.fromisoformat(date_str.replace("Z", "+00:00")) if date_str else None
    except ValueError:
        date = None

    # Extract text content from w:t elements (first child node of each)
    text_parts = [
        t_elem.firstChild.data
        for t_elem in comment_elem.getElementsByTagName("w:t")
        if t_elem.firstChild
    ]

    return Comment(
        id=comment_id,
        text="".join(text_parts),
        author=author,
        date=date,
    )
|
|
427
|
+
|
|
428
|
+
# ==================== Private: XML File Creation ====================
|
|
429
|
+
|
|
430
|
+
def _ensure_comment_file(self, path: Path, template_name: str) -> None:
    """Create *path* from the named template when it does not exist yet.

    An existing file is left untouched.
    """
    if path.exists():
        return
    shutil.copy(TEMPLATE_DIR / template_name, path)
|
|
434
|
+
|
|
435
|
+
def _add_to_comments_xml(self, comment_id: int, para_id: str, text: str, timestamp: str) -> None:
    """Add a single comment to comments.xml.

    Creates comments.xml from its template if needed, then appends a
    ``w:comment`` element holding one paragraph with an annotation
    reference run and a text run.

    Args:
        comment_id: Value for the comment's ``w:id`` attribute
        para_id: Value for the paragraph's ``w14:paraId`` attribute
        text: Comment text; escaped before being embedded in the XML
        timestamp: Creation timestamp.
            NOTE(review): not interpolated into the fragment built here -
            presumably the editor adds date/author attributes; confirm.
    """
    self._ensure_comment_file(self.comments_path, "comments.xml")

    editor = self._get_editor(self.comments_path)
    root = editor.get_node(tag="w:comments")

    # Escape &, <, > and quotes so the text cannot break the XML fragment
    escaped_text = html.escape(text)
    comment_xml = f"""<w:comment w:id="{comment_id}">
<w:p w14:paraId="{para_id}" w14:textId="77777777">
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:annotationRef/></w:r>
<w:r><w:rPr><w:color w:val="000000"/><w:sz w:val="20"/><w:szCs w:val="20"/></w:rPr><w:t>{escaped_text}</w:t></w:r>
</w:p>
</w:comment>"""
    editor.append_to(root, comment_xml)
|
|
450
|
+
|
|
451
|
+
def _add_to_comments_extended_xml(self, para_id: str, parent_para_id: str | None) -> None:
    """Append a ``w15:commentEx`` entry for a comment to commentsExtended.xml.

    The file is created from its template if needed. The entry is written
    as not done; when *parent_para_id* is given it is recorded as the
    entry's ``w15:paraIdParent``, which marks the comment as a reply.
    """
    self._ensure_comment_file(self.comments_extended_path, "commentsExtended.xml")

    ext_editor = self._get_editor(self.comments_extended_path)
    container = ext_editor.get_node(tag="w15:commentsEx")

    # The parent attribute is only emitted for replies
    parent_attr = f' w15:paraIdParent="{parent_para_id}"' if parent_para_id else ""
    entry = f'<w15:commentEx w15:paraId="{para_id}"{parent_attr} w15:done="0"/>'
    ext_editor.append_to(container, entry)
|
|
463
|
+
|
|
464
|
+
def _add_to_comments_ids_xml(self, para_id: str, durable_id: str) -> None:
    """Append a ``w16cid:commentId`` entry to commentsIds.xml.

    The file is created from its template if needed. The entry pairs the
    comment's paragraph ID with its durable ID.
    """
    self._ensure_comment_file(self.comments_ids_path, "commentsIds.xml")

    ids_editor = self._get_editor(self.comments_ids_path)
    container = ids_editor.get_node(tag="w16cid:commentsIds")

    entry = '<w16cid:commentId w16cid:paraId="{}" w16cid:durableId="{}"/>'.format(para_id, durable_id)
    ids_editor.append_to(container, entry)
|
|
473
|
+
|
|
474
|
+
def _add_to_comments_extensible_xml(self, durable_id: str) -> None:
    """Append a ``w16cex:commentExtensible`` entry to commentsExtensible.xml.

    The file is created from its template if needed. The entry carries
    only the comment's durable ID.
    """
    self._ensure_comment_file(self.comments_extensible_path, "commentsExtensible.xml")

    cex_editor = self._get_editor(self.comments_extensible_path)
    container = cex_editor.get_node(tag="w16cex:commentsExtensible")

    entry = '<w16cex:commentExtensible w16cex:durableId="{}"/>'.format(durable_id)
    cex_editor.append_to(container, entry)
|
|
483
|
+
|
|
484
|
+
# ==================== Private: XML Fragments ====================
|
|
485
|
+
|
|
486
|
+
def _comment_range_start_xml(self, comment_id: int) -> str:
    """Build the ``w:commentRangeStart`` marker element for *comment_id*."""
    template = '<w:commentRangeStart w:id="{}"/>'
    return template.format(comment_id)
|
|
489
|
+
|
|
490
|
+
def _comment_range_end_xml(self, comment_id: int) -> str:
    """Generate XML for comment range end with reference run.

    Returns a ``w:commentRangeEnd`` marker followed by a run, styled
    "CommentReference", that holds the ``w:commentReference`` for
    *comment_id*.
    """
    return f"""<w:commentRangeEnd w:id="{comment_id}"/>
<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:commentReference w:id="{comment_id}"/>
</w:r>"""
|
|
497
|
+
|
|
498
|
+
def _comment_ref_run_xml(self, comment_id: int) -> str:
    """Generate XML for comment reference run.

    Returns a single run, styled "CommentReference", that holds the
    ``w:commentReference`` for *comment_id* (no range-end marker).
    """
    return f"""<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:commentReference w:id="{comment_id}"/>
</w:r>"""
|