@brookmind/ai-toolkit 1.0.5 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -14
- package/agents/code-reviewer.md +6 -1
- package/agents/code-simplifier.md +52 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +257 -220
- package/dist/index.js.map +1 -1
- package/mcps/context7/.mcp.json +13 -0
- package/mcps/expo-mcp/.mcp.json +13 -0
- package/mcps/figma-mcp/.mcp.json +4 -6
- package/package.json +4 -4
- package/skills/pdf-processing-pro/FORMS.md +610 -0
- package/skills/pdf-processing-pro/OCR.md +137 -0
- package/skills/pdf-processing-pro/SKILL.md +296 -0
- package/skills/pdf-processing-pro/TABLES.md +626 -0
- package/skills/pdf-processing-pro/scripts/analyze_form.py +307 -0
- package/skills/react-best-practices/AGENTS.md +915 -0
- package/skills/react-best-practices/README.md +127 -0
- package/skills/react-best-practices/SKILL.md +110 -0
- package/skills/react-best-practices/metadata.json +14 -0
- package/skills/react-best-practices/rules/_sections.md +41 -0
- package/skills/react-best-practices/rules/_template.md +28 -0
- package/skills/react-best-practices/rules/advanced-event-handler-refs.md +80 -0
- package/skills/react-best-practices/rules/advanced-use-latest.md +76 -0
- package/skills/react-best-practices/rules/async-defer-await.md +80 -0
- package/skills/react-best-practices/rules/async-dependencies.md +36 -0
- package/skills/react-best-practices/rules/async-parallel.md +28 -0
- package/skills/react-best-practices/rules/async-suspense-boundaries.md +100 -0
- package/skills/react-best-practices/rules/bundle-barrel-imports.md +42 -0
- package/skills/react-best-practices/rules/bundle-conditional.md +106 -0
- package/skills/react-best-practices/rules/bundle-preload.md +44 -0
- package/skills/react-best-practices/rules/client-event-listeners.md +131 -0
- package/skills/react-best-practices/rules/client-swr-dedup.md +133 -0
- package/skills/react-best-practices/rules/js-batch-dom-css.md +82 -0
- package/skills/react-best-practices/rules/js-cache-function-results.md +80 -0
- package/skills/react-best-practices/rules/js-cache-property-access.md +28 -0
- package/skills/react-best-practices/rules/js-cache-storage.md +70 -0
- package/skills/react-best-practices/rules/js-combine-iterations.md +32 -0
- package/skills/react-best-practices/rules/js-early-exit.md +50 -0
- package/skills/react-best-practices/rules/js-hoist-regexp.md +45 -0
- package/skills/react-best-practices/rules/js-index-maps.md +37 -0
- package/skills/react-best-practices/rules/js-length-check-first.md +49 -0
- package/skills/react-best-practices/rules/js-min-max-loop.md +82 -0
- package/skills/react-best-practices/rules/js-set-map-lookups.md +24 -0
- package/skills/react-best-practices/rules/js-tosorted-immutable.md +57 -0
- package/skills/react-best-practices/rules/rendering-activity.md +90 -0
- package/skills/react-best-practices/rules/rendering-animate-svg-wrapper.md +47 -0
- package/skills/react-best-practices/rules/rendering-conditional-render.md +40 -0
- package/skills/react-best-practices/rules/rendering-content-visibility.md +38 -0
- package/skills/react-best-practices/rules/rendering-hoist-jsx.md +65 -0
- package/skills/react-best-practices/rules/rendering-svg-precision.md +28 -0
- package/skills/react-best-practices/rules/rerender-defer-reads.md +39 -0
- package/skills/react-best-practices/rules/rerender-dependencies.md +45 -0
- package/skills/react-best-practices/rules/rerender-derived-state.md +29 -0
- package/skills/react-best-practices/rules/rerender-functional-setstate.md +74 -0
- package/skills/react-best-practices/rules/rerender-lazy-state-init.md +58 -0
- package/skills/react-best-practices/rules/rerender-memo.md +85 -0
- package/skills/react-best-practices/rules/rerender-transitions.md +40 -0
- package/themes/README.md +68 -0
- package/themes/claude-vivid.json +72 -0
- package/mcps/context7/.claude-plugin +0 -1
- package/mcps/context7/README.md +0 -1
- package/mcps/context7/server.json +0 -1
- package/mcps/expo-mcp/README.md +0 -33
- package/mcps/expo-mcp/package.json +0 -30
- package/mcps/figma-mcp/README.md +0 -554
- package/mcps/figma-mcp/server.json +0 -17
- package/mcps/figma-mcp/skills/code-connect-components +0 -1
- package/mcps/figma-mcp/skills/create-design-system-rules +0 -1
- package/mcps/figma-mcp/skills/implement-design +0 -1
- package/mcps/pg-aiguide/.claude-plugin +0 -1
- package/mcps/pg-aiguide/CLAUDE.md +0 -21
- package/mcps/pg-aiguide/README.md +0 -275
- package/mcps/pg-aiguide/skills/design-postgres-tables +0 -1
- package/mcps/pg-aiguide/skills/find-hypertable-candidates +0 -1
- package/mcps/pg-aiguide/skills/migrate-postgres-tables-to-hypertables +0 -1
- package/mcps/pg-aiguide/skills/setup-timescaledb-hypertables +0 -1
- package/mcps/pg-aiguide/skills.yaml +0 -4
- package/skills/cloudflare-cli/SKILL.md +0 -151
- package/skills/docx/LICENSE.txt +0 -30
- package/skills/docx/SKILL.md +0 -197
- package/skills/docx/docx-js.md +0 -350
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
- package/skills/docx/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
- package/skills/docx/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
- package/skills/docx/ooxml/schemas/mce/mc.xsd +0 -75
- package/skills/docx/ooxml/schemas/microsoft/wml-2010.xsd +0 -560
- package/skills/docx/ooxml/schemas/microsoft/wml-2012.xsd +0 -67
- package/skills/docx/ooxml/schemas/microsoft/wml-2018.xsd +0 -14
- package/skills/docx/ooxml/schemas/microsoft/wml-cex-2018.xsd +0 -20
- package/skills/docx/ooxml/schemas/microsoft/wml-cid-2016.xsd +0 -13
- package/skills/docx/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
- package/skills/docx/ooxml/schemas/microsoft/wml-symex-2015.xsd +0 -8
- package/skills/docx/ooxml/scripts/pack.py +0 -159
- package/skills/docx/ooxml/scripts/unpack.py +0 -29
- package/skills/docx/ooxml/scripts/validate.py +0 -69
- package/skills/docx/ooxml/scripts/validation/__init__.py +0 -15
- package/skills/docx/ooxml/scripts/validation/base.py +0 -951
- package/skills/docx/ooxml/scripts/validation/docx.py +0 -274
- package/skills/docx/ooxml/scripts/validation/pptx.py +0 -315
- package/skills/docx/ooxml/scripts/validation/redlining.py +0 -279
- package/skills/docx/ooxml.md +0 -610
- package/skills/docx/scripts/__init__.py +0 -1
- package/skills/docx/scripts/document.py +0 -1276
- package/skills/docx/scripts/templates/comments.xml +0 -3
- package/skills/docx/scripts/templates/commentsExtended.xml +0 -3
- package/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
- package/skills/docx/scripts/templates/commentsIds.xml +0 -3
- package/skills/docx/scripts/templates/people.xml +0 -3
- package/skills/docx/scripts/utilities.py +0 -374
- package/skills/pdf/LICENSE.txt +0 -30
- package/skills/pdf/SKILL.md +0 -294
- package/skills/pdf/forms.md +0 -205
- package/skills/pdf/reference.md +0 -612
- package/skills/pdf/scripts/check_bounding_boxes.py +0 -70
- package/skills/pdf/scripts/check_bounding_boxes_test.py +0 -226
- package/skills/pdf/scripts/check_fillable_fields.py +0 -12
- package/skills/pdf/scripts/convert_pdf_to_images.py +0 -35
- package/skills/pdf/scripts/create_validation_image.py +0 -41
- package/skills/pdf/scripts/extract_form_field_info.py +0 -152
- package/skills/pdf/scripts/fill_fillable_fields.py +0 -114
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -108
- package/skills/xlsx/LICENSE.txt +0 -30
- package/skills/xlsx/SKILL.md +0 -289
- package/skills/xlsx/recalc.py +0 -178
|
@@ -1,279 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Validator for tracked changes in Word documents.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import subprocess
|
|
6
|
-
import tempfile
|
|
7
|
-
import zipfile
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class RedliningValidator:
    """Validator for tracked changes in Word documents.

    Compares an unpacked (edited) document against the original ``.docx``.
    Every ``w:ins`` authored by "Claude" is dropped and every ``w:del``
    authored by "Claude" is unwrapped in both documents; the remaining
    paragraph text must then be identical. Any difference means text was
    changed without being recorded as a tracked change by "Claude".
    """

    def __init__(self, unpacked_dir, original_docx, verbose=False):
        """Store the locations to compare.

        Args:
            unpacked_dir: Directory holding the unpacked, edited document
                (``word/document.xml`` is read from it).
            original_docx: Path to the original ``.docx`` archive.
            verbose: When true, ``validate`` also prints "PASSED" messages.
        """
        self.unpacked_dir = Path(unpacked_dir)
        self.original_docx = Path(original_docx)
        self.verbose = verbose
        self.namespaces = {
            "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        }

    def validate(self):
        """Main validation method that returns True if valid, False otherwise.

        Failures are reported on stdout with a "FAILED - ..." message. The
        document is considered valid without further checks when it contains
        no ``w:ins``/``w:del`` element authored by "Claude".
        """
        import xml.etree.ElementTree as ET

        # Verify unpacked directory exists and has correct structure
        modified_file = self.unpacked_dir / "word" / "document.xml"
        if not modified_file.exists():
            print(f"FAILED - Modified document.xml not found at {modified_file}")
            return False

        # Cheap pre-check: redlining validation is only needed if tracked
        # changes by Claude have been used. This step is best-effort; if the
        # XML cannot be read here, the strict parse further down reports it.
        modified_root = None
        try:
            modified_root = ET.parse(modified_file).getroot()
            needs_validation = self._has_claude_changes(modified_root)
        except Exception:
            needs_validation = True

        if not needs_validation:
            if self.verbose:
                print("PASSED - No tracked changes by Claude found.")
            return True

        # Unpack the original docx into a scratch directory and parse it.
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)

            try:
                with zipfile.ZipFile(self.original_docx, "r") as zip_ref:
                    zip_ref.extractall(temp_path)
            except Exception as e:
                print(f"FAILED - Error unpacking original docx: {e}")
                return False

            original_file = temp_path / "word" / "document.xml"
            if not original_file.exists():
                print(
                    f"FAILED - Original document.xml not found in {self.original_docx}"
                )
                return False

            try:
                # Reuse the tree from the pre-check when it parsed cleanly.
                if modified_root is None:
                    modified_root = ET.parse(modified_file).getroot()
                original_root = ET.parse(original_file).getroot()
            except ET.ParseError as e:
                print(f"FAILED - Error parsing XML files: {e}")
                return False

        # Remove Claude's tracked changes from both documents
        self._remove_claude_tracked_changes(original_root)
        self._remove_claude_tracked_changes(modified_root)

        # Extract and compare text content
        modified_text = self._extract_text_content(modified_root)
        original_text = self._extract_text_content(original_root)

        if modified_text != original_text:
            print(self._generate_detailed_diff(original_text, modified_text))
            return False

        if self.verbose:
            print("PASSED - All changes by Claude are properly tracked")
        return True

    def _has_claude_changes(self, root):
        """Return True if any w:ins or w:del below *root* is authored by Claude."""
        author_attr = f"{{{self.namespaces['w']}}}author"
        return any(
            elem.get(author_attr) == "Claude"
            for xpath in (".//w:del", ".//w:ins")
            for elem in root.findall(xpath, self.namespaces)
        )

    def _generate_detailed_diff(self, original_text, modified_text):
        """Build the failure message, including a git word diff when available.

        Returns:
            A multi-line string: explanation, likely causes, and either the
            diff produced by ``_get_git_word_diff`` or a note that no diff
            could be generated.
        """
        error_parts = [
            "FAILED - Document text doesn't match after removing Claude's tracked changes",
            "",
            "Likely causes:",
            "  1. Modified text inside another author's <w:ins> or <w:del> tags",
            "  2. Made edits without proper tracked changes",
            "  3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
            "",
            "For pre-redlined documents, use correct patterns:",
            "  - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
            "  - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
            "",
        ]

        # Show git word diff
        git_diff = self._get_git_word_diff(original_text, modified_text)
        if git_diff:
            error_parts.extend(["Differences:", "============", git_diff])
        else:
            error_parts.append("Unable to generate word diff (git not available)")

        return "\n".join(error_parts)

    @staticmethod
    def _strip_diff_headers(diff_output):
        """Return the non-blank hunk lines of a git diff, without headers.

        Everything before the first "@@" line (diff --git, index, +++, ---)
        and every "@@" line itself is dropped.
        """
        kept = []
        in_content = False
        for line in diff_output.split("\n"):
            if line.startswith("@@"):
                in_content = True
            elif in_content and line.strip():
                kept.append(line)
        return "\n".join(kept)

    def _get_git_word_diff(self, original_text, modified_text):
        """Generate a word diff using git, preferring character-level precision.

        Returns:
            The diff body as a string, or None when git produced no usable
            output or could not be run at all.
        """
        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                # Create two files
                original_file = temp_path / "original.txt"
                modified_file = temp_path / "modified.txt"
                original_file.write_text(original_text, encoding="utf-8")
                modified_file.write_text(modified_text, encoding="utf-8")

                # Character-by-character diff first; plain word diff as the
                # fallback when the first attempt yields no hunk content.
                for granularity in (["--word-diff-regex=."], []):
                    result = subprocess.run(
                        [
                            "git",
                            "diff",
                            "--word-diff=plain",
                            *granularity,
                            "-U0",  # Zero lines of context - show only changed lines
                            "--no-index",
                            str(original_file),
                            str(modified_file),
                        ],
                        capture_output=True,
                        # The inputs were written as UTF-8, so decode git's
                        # output the same way instead of using the locale.
                        encoding="utf-8",
                        errors="replace",
                    )
                    content = self._strip_diff_headers(result.stdout)
                    if content:
                        return content

        except Exception:
            # Git not available or other error. The diff is diagnostic only,
            # so it must never mask the validation failure being reported.
            pass

        return None

    def _remove_claude_tracked_changes(self, root):
        """Remove tracked changes authored by Claude from the XML root.

        Mutates *root* in place: ``w:ins`` elements by Claude are deleted
        together with their content; ``w:del`` elements by Claude are replaced
        by their children, with ``w:delText`` renamed to ``w:t``.
        """
        w_ns = self.namespaces["w"]
        ins_tag = f"{{{w_ns}}}ins"
        del_tag = f"{{{w_ns}}}del"
        author_attr = f"{{{w_ns}}}author"
        deltext_tag = f"{{{w_ns}}}delText"
        t_tag = f"{{{w_ns}}}t"

        # Remove w:ins elements
        for parent in root.iter():
            claude_insertions = [
                child
                for child in parent
                if child.tag == ins_tag and child.get(author_attr) == "Claude"
            ]
            for elem in claude_insertions:
                parent.remove(elem)

        # Unwrap content in w:del elements where author is "Claude"
        for parent in root.iter():
            claude_deletions = [
                (index, child)
                for index, child in enumerate(parent)
                if child.tag == del_tag and child.get(author_attr) == "Claude"
            ]

            # Process in reverse order so earlier indices stay valid
            for del_index, del_elem in reversed(claude_deletions):
                # Convert w:delText to w:t before moving
                for elem in list(del_elem.iter(deltext_tag)):
                    elem.tag = t_tag

                # Replace the w:del with its own children, keeping their order
                parent[del_index:del_index + 1] = list(del_elem)

    def _extract_text_content(self, root):
        """Extract text content from Word XML, preserving paragraph structure.

        Empty paragraphs are skipped to avoid false positives when tracked
        insertions add only structural elements without text content.

        Returns:
            The text of each non-empty ``w:p`` below *root*, joined by "\\n".
        """
        w_ns = self.namespaces["w"]
        p_xpath = f".//{{{w_ns}}}p"
        t_xpath = f".//{{{w_ns}}}t"

        paragraphs = []
        for p_elem in root.findall(p_xpath):
            paragraph_text = "".join(
                t_elem.text for t_elem in p_elem.findall(t_xpath) if t_elem.text
            )
            # Skip empty paragraphs - they don't affect content validation
            if paragraph_text:
                paragraphs.append(paragraph_text)

        return "\n".join(paragraphs)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
# Import-only module: executing it as a script is rejected with an error.
if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")
|