@farazirfan/costar-server-executor 1.7.37 → 1.7.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/agent.d.ts +90 -0
- package/dist/agent/agent.d.ts.map +1 -1
- package/dist/agent/agent.js +606 -0
- package/dist/agent/agent.js.map +1 -1
- package/dist/agent/pi-embedded-runner/run.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/run.js +2 -1
- package/dist/agent/pi-embedded-runner/run.js.map +1 -1
- package/dist/agent/pi-embedded-runner/system-prompt.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/system-prompt.js +16 -37
- package/dist/agent/pi-embedded-runner/system-prompt.js.map +1 -1
- package/dist/agent/pi-embedded-runner/tools.d.ts +4 -1
- package/dist/agent/pi-embedded-runner/tools.d.ts.map +1 -1
- package/dist/agent/pi-embedded-runner/tools.js +3 -1
- package/dist/agent/pi-embedded-runner/tools.js.map +1 -1
- package/dist/agent/pi-embedded-runner/types.d.ts +4 -0
- package/dist/agent/pi-embedded-runner/types.d.ts.map +1 -1
- package/dist/cli/env-loader.d.ts.map +1 -1
- package/dist/cli/env-loader.js +1 -0
- package/dist/cli/env-loader.js.map +1 -1
- package/dist/cli/setup.js +2 -2
- package/dist/cli/setup.js.map +1 -1
- package/dist/cron/normalize.d.ts +31 -0
- package/dist/cron/normalize.d.ts.map +1 -0
- package/dist/cron/normalize.js +211 -0
- package/dist/cron/normalize.js.map +1 -0
- package/dist/cron/scheduler.d.ts +33 -3
- package/dist/cron/scheduler.d.ts.map +1 -1
- package/dist/cron/scheduler.js +253 -48
- package/dist/cron/scheduler.js.map +1 -1
- package/dist/heartbeat/runner.d.ts +27 -12
- package/dist/heartbeat/runner.d.ts.map +1 -1
- package/dist/heartbeat/runner.js +82 -104
- package/dist/heartbeat/runner.js.map +1 -1
- package/dist/infra/heartbeat-events-filter.d.ts +29 -0
- package/dist/infra/heartbeat-events-filter.d.ts.map +1 -0
- package/dist/infra/heartbeat-events-filter.js +80 -0
- package/dist/infra/heartbeat-events-filter.js.map +1 -0
- package/dist/infra/index.d.ts +9 -0
- package/dist/infra/index.d.ts.map +1 -0
- package/dist/infra/index.js +9 -0
- package/dist/infra/index.js.map +1 -0
- package/dist/infra/system-events.d.ts +58 -2
- package/dist/infra/system-events.d.ts.map +1 -1
- package/dist/infra/system-events.js +80 -14
- package/dist/infra/system-events.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +6 -1
- package/dist/server.js.map +1 -1
- package/dist/services/platform-keys.d.ts +19 -0
- package/dist/services/platform-keys.d.ts.map +1 -0
- package/dist/services/platform-keys.js +74 -0
- package/dist/services/platform-keys.js.map +1 -0
- package/dist/subagent/registry.d.ts +96 -0
- package/dist/subagent/registry.d.ts.map +1 -0
- package/dist/subagent/registry.js +180 -0
- package/dist/subagent/registry.js.map +1 -0
- package/dist/tools/complete-turn.d.ts +2 -2
- package/dist/tools/complete-turn.js +10 -10
- package/dist/tools/complete-turn.js.map +1 -1
- package/dist/tools/contacts.d.ts +13 -0
- package/dist/tools/contacts.d.ts.map +1 -0
- package/dist/tools/contacts.js +80 -0
- package/dist/tools/contacts.js.map +1 -0
- package/dist/tools/cron.d.ts +17 -2
- package/dist/tools/cron.d.ts.map +1 -1
- package/dist/tools/cron.js +117 -35
- package/dist/tools/cron.js.map +1 -1
- package/dist/tools/google-maps.d.ts +6 -6
- package/dist/tools/google-maps.d.ts.map +1 -1
- package/dist/tools/google-maps.js +207 -262
- package/dist/tools/google-maps.js.map +1 -1
- package/dist/tools/index.d.ts +17 -7
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +40 -9
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/phone-call.d.ts +11 -0
- package/dist/tools/phone-call.d.ts.map +1 -0
- package/dist/tools/phone-call.js +151 -0
- package/dist/tools/phone-call.js.map +1 -0
- package/dist/tools/sessions-spawn.d.ts +33 -0
- package/dist/tools/sessions-spawn.d.ts.map +1 -0
- package/dist/tools/sessions-spawn.js +164 -0
- package/dist/tools/sessions-spawn.js.map +1 -0
- package/dist/tools/spotify.d.ts +12 -0
- package/dist/tools/spotify.d.ts.map +1 -0
- package/dist/tools/spotify.js +251 -0
- package/dist/tools/spotify.js.map +1 -0
- package/dist/tools/subagents.d.ts +23 -0
- package/dist/tools/subagents.d.ts.map +1 -0
- package/dist/tools/subagents.js +209 -0
- package/dist/tools/subagents.js.map +1 -0
- package/dist/tools/whatsapp.d.ts +13 -0
- package/dist/tools/whatsapp.d.ts.map +1 -0
- package/dist/tools/whatsapp.js +215 -0
- package/dist/tools/whatsapp.js.map +1 -0
- package/dist/tools/youtube.d.ts +12 -0
- package/dist/tools/youtube.d.ts.map +1 -0
- package/dist/tools/youtube.js +218 -0
- package/dist/tools/youtube.js.map +1 -0
- package/dist/utils/asterizk-auth.d.ts +43 -0
- package/dist/utils/asterizk-auth.d.ts.map +1 -0
- package/dist/utils/asterizk-auth.js +125 -0
- package/dist/utils/asterizk-auth.js.map +1 -0
- package/dist/web-server.d.ts.map +1 -1
- package/dist/web-server.js +132 -0
- package/dist/web-server.js.map +1 -1
- package/dist/workspace/index.d.ts +3 -4
- package/dist/workspace/index.d.ts.map +1 -1
- package/dist/workspace/index.js +3 -4
- package/dist/workspace/index.js.map +1 -1
- package/dist/workspace/templates.d.ts +8 -7
- package/dist/workspace/templates.d.ts.map +1 -1
- package/dist/workspace/templates.js +18 -127
- package/dist/workspace/templates.js.map +1 -1
- package/dist/workspace/workspace.d.ts +2 -4
- package/dist/workspace/workspace.d.ts.map +1 -1
- package/dist/workspace/workspace.js +7 -16
- package/dist/workspace/workspace.js.map +1 -1
- package/package.json +1 -1
- package/public/index.html +231 -0
- package/skills/docx/SKILL.md +468 -0
- package/skills/docx/scripts/__init__.py +1 -0
- package/skills/docx/scripts/accept_changes.py +181 -0
- package/skills/docx/scripts/comment.py +347 -0
- package/skills/docx/scripts/helpers/__init__.py +0 -0
- package/skills/docx/scripts/helpers/merge_runs.py +231 -0
- package/skills/docx/scripts/helpers/simplify_redlines.py +240 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/docx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/docx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/docx/scripts/ooxml/scripts/pack.py +159 -0
- package/skills/docx/scripts/ooxml/scripts/unpack.py +29 -0
- package/skills/docx/scripts/ooxml/scripts/validate.py +106 -0
- package/skills/docx/scripts/ooxml/scripts/validation/__init__.py +15 -0
- package/skills/docx/scripts/ooxml/scripts/validation/base.py +1023 -0
- package/skills/docx/scripts/ooxml/scripts/validation/docx.py +519 -0
- package/skills/docx/scripts/ooxml/scripts/validation/pptx.py +315 -0
- package/skills/docx/scripts/ooxml/scripts/validation/redlining.py +284 -0
- package/skills/docx/scripts/pack.py +166 -0
- package/skills/docx/scripts/templates/comments.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
- package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/docx/scripts/templates/commentsIds.xml +3 -0
- package/skills/docx/scripts/templates/people.xml +3 -0
- package/skills/docx/scripts/unpack.py +134 -0
- package/skills/longform-video-generation/SKILL.md +298 -0
- package/skills/longform-video-generation/references/advanced_techniques.md +474 -0
- package/skills/longform-video-generation/references/google_api_guide.md +288 -0
- package/skills/longform-video-generation/scripts/video_generator.py +579 -0
- package/skills/pdf/FORMS.md +305 -0
- package/skills/pdf/REFERENCE.md +612 -0
- package/skills/pdf/SKILL.md +293 -0
- package/skills/pdf/scripts/check_bounding_boxes.py +70 -0
- package/skills/pdf/scripts/check_fillable_fields.py +12 -0
- package/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
- package/skills/pdf/scripts/create_validation_image.py +41 -0
- package/skills/pdf/scripts/extract_form_field_info.py +152 -0
- package/skills/pdf/scripts/extract_form_structure.py +124 -0
- package/skills/pdf/scripts/fill_fillable_fields.py +116 -0
- package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +136 -0
- package/skills/pptx/SKILL.md +171 -0
- package/skills/pptx/editing.md +205 -0
- package/skills/pptx/pptxgenjs.md +377 -0
- package/skills/pptx/scripts/add_slide.py +225 -0
- package/skills/pptx/scripts/clean.py +309 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/pptx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/pptx/scripts/ooxml/scripts/pack.py +159 -0
- package/skills/pptx/scripts/ooxml/scripts/unpack.py +29 -0
- package/skills/pptx/scripts/ooxml/scripts/validate.py +106 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/__init__.py +15 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/base.py +1023 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/docx.py +519 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/pptx.py +315 -0
- package/skills/pptx/scripts/ooxml/scripts/validation/redlining.py +284 -0
- package/skills/pptx/scripts/pack.py +168 -0
- package/skills/pptx/scripts/thumbnail.py +318 -0
- package/skills/pptx/scripts/unpack.py +86 -0
- package/skills/xlsx/SKILL.md +291 -0
- package/skills/xlsx/recalc.py +247 -0
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Add comments to DOCX documents.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
python comment.py unpacked/ 0 "Comment text"
|
|
6
|
+
python comment.py unpacked/ 1 "Reply text" --parent 0
|
|
7
|
+
|
|
8
|
+
Text should be pre-escaped XML (e.g., &amp; for &, &#x2019; for smart quotes).
|
|
9
|
+
|
|
10
|
+
After running, add markers to document.xml:
|
|
11
|
+
<w:commentRangeStart w:id="0"/>
|
|
12
|
+
... commented content ...
|
|
13
|
+
<w:commentRangeEnd w:id="0"/>
|
|
14
|
+
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="0"/></w:r>
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import random
|
|
19
|
+
import shutil
|
|
20
|
+
import sys
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
|
|
24
|
+
import defusedxml.minidom
|
|
25
|
+
|
|
26
|
+
# Directory of the template parts that add_comment copies into word/ when a
# comment part (comments.xml, commentsExtended.xml, ...) does not exist yet.
TEMPLATE_DIR = Path(__file__).parent / "templates"
# Prefix -> namespace URI. _append_xml declares all of these on its temporary
# wrapper element so that prefixed fragments can be parsed on their own.
NS = {
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "w14": "http://schemas.microsoft.com/office/word/2010/wordml",
    "w15": "http://schemas.microsoft.com/office/word/2012/wordml",
    "w16cid": "http://schemas.microsoft.com/office/word/2016/wordml/cid",
    "w16cex": "http://schemas.microsoft.com/office/word/2018/wordml/cex",
}

# XML template for comment content in comments.xml
# Placeholders are filled with str.format in add_comment; values are inserted
# verbatim, so they must already be valid XML attribute/text content.
COMMENT_XML = """\
<w:comment w:id="{id}" w:author="{author}" w:date="{date}" w:initials="{initials}">
<w:p w14:paraId="{para_id}" w14:textId="77777777">
<w:r>
<w:rPr><w:rStyle w:val="CommentReference"/></w:rPr>
<w:annotationRef/>
</w:r>
<w:r>
<w:rPr>
<w:color w:val="000000"/>
<w:sz w:val="20"/>
<w:szCs w:val="20"/>
</w:rPr>
<w:t>{text}</w:t>
</w:r>
</w:p>
</w:comment>"""

# Output templates for marker placement instructions
# Printed by the command-line entry point after a top-level comment is added;
# {cid} is the new comment's ID.
COMMENT_MARKER_TEMPLATE = """
Add to document.xml (markers must be direct children of w:p, never inside w:r):
<w:commentRangeStart w:id="{cid}"/>
<w:r>...</w:r>
<w:commentRangeEnd w:id="{cid}"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""

# Printed after a reply is added; {pid} is the parent comment's ID and {cid}
# the reply's ID.
REPLY_MARKER_TEMPLATE = """
Nest markers inside parent {pid}'s markers (markers must be direct children of w:p, never inside w:r):
<w:commentRangeStart w:id="{pid}"/><w:commentRangeStart w:id="{cid}"/>
<w:r>...</w:r>
<w:commentRangeEnd w:id="{cid}"/><w:commentRangeEnd w:id="{pid}"/>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{pid}"/></w:r>
<w:r><w:rPr><w:rStyle w:val="CommentReference"/></w:rPr><w:commentReference w:id="{cid}"/></w:r>"""
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _generate_hex_id() -> str:
|
|
72
|
+
"""Random 8-char hex ID (satisfies paraId < 0x80000000, durableId < 0x7FFFFFFF)."""
|
|
73
|
+
return f"{random.randint(0, 0x7FFFFFFE):08X}"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Smart quotes to re-encode after DOM serialization (DOM decodes entities to Unicode)
|
|
77
|
+
SMART_QUOTE_ENTITIES = {
|
|
78
|
+
"\u201c": "“", # Left double quote
|
|
79
|
+
"\u201d": "”", # Right double quote
|
|
80
|
+
"\u2018": "‘", # Left single quote
|
|
81
|
+
"\u2019": "’", # Right single quote
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _encode_smart_quotes(text: str) -> str:
|
|
86
|
+
"""Re-encode smart quotes as XML entities after DOM serialization."""
|
|
87
|
+
for char, entity in SMART_QUOTE_ENTITIES.items():
|
|
88
|
+
text = text.replace(char, entity)
|
|
89
|
+
return text
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _append_xml(xml_path: Path, root_tag: str, content: str) -> None:
    """Parse ``content`` as an XML fragment and append its elements to a file.

    Args:
        xml_path: XML file to update in place.
        root_tag: Qualified tag name of the element that receives the new
            children; the first element with this name is used.
        content: XML fragment. It is parsed inside a temporary wrapper element
            that declares every prefix in NS, so it may use those prefixes.
    """
    target_doc = defusedxml.minidom.parseString(xml_path.read_text(encoding="utf-8"))
    parent = target_doc.getElementsByTagName(root_tag)[0]

    declarations = " ".join(f'xmlns:{prefix}="{uri}"' for prefix, uri in NS.items())
    fragment_doc = defusedxml.minidom.parseString(
        f"<root {declarations}>{content}</root>"
    )

    # Only element children are carried over; whitespace text between them is dropped.
    new_elements = [
        node
        for node in fragment_doc.documentElement.childNodes  # type: ignore
        if node.nodeType == node.ELEMENT_NODE
    ]
    for element in new_elements:
        parent.appendChild(target_doc.importNode(element, True))

    # Re-encode smart quotes that DOM decoded to Unicode
    serialized = target_doc.toxml(encoding="UTF-8").decode("utf-8")
    xml_path.write_text(_encode_smart_quotes(serialized), encoding="utf-8")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _find_para_id(comments_path: Path, comment_id: int) -> str | None:
    """Look up the paragraph ID that belongs to a comment.

    Args:
        comments_path: Path to comments.xml.
        comment_id: Value of the ``w:id`` attribute to search for.

    Returns:
        The first non-empty ``w14:paraId`` found on a ``w:p`` inside a matching
        ``w:comment``, or None when there is no such paragraph.
    """
    doc = defusedxml.minidom.parseString(comments_path.read_text(encoding="utf-8"))
    wanted = str(comment_id)
    matching = (
        node
        for node in doc.getElementsByTagName("w:comment")
        if node.getAttribute("w:id") == wanted
    )
    for comment in matching:
        for paragraph in comment.getElementsByTagName("w:p"):
            found = paragraph.getAttribute("w14:paraId")
            if found:
                return found
    return None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _get_next_rid(rels_path: Path) -> int:
    """Return one more than the highest numeric ``rId`` in a .rels file.

    IDs that do not start with "rId", or whose remainder is not an integer,
    are ignored. The result is 1 when no numeric rId is present.
    """
    doc = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
    highest = 0
    for relationship in doc.getElementsByTagName("Relationship"):
        identifier = relationship.getAttribute("Id")
        if not identifier or not identifier.startswith("rId"):
            continue
        try:
            number = int(identifier[3:])
        except ValueError:
            # Non-numeric suffix: this ID cannot collide with a generated one.
            continue
        if number > highest:
            highest = number
    return highest + 1
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _has_relationship(rels_path: Path, target: str) -> bool:
    """Report whether any ``Relationship`` element has exactly this ``Target``."""
    doc = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
    return any(
        relationship.getAttribute("Target") == target
        for relationship in doc.getElementsByTagName("Relationship")
    )
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _has_content_type(ct_path: Path, part_name: str) -> bool:
    """Report whether any ``Override`` element has exactly this ``PartName``."""
    doc = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
    return any(
        entry.getAttribute("PartName") == part_name
        for entry in doc.getElementsByTagName("Override")
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _ensure_comment_relationships(unpacked_dir: Path) -> None:
    """Register the four comment parts in word/_rels/document.xml.rels.

    Does nothing when the .rels file is missing or when it already contains a
    relationship whose target is "comments.xml". Otherwise one relationship per
    comment part is appended, numbered consecutively from the next free rId.
    """
    rels_path = unpacked_dir / "word" / "_rels" / "document.xml.rels"
    if not rels_path.exists() or _has_relationship(rels_path, "comments.xml"):
        return

    doc = defusedxml.minidom.parseString(rels_path.read_text(encoding="utf-8"))
    relationships_root = doc.documentElement
    first_free = _get_next_rid(rels_path)

    # (relationship type, target) for every comment part
    comment_parts = (
        (
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
            "comments.xml",
        ),
        (
            "http://schemas.microsoft.com/office/2011/relationships/commentsExtended",
            "commentsExtended.xml",
        ),
        (
            "http://schemas.microsoft.com/office/2016/09/relationships/commentsIds",
            "commentsIds.xml",
        ),
        (
            "http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible",
            "commentsExtensible.xml",
        ),
    )

    for offset, (rel_type, target) in enumerate(comment_parts):
        node = doc.createElement("Relationship")
        node.setAttribute("Id", f"rId{first_free + offset}")
        node.setAttribute("Type", rel_type)
        node.setAttribute("Target", target)
        relationships_root.appendChild(node)  # type: ignore

    rels_path.write_bytes(doc.toxml(encoding="UTF-8"))
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _ensure_comment_content_types(unpacked_dir: Path) -> None:
    """Register the four comment parts in [Content_Types].xml.

    Does nothing when the file is missing or when it already contains an
    ``Override`` for "/word/comments.xml". Otherwise one ``Override`` per
    comment part is appended to the document element.
    """
    ct_path = unpacked_dir / "[Content_Types].xml"
    if not ct_path.exists() or _has_content_type(ct_path, "/word/comments.xml"):
        return

    doc = defusedxml.minidom.parseString(ct_path.read_text(encoding="utf-8"))
    types_root = doc.documentElement

    # Part name -> content type, in the order the overrides are appended
    part_content_types = {
        "/word/comments.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml",
        "/word/commentsExtended.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml",
        "/word/commentsIds.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml",
        "/word/commentsExtensible.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml",
    }

    for part_name, content_type in part_content_types.items():
        entry = doc.createElement("Override")
        entry.setAttribute("PartName", part_name)
        entry.setAttribute("ContentType", content_type)
        types_root.appendChild(entry)  # type: ignore

    ct_path.write_bytes(doc.toxml(encoding="UTF-8"))
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def add_comment(
    unpacked_dir: str,
    comment_id: int,
    text: str,
    author: str = "Costar",
    initials: str = "C",
    parent_id: int | None = None,
) -> tuple[str, str]:
    """Add a comment, or a reply to an existing comment, to an unpacked DOCX.

    The comment is recorded in word/comments.xml, commentsExtended.xml,
    commentsIds.xml and commentsExtensible.xml; missing parts are created from
    the templates in TEMPLATE_DIR. When comments.xml itself has to be created,
    the comment relationships and content types are registered as well.

    Args:
        unpacked_dir: Root of the unpacked DOCX; must contain a ``word`` directory.
        comment_id: Value written to the new comment's ``w:id`` attribute.
        text: Comment text, pre-escaped for XML (e.g., &amp; &#x2019;).
        author: Written to ``w:author``. Inserted verbatim, so it must already
            be safe inside an XML attribute.
        initials: Written to ``w:initials``. Inserted verbatim as well.
        parent_id: ``w:id`` of an existing comment to reply to, or None for a
            top-level comment.

    Returns:
        (para_id, message) tuple. On failure para_id is "" and message starts
        with "Error:"; no file has been created or modified in that case.
    """
    package_root = Path(unpacked_dir)
    word = package_root / "word"
    if not word.exists():
        return "", f"Error: {word} not found"

    comments = word / "comments.xml"

    # Resolve the parent BEFORE touching any file. Validating it only after the
    # new comment has been written to comments.xml would leave a comment that is
    # missing from the other three parts whenever the parent does not exist.
    parent_para = None
    if parent_id is not None:
        if comments.exists():
            parent_para = _find_para_id(comments, parent_id)
        if not parent_para:
            return "", f"Error: Parent comment {parent_id} not found"

    para_id, durable_id = _generate_hex_id(), _generate_hex_id()
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # comments.xml
    if not comments.exists():
        shutil.copy(TEMPLATE_DIR / "comments.xml", comments)
        # Add relationships and content types for comment files
        _ensure_comment_relationships(package_root)
        _ensure_comment_content_types(package_root)
    _append_xml(
        comments,
        "w:comments",
        COMMENT_XML.format(
            id=comment_id,
            author=author,
            date=ts,
            initials=initials,
            para_id=para_id,
            text=text,  # Model provides pre-escaped XML content
        ),
    )

    # commentsExtended.xml (a reply additionally points at its parent's paragraph)
    ext = word / "commentsExtended.xml"
    if not ext.exists():
        shutil.copy(TEMPLATE_DIR / "commentsExtended.xml", ext)
    parent_attr = f' w15:paraIdParent="{parent_para}"' if parent_para else ""
    _append_xml(
        ext,
        "w15:commentsEx",
        f'<w15:commentEx w15:paraId="{para_id}"{parent_attr} w15:done="0"/>',
    )

    # commentsIds.xml
    ids = word / "commentsIds.xml"
    if not ids.exists():
        shutil.copy(TEMPLATE_DIR / "commentsIds.xml", ids)
    _append_xml(
        ids,
        "w16cid:commentsIds",
        f'<w16cid:commentId w16cid:paraId="{para_id}" w16cid:durableId="{durable_id}"/>',
    )

    # commentsExtensible.xml
    extensible = word / "commentsExtensible.xml"
    if not extensible.exists():
        shutil.copy(TEMPLATE_DIR / "commentsExtensible.xml", extensible)
    _append_xml(
        extensible,
        "w16cex:commentsExtensible",
        f'<w16cex:commentExtensible w16cex:durableId="{durable_id}" w16cex:dateUtc="{ts}"/>',
    )

    action = "reply" if parent_id is not None else "comment"
    return para_id, f"Added {action} {comment_id} (para_id={para_id})"
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
if __name__ == "__main__":
    # Command-line entry point: add one comment, then print where its markers
    # have to be placed in document.xml.
    parser = argparse.ArgumentParser(description="Add comments to DOCX documents")
    parser.add_argument("unpacked_dir", help="Unpacked DOCX directory")
    parser.add_argument("comment_id", type=int, help="Comment ID (must be unique)")
    parser.add_argument("text", help="Comment text")
    parser.add_argument("--author", default="Costar", help="Author name")
    parser.add_argument("--initials", default="C", help="Author initials")
    parser.add_argument("--parent", type=int, help="Parent comment ID (for replies)")
    cli = parser.parse_args()

    _, outcome = add_comment(
        cli.unpacked_dir,
        cli.comment_id,
        cli.text,
        author=cli.author,
        initials=cli.initials,
        parent_id=cli.parent,
    )
    print(outcome)
    if "Error" in outcome:
        sys.exit(1)

    # Replies get the nested-marker instructions, top-level comments the plain ones.
    if cli.parent is None:
        instructions = COMMENT_MARKER_TEMPLATE.format(cid=cli.comment_id)
    else:
        instructions = REPLY_MARKER_TEMPLATE.format(pid=cli.parent, cid=cli.comment_id)
    print(instructions)
|
|
File without changes
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
"""Merge adjacent runs with identical formatting in DOCX.
|
|
2
|
+
|
|
3
|
+
Merges adjacent <w:r> elements that have identical <w:rPr> properties.
|
|
4
|
+
Works on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).
|
|
5
|
+
|
|
6
|
+
Also:
|
|
7
|
+
- Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
|
|
8
|
+
- Removes proofErr elements (spell/grammar markers that block merging)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import defusedxml.minidom
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def merge_runs(input_dir: str) -> tuple[int, str]:
    """Merge adjacent runs with identical formatting in ``word/document.xml``.

    Before merging, ``proofErr`` elements are removed and rsid attributes are
    stripped from runs, since both would otherwise keep equivalent runs apart.
    The document is rewritten in place as UTF-8.

    Args:
        input_dir: Path to unpacked DOCX directory

    Returns:
        (merge_count, message). On any failure the count is 0 and the
        message starts with "Error:".
    """
    doc_xml = Path(input_dir) / "word" / "document.xml"

    if not doc_xml.exists():
        return 0, f"Error: {doc_xml} not found"

    try:
        # Parse the raw bytes so the parser honours the encoding named in the
        # part's own XML declaration (UTF-8 or UTF-16) instead of assuming
        # UTF-8 while decoding up front.
        dom = defusedxml.minidom.parseString(doc_xml.read_bytes())
        root = dom.documentElement

        # Clean up elements that block merging
        _remove_elements(root, "proofErr")
        _strip_run_rsid_attrs(root)

        # Every distinct parent of a run is a container whose direct run
        # children may be merged with each other.
        containers = {run.parentNode for run in _find_elements(root, "r")}

        merge_count = sum(_merge_runs_in(container) for container in containers)

        doc_xml.write_bytes(dom.toxml(encoding="UTF-8"))
        return merge_count, f"Merged {merge_count} runs"

    except Exception as e:
        # Failures are reported through the message rather than raised.
        return 0, f"Error: {e}"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# --- Element helpers ---
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _find_elements(root, tag: str) -> list:
    """Find all elements matching tag name (with or without namespace).

    Walks ``root`` and its descendants in document order. An element matches
    when its local name (or, if that is empty, its tag name) equals ``tag``
    or ends with ``":" + tag``.

    Args:
        root: DOM node at which the search starts (included in the search).
        tag: Element name to look for, without namespace prefix.

    Returns:
        List of matching element nodes in document order.
    """
    suffix = f":{tag}"
    results = []

    # An explicit stack replaces recursion so that very deeply nested
    # documents cannot hit Python's recursion limit.
    pending = [root]
    while pending:
        node = pending.pop()
        if node.nodeType != node.ELEMENT_NODE:
            continue
        name = node.localName or node.tagName
        if name == tag or name.endswith(suffix):
            results.append(node)
        # Push children reversed so the first child is popped first,
        # preserving pre-order (document) ordering of the results.
        pending.extend(reversed(node.childNodes))

    return results
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _get_child(parent, tag: str):
    """Return the first direct child element named ``tag``, or None.

    Only element children are considered; a child matches when its local
    name (or tag name, if the local name is empty) equals ``tag`` or ends
    with ``":" + tag``.
    """
    suffix = f":{tag}"
    for candidate in parent.childNodes:
        if candidate.nodeType != candidate.ELEMENT_NODE:
            continue
        label = candidate.localName or candidate.tagName
        if label == tag or label.endswith(suffix):
            return candidate
    return None
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _get_children(parent, tag: str) -> list:
    """Return every direct child element named ``tag``, in child order.

    Uses the same name test as the other helpers: local name (or tag name
    when the local name is empty) equal to ``tag`` or ending in ``":" + tag``.
    """
    suffix = f":{tag}"

    def matches(node) -> bool:
        if node.nodeType != node.ELEMENT_NODE:
            return False
        label = node.localName or node.tagName
        return label == tag or label.endswith(suffix)

    return [node for node in parent.childNodes if matches(node)]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _is_adjacent(elem1, elem2) -> bool:
    """Tell whether ``elem2`` follows ``elem1`` with only whitespace between.

    Scans the siblings after ``elem1``. Any other element, or a text node
    with non-whitespace content, met before ``elem2`` means "not adjacent",
    as does reaching the end of the sibling list.
    """
    cursor = elem1.nextSibling
    while cursor is not None and cursor is not elem2:
        if cursor.nodeType == cursor.ELEMENT_NODE:
            return False
        if cursor.nodeType == cursor.TEXT_NODE and cursor.data.strip():
            return False
        cursor = cursor.nextSibling
    # Either we stopped on elem2 (adjacent) or ran off the end (not adjacent).
    return cursor is not None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# --- Cleanup functions ---
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _remove_elements(root, tag: str):
    """Detach every element named ``tag`` found under ``root`` from its parent."""
    for doomed in _find_elements(root, tag):
        owner = doomed.parentNode
        # Skip nodes that are no longer attached to a parent.
        if owner:
            owner.removeChild(doomed)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _strip_run_rsid_attrs(root):
    """Drop every attribute whose name contains "rsid" from all run elements."""
    for run_elem in _find_elements(root, "r"):
        # Collect the names first so the attribute map is not mutated while
        # it is being read.
        rsid_names = [
            attribute.name
            for attribute in run_elem.attributes.values()
            if "rsid" in attribute.name.lower()
        ]
        for attr_name in rsid_names:
            run_elem.removeAttribute(attr_name)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# --- Merge functions ---
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _merge_runs_in(container) -> int:
    """Merge neighbouring, identically formatted runs directly under ``container``.

    Returns the number of runs that were absorbed into a preceding run.
    """
    absorbed = 0
    current = _first_child_run(container)

    while current:
        # Keep swallowing the following run for as long as it is the very
        # next element and carries the same formatting.
        neighbour = _next_element_sibling(current)
        while neighbour and _is_run(neighbour) and _can_merge(current, neighbour):
            _merge_run_content(current, neighbour)
            container.removeChild(neighbour)
            absorbed += 1
            neighbour = _next_element_sibling(current)

        # Collapse the text elements gathered in this run, then move on.
        _consolidate_text(current)
        current = _next_sibling_run(current)

    return absorbed
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _first_child_run(container):
    """Return the first direct child of ``container`` that is a run, or None."""
    return next(
        (
            node
            for node in container.childNodes
            if node.nodeType == node.ELEMENT_NODE and _is_run(node)
        ),
        None,
    )
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _next_element_sibling(node):
    """Return the nearest following sibling that is an element, or None.

    Non-element siblings (such as text nodes) are stepped over.
    """
    candidate = node.nextSibling
    while candidate is not None and candidate.nodeType != candidate.ELEMENT_NODE:
        candidate = candidate.nextSibling
    return candidate
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _next_sibling_run(node):
    """Return the next following sibling that is a run element, or None.

    Unlike the plain element-sibling lookup, other elements in between are
    skipped rather than ending the search.
    """
    candidate = node.nextSibling
    while candidate:
        if candidate.nodeType == candidate.ELEMENT_NODE and _is_run(candidate):
            return candidate
        candidate = candidate.nextSibling
    return None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _is_run(node) -> bool:
    """Report whether ``node`` is a run element (named ``r``, any prefix)."""
    label = node.localName or node.tagName
    # The part after the last colon (or the whole name when there is no
    # colon) must be exactly "r".
    return label.rpartition(":")[2] == "r"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _can_merge(run1, run2) -> bool:
    """Decide whether two runs carry identical formatting.

    Runs are mergeable when neither has an ``rPr`` child, or when both do
    and the two ``rPr`` elements serialize to the same XML text.
    """
    props_a = _get_child(run1, "rPr")
    props_b = _get_child(run2, "rPr")

    if props_a is None or props_b is None:
        # Mergeable only if the properties are missing on both sides.
        return props_a is props_b
    return props_a.toxml() == props_b.toxml()
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _merge_run_content(target, source):
    """Append ``source``'s element children, except ``rPr``, to ``target``.

    Non-element children of ``source`` (e.g. whitespace text) are left behind.
    """
    # Snapshot the children to move first: appending a node to ``target``
    # detaches it from ``source`` and would disturb a live iteration.
    movable = [
        node
        for node in source.childNodes
        if node.nodeType == node.ELEMENT_NODE
        and (node.localName or node.tagName).rpartition(":")[2] != "rPr"
    ]
    for node in movable:
        target.appendChild(node)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _consolidate_text(run):
    """Merge adjacent <w:t> elements within a run.

    Each ``t`` child that directly follows another ``t`` child (only
    whitespace between them) is folded into its predecessor and removed.
    The surviving element gets ``xml:space="preserve"`` when the merged text
    has leading or trailing whitespace; otherwise that attribute is removed.

    Args:
        run: Run element whose direct ``t`` children are consolidated in place.
    """
    xml_whitespace = " \t\r\n"

    def text_of(t_elem) -> str:
        # Join every character-data child (plain text and CDATA), not only
        # the first one, so no part of the text is dropped by the merge.
        return "".join(
            node.data
            for node in t_elem.childNodes
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
        )

    t_elements = _get_children(run, "t")

    # Work backwards to safely remove elements
    for i in range(len(t_elements) - 1, 0, -1):
        curr, prev = t_elements[i], t_elements[i - 1]

        if not _is_adjacent(prev, curr):
            continue

        merged = text_of(prev) + text_of(curr)

        # Replace prev's character data with a single text node holding the
        # merged text; any other kind of child is left untouched.
        for node in list(prev.childNodes):
            if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
                prev.removeChild(node)
        prev.appendChild(run.ownerDocument.createTextNode(merged))

        # Preserve whitespace if needed: any XML whitespace character at
        # either edge (not just a space) requires xml:space="preserve".
        if merged != merged.strip(xml_whitespace):
            prev.setAttribute("xml:space", "preserve")
        elif prev.hasAttribute("xml:space"):
            prev.removeAttribute("xml:space")

        run.removeChild(curr)
|