@farazirfan/costar-server-executor 1.7.37 → 1.7.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (253) hide show
  1. package/dist/agent/agent.d.ts +90 -0
  2. package/dist/agent/agent.d.ts.map +1 -1
  3. package/dist/agent/agent.js +606 -0
  4. package/dist/agent/agent.js.map +1 -1
  5. package/dist/agent/pi-embedded-runner/run.d.ts.map +1 -1
  6. package/dist/agent/pi-embedded-runner/run.js +2 -1
  7. package/dist/agent/pi-embedded-runner/run.js.map +1 -1
  8. package/dist/agent/pi-embedded-runner/system-prompt.d.ts.map +1 -1
  9. package/dist/agent/pi-embedded-runner/system-prompt.js +16 -37
  10. package/dist/agent/pi-embedded-runner/system-prompt.js.map +1 -1
  11. package/dist/agent/pi-embedded-runner/tools.d.ts +4 -1
  12. package/dist/agent/pi-embedded-runner/tools.d.ts.map +1 -1
  13. package/dist/agent/pi-embedded-runner/tools.js +3 -1
  14. package/dist/agent/pi-embedded-runner/tools.js.map +1 -1
  15. package/dist/agent/pi-embedded-runner/types.d.ts +4 -0
  16. package/dist/agent/pi-embedded-runner/types.d.ts.map +1 -1
  17. package/dist/cli/env-loader.d.ts.map +1 -1
  18. package/dist/cli/env-loader.js +1 -0
  19. package/dist/cli/env-loader.js.map +1 -1
  20. package/dist/cli/setup.js +2 -2
  21. package/dist/cli/setup.js.map +1 -1
  22. package/dist/cron/normalize.d.ts +31 -0
  23. package/dist/cron/normalize.d.ts.map +1 -0
  24. package/dist/cron/normalize.js +211 -0
  25. package/dist/cron/normalize.js.map +1 -0
  26. package/dist/cron/scheduler.d.ts +33 -3
  27. package/dist/cron/scheduler.d.ts.map +1 -1
  28. package/dist/cron/scheduler.js +253 -48
  29. package/dist/cron/scheduler.js.map +1 -1
  30. package/dist/heartbeat/runner.d.ts +27 -12
  31. package/dist/heartbeat/runner.d.ts.map +1 -1
  32. package/dist/heartbeat/runner.js +82 -104
  33. package/dist/heartbeat/runner.js.map +1 -1
  34. package/dist/infra/heartbeat-events-filter.d.ts +29 -0
  35. package/dist/infra/heartbeat-events-filter.d.ts.map +1 -0
  36. package/dist/infra/heartbeat-events-filter.js +80 -0
  37. package/dist/infra/heartbeat-events-filter.js.map +1 -0
  38. package/dist/infra/index.d.ts +9 -0
  39. package/dist/infra/index.d.ts.map +1 -0
  40. package/dist/infra/index.js +9 -0
  41. package/dist/infra/index.js.map +1 -0
  42. package/dist/infra/system-events.d.ts +58 -2
  43. package/dist/infra/system-events.d.ts.map +1 -1
  44. package/dist/infra/system-events.js +80 -14
  45. package/dist/infra/system-events.js.map +1 -1
  46. package/dist/server.d.ts.map +1 -1
  47. package/dist/server.js +6 -1
  48. package/dist/server.js.map +1 -1
  49. package/dist/services/platform-keys.d.ts +19 -0
  50. package/dist/services/platform-keys.d.ts.map +1 -0
  51. package/dist/services/platform-keys.js +74 -0
  52. package/dist/services/platform-keys.js.map +1 -0
  53. package/dist/subagent/registry.d.ts +96 -0
  54. package/dist/subagent/registry.d.ts.map +1 -0
  55. package/dist/subagent/registry.js +180 -0
  56. package/dist/subagent/registry.js.map +1 -0
  57. package/dist/tools/complete-turn.d.ts +2 -2
  58. package/dist/tools/complete-turn.js +10 -10
  59. package/dist/tools/complete-turn.js.map +1 -1
  60. package/dist/tools/contacts.d.ts +13 -0
  61. package/dist/tools/contacts.d.ts.map +1 -0
  62. package/dist/tools/contacts.js +80 -0
  63. package/dist/tools/contacts.js.map +1 -0
  64. package/dist/tools/cron.d.ts +17 -2
  65. package/dist/tools/cron.d.ts.map +1 -1
  66. package/dist/tools/cron.js +117 -35
  67. package/dist/tools/cron.js.map +1 -1
  68. package/dist/tools/google-maps.d.ts +6 -6
  69. package/dist/tools/google-maps.d.ts.map +1 -1
  70. package/dist/tools/google-maps.js +207 -262
  71. package/dist/tools/google-maps.js.map +1 -1
  72. package/dist/tools/index.d.ts +17 -7
  73. package/dist/tools/index.d.ts.map +1 -1
  74. package/dist/tools/index.js +40 -9
  75. package/dist/tools/index.js.map +1 -1
  76. package/dist/tools/phone-call.d.ts +11 -0
  77. package/dist/tools/phone-call.d.ts.map +1 -0
  78. package/dist/tools/phone-call.js +151 -0
  79. package/dist/tools/phone-call.js.map +1 -0
  80. package/dist/tools/sessions-spawn.d.ts +33 -0
  81. package/dist/tools/sessions-spawn.d.ts.map +1 -0
  82. package/dist/tools/sessions-spawn.js +164 -0
  83. package/dist/tools/sessions-spawn.js.map +1 -0
  84. package/dist/tools/spotify.d.ts +12 -0
  85. package/dist/tools/spotify.d.ts.map +1 -0
  86. package/dist/tools/spotify.js +251 -0
  87. package/dist/tools/spotify.js.map +1 -0
  88. package/dist/tools/subagents.d.ts +23 -0
  89. package/dist/tools/subagents.d.ts.map +1 -0
  90. package/dist/tools/subagents.js +209 -0
  91. package/dist/tools/subagents.js.map +1 -0
  92. package/dist/tools/whatsapp.d.ts +13 -0
  93. package/dist/tools/whatsapp.d.ts.map +1 -0
  94. package/dist/tools/whatsapp.js +215 -0
  95. package/dist/tools/whatsapp.js.map +1 -0
  96. package/dist/tools/youtube.d.ts +12 -0
  97. package/dist/tools/youtube.d.ts.map +1 -0
  98. package/dist/tools/youtube.js +218 -0
  99. package/dist/tools/youtube.js.map +1 -0
  100. package/dist/utils/asterizk-auth.d.ts +43 -0
  101. package/dist/utils/asterizk-auth.d.ts.map +1 -0
  102. package/dist/utils/asterizk-auth.js +125 -0
  103. package/dist/utils/asterizk-auth.js.map +1 -0
  104. package/dist/web-server.d.ts.map +1 -1
  105. package/dist/web-server.js +132 -0
  106. package/dist/web-server.js.map +1 -1
  107. package/dist/workspace/index.d.ts +3 -4
  108. package/dist/workspace/index.d.ts.map +1 -1
  109. package/dist/workspace/index.js +3 -4
  110. package/dist/workspace/index.js.map +1 -1
  111. package/dist/workspace/templates.d.ts +8 -7
  112. package/dist/workspace/templates.d.ts.map +1 -1
  113. package/dist/workspace/templates.js +18 -127
  114. package/dist/workspace/templates.js.map +1 -1
  115. package/dist/workspace/workspace.d.ts +2 -4
  116. package/dist/workspace/workspace.d.ts.map +1 -1
  117. package/dist/workspace/workspace.js +7 -16
  118. package/dist/workspace/workspace.js.map +1 -1
  119. package/package.json +1 -1
  120. package/public/index.html +231 -0
  121. package/skills/docx/SKILL.md +468 -0
  122. package/skills/docx/scripts/__init__.py +1 -0
  123. package/skills/docx/scripts/accept_changes.py +181 -0
  124. package/skills/docx/scripts/comment.py +347 -0
  125. package/skills/docx/scripts/helpers/__init__.py +0 -0
  126. package/skills/docx/scripts/helpers/merge_runs.py +231 -0
  127. package/skills/docx/scripts/helpers/simplify_redlines.py +240 -0
  128. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  129. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  130. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  131. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  132. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  133. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  134. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  135. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  136. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  137. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  138. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  139. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  140. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  141. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  142. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  143. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  144. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  145. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  146. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  147. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  148. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  149. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  150. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  151. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  152. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  153. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  154. package/skills/docx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  155. package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  156. package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  157. package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  158. package/skills/docx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  159. package/skills/docx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
  160. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  161. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  162. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  163. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  164. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  165. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  166. package/skills/docx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  167. package/skills/docx/scripts/ooxml/scripts/pack.py +159 -0
  168. package/skills/docx/scripts/ooxml/scripts/unpack.py +29 -0
  169. package/skills/docx/scripts/ooxml/scripts/validate.py +106 -0
  170. package/skills/docx/scripts/ooxml/scripts/validation/__init__.py +15 -0
  171. package/skills/docx/scripts/ooxml/scripts/validation/base.py +1023 -0
  172. package/skills/docx/scripts/ooxml/scripts/validation/docx.py +519 -0
  173. package/skills/docx/scripts/ooxml/scripts/validation/pptx.py +315 -0
  174. package/skills/docx/scripts/ooxml/scripts/validation/redlining.py +284 -0
  175. package/skills/docx/scripts/pack.py +166 -0
  176. package/skills/docx/scripts/templates/comments.xml +3 -0
  177. package/skills/docx/scripts/templates/commentsExtended.xml +3 -0
  178. package/skills/docx/scripts/templates/commentsExtensible.xml +3 -0
  179. package/skills/docx/scripts/templates/commentsIds.xml +3 -0
  180. package/skills/docx/scripts/templates/people.xml +3 -0
  181. package/skills/docx/scripts/unpack.py +134 -0
  182. package/skills/longform-video-generation/SKILL.md +298 -0
  183. package/skills/longform-video-generation/references/advanced_techniques.md +474 -0
  184. package/skills/longform-video-generation/references/google_api_guide.md +288 -0
  185. package/skills/longform-video-generation/scripts/video_generator.py +579 -0
  186. package/skills/pdf/FORMS.md +305 -0
  187. package/skills/pdf/REFERENCE.md +612 -0
  188. package/skills/pdf/SKILL.md +293 -0
  189. package/skills/pdf/scripts/check_bounding_boxes.py +70 -0
  190. package/skills/pdf/scripts/check_fillable_fields.py +12 -0
  191. package/skills/pdf/scripts/convert_pdf_to_images.py +35 -0
  192. package/skills/pdf/scripts/create_validation_image.py +41 -0
  193. package/skills/pdf/scripts/extract_form_field_info.py +152 -0
  194. package/skills/pdf/scripts/extract_form_structure.py +124 -0
  195. package/skills/pdf/scripts/fill_fillable_fields.py +116 -0
  196. package/skills/pdf/scripts/fill_pdf_form_with_annotations.py +136 -0
  197. package/skills/pptx/SKILL.md +171 -0
  198. package/skills/pptx/editing.md +205 -0
  199. package/skills/pptx/pptxgenjs.md +377 -0
  200. package/skills/pptx/scripts/add_slide.py +225 -0
  201. package/skills/pptx/scripts/clean.py +309 -0
  202. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  203. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  204. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  205. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  206. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  207. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  208. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  209. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  210. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  211. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  212. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  213. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  214. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  215. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  216. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  217. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  218. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  219. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  220. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  221. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  222. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  223. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  224. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  225. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  226. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  227. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  228. package/skills/pptx/scripts/ooxml/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  229. package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  230. package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  231. package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  232. package/skills/pptx/scripts/ooxml/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  233. package/skills/pptx/scripts/ooxml/schemas/mce/mc.xsd +75 -0
  234. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2010.xsd +560 -0
  235. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2012.xsd +67 -0
  236. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-2018.xsd +14 -0
  237. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cex-2018.xsd +20 -0
  238. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-cid-2016.xsd +13 -0
  239. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  240. package/skills/pptx/scripts/ooxml/schemas/microsoft/wml-symex-2015.xsd +8 -0
  241. package/skills/pptx/scripts/ooxml/scripts/pack.py +159 -0
  242. package/skills/pptx/scripts/ooxml/scripts/unpack.py +29 -0
  243. package/skills/pptx/scripts/ooxml/scripts/validate.py +106 -0
  244. package/skills/pptx/scripts/ooxml/scripts/validation/__init__.py +15 -0
  245. package/skills/pptx/scripts/ooxml/scripts/validation/base.py +1023 -0
  246. package/skills/pptx/scripts/ooxml/scripts/validation/docx.py +519 -0
  247. package/skills/pptx/scripts/ooxml/scripts/validation/pptx.py +315 -0
  248. package/skills/pptx/scripts/ooxml/scripts/validation/redlining.py +284 -0
  249. package/skills/pptx/scripts/pack.py +168 -0
  250. package/skills/pptx/scripts/thumbnail.py +318 -0
  251. package/skills/pptx/scripts/unpack.py +86 -0
  252. package/skills/xlsx/SKILL.md +291 -0
  253. package/skills/xlsx/recalc.py +247 -0
@@ -0,0 +1,519 @@
1
+ """
2
+ Validator for Word document XML files against XSD schemas.
3
+ """
4
+
5
+ import random
6
+ import re
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import defusedxml.minidom
11
+ import lxml.etree
12
+
13
+ from .base import BaseSchemaValidator
14
+
15
+
16
+ class DOCXSchemaValidator(BaseSchemaValidator):
17
+ """Validator for Word document XML files against XSD schemas."""
18
+
19
+ # Word-specific namespaces
20
+ WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
21
+ W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
22
+ W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"
23
+
24
+ # Word-specific element to relationship type mappings
25
+ # Start with empty mapping - add specific cases as we discover them
26
+ ELEMENT_RELATIONSHIP_TYPES = {}
27
+
28
def validate(self):
    """Run every DOCX validation check and report the combined outcome.

    XML well-formedness (``validate_xml``) acts as a gate: when it fails,
    False is returned immediately and nothing else runs. Otherwise each
    remaining check is executed in a fixed order, without short-circuiting,
    so that every check gets the chance to print its own findings. The
    paragraph-count comparison is printed last and does not influence the
    result.

    Returns:
        True when all checks passed, False otherwise.
    """
    # Test 0: XML well-formedness (gate for everything else)
    if not self.validate_xml():
        return False

    # Tests 1-11, in execution order. Looked up by name one at a time so
    # each attribute is resolved only when its turn comes.
    check_names = (
        "validate_namespaces",  # Test 1: Namespace declarations
        "validate_unique_ids",  # Test 2: Unique IDs
        "validate_file_references",  # Test 3: Relationships / file references
        "validate_content_types",  # Test 4: Content type declarations
        "validate_against_xsd",  # Test 5: XSD schema validation
        "validate_whitespace_preservation",  # Test 6: Whitespace preservation
        "validate_deletions",  # Test 7: Deletion validation
        "validate_insertions",  # Test 8: Insertion validation
        "validate_all_relationship_ids",  # Test 9: Relationship ID references
        "validate_id_constraints",  # Test 10: ID constraints (paraId, durableId)
        "validate_comment_markers",  # Test 11: Comment marker validation
    )

    all_valid = True
    for check_name in check_names:
        if not getattr(self, check_name)():
            all_valid = False

    # Count and compare paragraphs (informational only)
    self.compare_paragraph_counts()

    return all_valid
83
+
84
def validate_whitespace_preservation(self):
    """
    Validate that w:t elements with whitespace have xml:space='preserve'.

    Only files named document.xml are checked. A w:t element is reported
    when its text begins or ends with a whitespace character and its
    xml:space attribute is missing or has a value other than "preserve".

    Returns:
        True when nothing was reported. Otherwise one line per violation
        (or per file that could not be processed) is printed and False
        is returned.
    """
    errors = []

    for xml_file in self.xml_files:
        # Only check document.xml files
        if xml_file.name != "document.xml":
            continue

        try:
            root = lxml.etree.parse(str(xml_file)).getroot()
            xml_space_attr = f"{{{self.XML_NAMESPACE}}}space"

            # Find all w:t elements
            for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"):
                text = elem.text
                if not text:
                    continue

                # Look at the first and last characters directly. A regex
                # such as r".*\s$" misses trailing whitespace when the text
                # contains an interior line break, because "." does not
                # match newlines by default.
                if not (text[0].isspace() or text[-1].isspace()):
                    continue

                # Attribute present with the right value: nothing to report
                if elem.get(xml_space_attr) == "preserve":
                    continue

                # Show a preview of the text (repr capped at 50 characters)
                shown = repr(text)
                text_preview = shown[:50] + "..." if len(shown) > 50 else shown
                errors.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: "
                    f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}"
                )

        except Exception as e:
            # Covers lxml.etree.XMLSyntaxError as well; the failure is
            # reported as a violation for this file.
            errors.append(
                f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
            )

    if errors:
        print(f"FAILED - Found {len(errors)} whitespace preservation violations:")
        for error in errors:
            print(error)
        return False

    if self.verbose:
        print("PASSED - All whitespace is properly preserved")
    return True
135
+
136
def validate_deletions(self):
    """
    Check that tracked deletions use the deletion-specific text elements.

    Only files named document.xml are inspected. Inside any w:del, every
    w:t with non-empty text and every w:instrText is reported; the
    deletion counterparts are w:delText and w:delInstrText. XSD validation
    does not catch this, so it is checked manually here.

    Returns:
        True when nothing was reported. Otherwise the findings are printed
        and False is returned.
    """

    def preview(value):
        # Cap the repr at 50 characters so long runs stay readable.
        shown = repr(value)
        if len(shown) > 50:
            return shown[:50] + "..."
        return shown

    problems = []

    for xml_file in self.xml_files:
        # Only document.xml files are of interest
        if xml_file.name != "document.xml":
            continue

        try:
            tree_root = lxml.etree.parse(str(xml_file)).getroot()
            ns = {"w": self.WORD_2006_NAMESPACE}

            # w:t descendants of w:del (empty ones are ignored)
            for text_node in tree_root.xpath(".//w:del//w:t", namespaces=ns):
                if not text_node.text:
                    continue
                snippet = preview(text_node.text)
                problems.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: "
                    f"Line {text_node.sourceline}: <w:t> found within <w:del>: {snippet}"
                )

            # w:instrText descendants of w:del; these should be
            # w:delInstrText and are reported even when empty
            for instr_node in tree_root.xpath(".//w:del//w:instrText", namespaces=ns):
                snippet = preview(instr_node.text or "")
                problems.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: "
                    f"Line {instr_node.sourceline}: <w:instrText> found within <w:del> (use <w:delInstrText>): {snippet}"
                )

        except Exception as exc:
            problems.append(
                f" {xml_file.relative_to(self.unpacked_dir)}: Error: {exc}"
            )

    if not problems:
        if self.verbose:
            print("PASSED - No w:t elements found within w:del elements")
        return True

    print(f"FAILED - Found {len(problems)} deletion validation violations:")
    for line in problems:
        print(line)
    return False
194
+
195
def count_paragraphs_in_unpacked(self):
    """Return the number of w:p elements in the unpacked document.xml.

    Files not named document.xml are ignored. The count is assigned, not
    accumulated, so if several files match, the value from the last one
    parsed successfully is returned. A failure while parsing is printed
    and leaves the previous value (initially 0) untouched.
    """
    total = 0

    for candidate in self.xml_files:
        if candidate.name == "document.xml":
            try:
                tree_root = lxml.etree.parse(str(candidate)).getroot()
                # Every w:p element at any depth
                total = len(tree_root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p"))
            except Exception as exc:
                print(f"Error counting paragraphs in unpacked document: {exc}")

    return total
213
+
214
def count_paragraphs_in_original(self):
    """Count the number of paragraphs in the original docx file.

    Reads ``word/document.xml`` directly out of the archive at
    ``self.original_file`` and counts its w:p elements. Only that one
    member is read; the archive is not extracted to disk.

    Returns:
        The paragraph count, or 0 when the archive or the document part
        cannot be read or parsed (the error is printed).
    """
    count = 0

    try:
        # Stream the single member we need instead of unpacking everything
        # into a temporary directory.
        with zipfile.ZipFile(self.original_file, "r") as zip_ref:
            with zip_ref.open("word/document.xml") as doc_xml:
                root = lxml.etree.parse(doc_xml).getroot()

        # Count all w:p elements
        count = len(root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p"))

    except Exception as e:
        # Best effort: the count is informational, so report and fall back to 0.
        print(f"Error counting paragraphs in original document: {e}")

    return count
237
+
238
def validate_insertions(self):
    """
    Check that tracked insertions do not carry stray w:delText elements.

    Only files named document.xml are inspected. A w:delText below a
    w:ins is reported unless it also has a w:del ancestor.

    Returns:
        True when nothing was reported. Otherwise the findings are printed
        and False is returned.
    """

    def preview(value):
        # Cap the repr at 50 characters so long runs stay readable.
        shown = repr(value)
        return shown[:50] + "..." if len(shown) > 50 else shown

    problems = []

    for xml_file in self.xml_files:
        if xml_file.name != "document.xml":
            continue

        try:
            tree_root = lxml.etree.parse(str(xml_file)).getroot()

            # w:delText inside w:ins that is NOT also inside a w:del
            offenders = tree_root.xpath(
                ".//w:ins//w:delText[not(ancestor::w:del)]",
                namespaces={"w": self.WORD_2006_NAMESPACE},
            )

            for node in offenders:
                snippet = preview(node.text or "")
                problems.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: "
                    f"Line {node.sourceline}: <w:delText> within <w:ins>: {snippet}"
                )

        except Exception as exc:
            problems.append(
                f" {xml_file.relative_to(self.unpacked_dir)}: Error: {exc}"
            )

    if not problems:
        if self.verbose:
            print("PASSED - No w:delText elements within w:ins elements")
        return True

    print(f"FAILED - Found {len(problems)} insertion validation violations:")
    for line in problems:
        print(line)
    return False
283
+
284
def compare_paragraph_counts(self):
    """Print the paragraph count of the original and the unpacked document.

    The output line shows both counts and their signed difference; a
    positive difference gets an explicit "+", zero and negative values
    are printed as they are. Nothing is returned.
    """
    before = self.count_paragraphs_in_original()
    after = self.count_paragraphs_in_unpacked()

    delta = after - before
    if delta > 0:
        delta_text = f"+{delta}"
    else:
        delta_text = str(delta)

    print(f"\nParagraphs: {before} → {after} ({delta_text})")
292
+
293
def _parse_id_value(self, val: str, base: int = 16) -> int:
    """Convert the textual form of an ID attribute into an integer.

    Args:
        val: The attribute text to convert.
        base: Radix to interpret ``val`` in; 16 (hex) by default, 10 for
            decimal values.

    Returns:
        The integer value of ``val`` in the given base.

    Raises:
        ValueError: If ``val`` is not a valid number in ``base``.
    """
    parsed = int(val, base)
    return parsed
304
+
305
def validate_id_constraints(self):
    """Validate paraId and durableId values per OOXML spec.

    Checks, for every element of every file in ``self.xml_files``:
    - paraId parses as hex and is < 0x80000000
    - durableId is < 0x7FFFFFFF, parsed as decimal in numbering.xml and
      as hex in every other file

    A value that cannot be parsed in the expected base is reported as a
    violation of its own, and scanning continues with the next attribute,
    so one malformed ID no longer hides the remaining problems in a file.
    Empty attribute values are skipped.

    Returns:
        True when no violation was found; otherwise the violations are
        printed and False is returned.
    """
    errors = []
    para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId"
    durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId"

    for xml_file in self.xml_files:
        try:
            tree = lxml.etree.parse(str(xml_file))
        except (lxml.etree.XMLSyntaxError, OSError) as e:
            # Report unreadable files the same way the other checks do
            # instead of silently skipping them.
            errors.append(f" {xml_file.name}: Error: {e}")
            continue

        # durableId in numbering.xml must be decimal.
        # Word rejects hex-formatted durableIds in numbering.xml.
        # Other files (e.g. commentsIds.xml) use hex format.
        is_numbering = xml_file.name == "numbering.xml"

        for elem in tree.iter():
            # paraId is always hex format
            if val := elem.get(para_id_attr):
                try:
                    too_large = self._parse_id_value(val, base=16) >= 0x80000000
                except ValueError:
                    errors.append(
                        f" {xml_file.name}:{elem.sourceline}: "
                        f"paraId={val} must be hexadecimal"
                    )
                else:
                    if too_large:
                        errors.append(
                            f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000"
                        )

            if val := elem.get(durable_id_attr):
                try:
                    too_large = (
                        self._parse_id_value(val, base=10 if is_numbering else 16)
                        >= 0x7FFFFFFF
                    )
                except ValueError:
                    # Contains characters that are invalid for the expected
                    # base (e.g., hex letters A-F in numbering.xml)
                    expected = (
                        "must be decimal in numbering.xml"
                        if is_numbering
                        else "must be hexadecimal"
                    )
                    errors.append(
                        f" {xml_file.name}:{elem.sourceline}: "
                        f"durableId={val} {expected}"
                    )
                else:
                    if too_large:
                        errors.append(
                            f" {xml_file.name}:{elem.sourceline}: "
                            f"durableId={val} >= 0x7FFFFFFF"
                        )

    if errors:
        print(f"FAILED - {len(errors)} ID constraint violations:")
        for e in errors:
            print(e)
    elif self.verbose:
        print("PASSED - All paraId/durableId values within constraints")
    return not errors
359
+
360
def validate_comment_markers(self):
    """Check comment range markers for pairing and for dangling references.

    Checks:
    - every commentRangeEnd id also appears on a commentRangeStart
    - every commentRangeStart id also appears on a commentRangeEnd
    - every id used by a range marker or commentReference in document.xml
      exists as a w:comment id in comments.xml (when that file is absent,
      the set of known comments is empty)

    The check is skipped (and passes) when no document.xml whose path
    contains "word" is among ``self.xml_files``.

    Returns:
        True when nothing was reported. Otherwise the findings are printed
        and False is returned.
    """

    def numeric_order(marker_id):
        # Digit-only ids sort by value; None and anything else sort as 0.
        return int(marker_id) if marker_id and marker_id.isdigit() else 0

    problems = []

    # Locate the two parts this check works on
    document_xml = None
    comments_xml = None
    for candidate in self.xml_files:
        if candidate.name == "document.xml" and "word" in str(candidate):
            document_xml = candidate
        elif candidate.name == "comments.xml":
            comments_xml = candidate

    if not document_xml:
        if self.verbose:
            print("PASSED - No document.xml found (skipping comment validation)")
        return True

    try:
        doc_root = lxml.etree.parse(str(document_xml)).getroot()
        ns = {"w": self.WORD_2006_NAMESPACE}
        id_attr = f"{{{self.WORD_2006_NAMESPACE}}}id"

        def ids_of(root, query):
            # Set of w:id values of all elements matched by the XPath query
            return {node.get(id_attr) for node in root.xpath(query, namespaces=ns)}

        # All comment marker ids used in document.xml
        range_starts = ids_of(doc_root, ".//w:commentRangeStart")
        range_ends = ids_of(doc_root, ".//w:commentRangeEnd")
        references = ids_of(doc_root, ".//w:commentReference")

        # Ends without a start
        for comment_id in sorted(range_ends - range_starts, key=numeric_order):
            problems.append(
                f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart'
            )

        # Starts without an end
        for comment_id in sorted(range_starts - range_ends, key=numeric_order):
            problems.append(
                f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd'
            )

        # Known comment ids, taken from comments.xml if it exists
        comment_ids = set()
        if comments_xml and comments_xml.exists():
            comments_root = lxml.etree.parse(str(comments_xml)).getroot()
            comment_ids = ids_of(comments_root, ".//w:comment")

        # Markers pointing at comments that do not exist
        unknown = (range_starts | range_ends | references) - comment_ids
        for comment_id in sorted(unknown, key=numeric_order):
            if not comment_id:  # Skip None values
                continue
            problems.append(
                f' document.xml: marker id="{comment_id}" references non-existent comment'
            )

    except Exception as exc:
        problems.append(f" Error parsing XML: {exc}")

    if not problems:
        if self.verbose:
            print("PASSED - All comment markers properly paired")
        return True

    print(f"FAILED - {len(problems)} comment marker violations:")
    for line in problems:
        print(line)
    return False
446
+
447
def repair(self) -> int:
    """Apply the inherited auto-repairs, then the DOCX-specific ones.

    Returns:
        The number of repairs reported by the parent class plus the
        number reported by ``repair_durableId``.
    """
    total = super().repair()
    total += self.repair_durableId()
    return total
452
+
453
def repair_durableId(self) -> int:
    """Fix invalid durableId values.

    Repairs:
    - durableId >= 0x7FFFFFFF (value out of range)
    - durableId that cannot be parsed in the file's expected format
      (decimal in numbering.xml, hex elsewhere)

    Each invalid value is replaced by a random ID in the range
    1..0x7FFFFFFE, written as decimal in numbering.xml and as 8-digit
    upper-case hex in every other file.

    A repair is only counted and announced after the modified file has
    been written successfully. Files that cannot be read, parsed or
    written are skipped with a printed notice, so the returned count
    reflects what is actually on disk.

    Note: paraId is not auto-repaired because it may be referenced by
    commentsExtended.xml, commentsIds.xml, and comment threading (paraIdParent).
    Changing paraId without updating all references would break comment associations.

    Returns:
        The number of durableId attributes that were rewritten.
    """
    repairs = 0

    for xml_file in self.xml_files:
        # numbering.xml requires decimal format; other files
        # (e.g. commentsIds.xml) use hex format.
        is_numbering = xml_file.name == "numbering.xml"
        id_base = 10 if is_numbering else 16
        pending = []  # (old_id, new_id) pairs not yet written to disk

        try:
            content = xml_file.read_text(encoding="utf-8")
            dom = defusedxml.minidom.parseString(content)

            for elem in dom.getElementsByTagName("*"):
                if not elem.hasAttribute("w16cid:durableId"):
                    continue

                durable_id = elem.getAttribute("w16cid:durableId")
                try:
                    needs_repair = (
                        self._parse_id_value(durable_id, base=id_base) >= 0x7FFFFFFF
                    )
                except ValueError:
                    # Not a number in the expected base
                    # (e.g., hex letters A-F in numbering.xml)
                    needs_repair = True

                if not needs_repair:
                    continue

                # Generate new ID in the correct format for this file type
                value = random.randint(1, 0x7FFFFFFE)
                new_id = str(value) if is_numbering else f"{value:08X}"
                elem.setAttribute("w16cid:durableId", new_id)
                pending.append((durable_id, new_id))

            if pending:
                xml_file.write_bytes(dom.toxml(encoding="UTF-8"))

        except Exception as e:
            # Best effort: leave this file alone, but say so instead of
            # silently pretending nothing happened.
            print(f" Skipped: {xml_file.name}: durableId repair failed: {e}")
            continue

        # The file is on disk now, so the repairs really happened.
        for old_id, new_id in pending:
            print(f" Repaired: {xml_file.name}: durableId {old_id} → {new_id}")
        repairs += len(pending)

    return repairs
516
+
517
+
518
if __name__ == "__main__":
    # This file only defines the validator class; refuse direct execution.
    raise RuntimeError("This module should not be run directly.")