@kortix/sandbox 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/config/customize.sh +143 -0
  2. package/config/kortix-env-setup.sh +25 -0
  3. package/kortix-master/package.json +22 -0
  4. package/kortix-master/src/config.ts +22 -0
  5. package/kortix-master/src/index.ts +44 -0
  6. package/kortix-master/src/routes/env.ts +65 -0
  7. package/kortix-master/src/routes/proxy.ts +108 -0
  8. package/kortix-master/src/routes/update.ts +185 -0
  9. package/kortix-master/src/services/proxy.ts +43 -0
  10. package/kortix-master/src/services/secret-store.ts +156 -0
  11. package/kortix-master/tsconfig.json +14 -0
  12. package/opencode/agents/kortix-browser.md +142 -0
  13. package/opencode/agents/kortix-build.md +62 -0
  14. package/opencode/agents/kortix-explore.md +66 -0
  15. package/opencode/agents/kortix-image-gen.md +33 -0
  16. package/opencode/agents/kortix-main.md +450 -0
  17. package/opencode/agents/kortix-plan.md +100 -0
  18. package/opencode/agents/kortix-research.md +84 -0
  19. package/opencode/agents/kortix-sheets.md +61 -0
  20. package/opencode/agents/kortix-slides.md +64 -0
  21. package/opencode/agents/kortix-web-dev.md +572 -0
  22. package/opencode/commands/email.md +36 -0
  23. package/opencode/commands/init.md +43 -0
  24. package/opencode/commands/journal.md +44 -0
  25. package/opencode/commands/memory-init.md +81 -0
  26. package/opencode/commands/memory-search.md +50 -0
  27. package/opencode/commands/memory-status.md +56 -0
  28. package/opencode/commands/research.md +36 -0
  29. package/opencode/commands/search.md +38 -0
  30. package/opencode/commands/slides.md +32 -0
  31. package/opencode/commands/spreadsheet.md +30 -0
  32. package/opencode/memory.json +37 -0
  33. package/opencode/ocx.jsonc +10 -0
  34. package/opencode/opencode.jsonc +103 -0
  35. package/opencode/package.json +25 -0
  36. package/opencode/patches/apply.sh +19 -0
  37. package/opencode/patches/opencode-pty-spawn.txt +49 -0
  38. package/opencode/plugin/background-agents.ts.disabled +483 -0
  39. package/opencode/plugin/kdco-primitives/get-project-id.ts +172 -0
  40. package/opencode/plugin/kdco-primitives/index.ts +26 -0
  41. package/opencode/plugin/kdco-primitives/log-warn.ts +51 -0
  42. package/opencode/plugin/kdco-primitives/mutex.ts +122 -0
  43. package/opencode/plugin/kdco-primitives/shell.ts +138 -0
  44. package/opencode/plugin/kdco-primitives/temp.ts +36 -0
  45. package/opencode/plugin/kdco-primitives/terminal-detect.ts +34 -0
  46. package/opencode/plugin/kdco-primitives/types.ts +13 -0
  47. package/opencode/plugin/kdco-primitives/with-timeout.ts +84 -0
  48. package/opencode/plugin/memory.ts +306 -0
  49. package/opencode/plugin/worktree/state.ts +412 -0
  50. package/opencode/plugin/worktree/terminal.ts +1002 -0
  51. package/opencode/plugin/worktree.ts +861 -0
  52. package/opencode/skills/KORTIX-browser/SKILL.md +478 -0
  53. package/opencode/skills/KORTIX-cron-triggers/SKILL.md +173 -0
  54. package/opencode/skills/KORTIX-deep-research/SKILL.md +278 -0
  55. package/opencode/skills/KORTIX-docx/SKILL.md +398 -0
  56. package/opencode/skills/KORTIX-docx/scripts/__init__.py +1 -0
  57. package/opencode/skills/KORTIX-docx/scripts/accept_changes.py +104 -0
  58. package/opencode/skills/KORTIX-docx/scripts/comment.py +244 -0
  59. package/opencode/skills/KORTIX-docx/scripts/office/helpers/__init__.py +0 -0
  60. package/opencode/skills/KORTIX-docx/scripts/office/helpers/merge_runs.py +199 -0
  61. package/opencode/skills/KORTIX-docx/scripts/office/helpers/simplify_redlines.py +197 -0
  62. package/opencode/skills/KORTIX-docx/scripts/office/pack.py +159 -0
  63. package/opencode/skills/KORTIX-docx/scripts/office/soffice.py +183 -0
  64. package/opencode/skills/KORTIX-docx/scripts/office/unpack.py +132 -0
  65. package/opencode/skills/KORTIX-docx/scripts/office/validate.py +111 -0
  66. package/opencode/skills/KORTIX-docx/scripts/office/validators/__init__.py +15 -0
  67. package/opencode/skills/KORTIX-docx/scripts/office/validators/base.py +847 -0
  68. package/opencode/skills/KORTIX-docx/scripts/office/validators/docx.py +446 -0
  69. package/opencode/skills/KORTIX-docx/scripts/office/validators/pptx.py +275 -0
  70. package/opencode/skills/KORTIX-docx/scripts/office/validators/redlining.py +247 -0
  71. package/opencode/skills/KORTIX-docx/scripts/render_docx.py +179 -0
  72. package/opencode/skills/KORTIX-docx/scripts/templates/comments.xml +3 -0
  73. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtended.xml +3 -0
  74. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtensible.xml +3 -0
  75. package/opencode/skills/KORTIX-docx/scripts/templates/commentsIds.xml +3 -0
  76. package/opencode/skills/KORTIX-docx/scripts/templates/people.xml +3 -0
  77. package/opencode/skills/KORTIX-domain-research/SKILL.md +96 -0
  78. package/opencode/skills/KORTIX-domain-research/scripts/domain-lookup.py +810 -0
  79. package/opencode/skills/KORTIX-elevenlabs/SKILL.md +230 -0
  80. package/opencode/skills/KORTIX-elevenlabs/scripts/tts.py +389 -0
  81. package/opencode/skills/KORTIX-email/SKILL.md +145 -0
  82. package/opencode/skills/KORTIX-legal-writer/SKILL.md +409 -0
  83. package/opencode/skills/KORTIX-legal-writer/references/bluebook.md +152 -0
  84. package/opencode/skills/KORTIX-legal-writer/references/document-types.md +416 -0
  85. package/opencode/skills/KORTIX-legal-writer/scripts/courtlistener.py +291 -0
  86. package/opencode/skills/KORTIX-legal-writer/scripts/ecfr_lookup.py +299 -0
  87. package/opencode/skills/KORTIX-legal-writer/scripts/verify-legal.py +507 -0
  88. package/opencode/skills/KORTIX-logo-creator/SKILL.md +293 -0
  89. package/opencode/skills/KORTIX-logo-creator/references/prompt-patterns.md +134 -0
  90. package/opencode/skills/KORTIX-logo-creator/scripts/compose_logo.py +406 -0
  91. package/opencode/skills/KORTIX-logo-creator/scripts/create_logo_sheet.py +258 -0
  92. package/opencode/skills/KORTIX-logo-creator/scripts/remove_bg.py +96 -0
  93. package/opencode/skills/KORTIX-memory/SKILL.md +261 -0
  94. package/opencode/skills/KORTIX-memory/scripts/export-sessions.py +409 -0
  95. package/opencode/skills/KORTIX-paper-creator/SKILL.md +549 -0
  96. package/opencode/skills/KORTIX-paper-creator/assets/template.tex +101 -0
  97. package/opencode/skills/KORTIX-paper-creator/scripts/compile.sh +177 -0
  98. package/opencode/skills/KORTIX-paper-creator/scripts/openalex_to_bibtex.py +220 -0
  99. package/opencode/skills/KORTIX-paper-creator/scripts/verify.sh +354 -0
  100. package/opencode/skills/KORTIX-paper-search/SKILL.md +418 -0
  101. package/opencode/skills/KORTIX-pdf/SKILL.md +232 -0
  102. package/opencode/skills/KORTIX-pdf/forms.md +36 -0
  103. package/opencode/skills/KORTIX-pdf/reference.md +105 -0
  104. package/opencode/skills/KORTIX-pdf/scripts/check_bounding_boxes.py +65 -0
  105. package/opencode/skills/KORTIX-pdf/scripts/check_fillable_fields.py +11 -0
  106. package/opencode/skills/KORTIX-pdf/scripts/convert_pdf_to_images.py +33 -0
  107. package/opencode/skills/KORTIX-pdf/scripts/create_validation_image.py +37 -0
  108. package/opencode/skills/KORTIX-pdf/scripts/extract_form_field_info.py +122 -0
  109. package/opencode/skills/KORTIX-pdf/scripts/extract_form_structure.py +115 -0
  110. package/opencode/skills/KORTIX-pdf/scripts/fill_fillable_fields.py +98 -0
  111. package/opencode/skills/KORTIX-pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  112. package/opencode/skills/KORTIX-plan/SKILL.md +228 -0
  113. package/opencode/skills/KORTIX-presentation-viewer/SKILL.md +87 -0
  114. package/opencode/skills/KORTIX-presentation-viewer/serve.ts +136 -0
  115. package/opencode/skills/KORTIX-presentation-viewer/viewer.html +559 -0
  116. package/opencode/skills/KORTIX-presentations/SKILL.md +344 -0
  117. package/opencode/skills/KORTIX-remotion/SKILL.md +56 -0
  118. package/opencode/skills/KORTIX-remotion/rules/3d.md +86 -0
  119. package/opencode/skills/KORTIX-remotion/rules/animations.md +29 -0
  120. package/opencode/skills/KORTIX-remotion/rules/assets.md +78 -0
  121. package/opencode/skills/KORTIX-remotion/rules/audio-visualization.md +198 -0
  122. package/opencode/skills/KORTIX-remotion/rules/audio.md +169 -0
  123. package/opencode/skills/KORTIX-remotion/rules/calculate-metadata.md +104 -0
  124. package/opencode/skills/KORTIX-remotion/rules/can-decode.md +75 -0
  125. package/opencode/skills/KORTIX-remotion/rules/charts.md +120 -0
  126. package/opencode/skills/KORTIX-remotion/rules/compositions.md +141 -0
  127. package/opencode/skills/KORTIX-remotion/rules/display-captions.md +184 -0
  128. package/opencode/skills/KORTIX-remotion/rules/extract-frames.md +229 -0
  129. package/opencode/skills/KORTIX-remotion/rules/ffmpeg.md +38 -0
  130. package/opencode/skills/KORTIX-remotion/rules/fonts.md +152 -0
  131. package/opencode/skills/KORTIX-remotion/rules/get-audio-duration.md +58 -0
  132. package/opencode/skills/KORTIX-remotion/rules/get-video-dimensions.md +68 -0
  133. package/opencode/skills/KORTIX-remotion/rules/get-video-duration.md +58 -0
  134. package/opencode/skills/KORTIX-remotion/rules/gifs.md +141 -0
  135. package/opencode/skills/KORTIX-remotion/rules/images.md +130 -0
  136. package/opencode/skills/KORTIX-remotion/rules/import-srt-captions.md +69 -0
  137. package/opencode/skills/KORTIX-remotion/rules/light-leaks.md +73 -0
  138. package/opencode/skills/KORTIX-remotion/rules/lottie.md +68 -0
  139. package/opencode/skills/KORTIX-remotion/rules/maps.md +401 -0
  140. package/opencode/skills/KORTIX-remotion/rules/measuring-dom-nodes.md +35 -0
  141. package/opencode/skills/KORTIX-remotion/rules/measuring-text.md +143 -0
  142. package/opencode/skills/KORTIX-remotion/rules/parameters.md +98 -0
  143. package/opencode/skills/KORTIX-remotion/rules/sequencing.md +118 -0
  144. package/opencode/skills/KORTIX-remotion/rules/subtitles.md +36 -0
  145. package/opencode/skills/KORTIX-remotion/rules/tailwind.md +11 -0
  146. package/opencode/skills/KORTIX-remotion/rules/text-animations.md +20 -0
  147. package/opencode/skills/KORTIX-remotion/rules/timing.md +179 -0
  148. package/opencode/skills/KORTIX-remotion/rules/transcribe-captions.md +70 -0
  149. package/opencode/skills/KORTIX-remotion/rules/transitions.md +197 -0
  150. package/opencode/skills/KORTIX-remotion/rules/transparent-videos.md +106 -0
  151. package/opencode/skills/KORTIX-remotion/rules/trimming.md +53 -0
  152. package/opencode/skills/KORTIX-remotion/rules/videos.md +171 -0
  153. package/opencode/skills/KORTIX-secrets/SKILL.md +280 -0
  154. package/opencode/skills/KORTIX-semantic-search/SKILL.md +213 -0
  155. package/opencode/skills/KORTIX-session-search/SKILL.md +807 -0
  156. package/opencode/skills/KORTIX-session-search/Untitled +1 -0
  157. package/opencode/skills/KORTIX-skill-creator/SKILL.md +163 -0
  158. package/opencode/skills/KORTIX-web-research/SKILL.md +69 -0
  159. package/opencode/skills/KORTIX-xlsx/LICENSE.txt +30 -0
  160. package/opencode/skills/KORTIX-xlsx/SKILL.md +549 -0
  161. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/__init__.py +0 -0
  162. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/merge_runs.py +199 -0
  163. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  164. package/opencode/skills/KORTIX-xlsx/scripts/office/pack.py +159 -0
  165. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  166. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  167. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  168. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  169. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  170. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  171. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  172. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  173. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  174. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  175. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  176. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  177. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  178. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  179. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  180. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  181. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  182. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  183. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  184. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  185. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  186. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  187. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  188. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  189. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  190. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  191. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  192. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  193. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  194. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  195. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  196. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  197. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  198. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  199. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  200. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  201. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  202. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  203. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  204. package/opencode/skills/KORTIX-xlsx/scripts/office/soffice.py +183 -0
  205. package/opencode/skills/KORTIX-xlsx/scripts/office/unpack.py +132 -0
  206. package/opencode/skills/KORTIX-xlsx/scripts/office/validate.py +111 -0
  207. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/__init__.py +15 -0
  208. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/base.py +847 -0
  209. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/docx.py +446 -0
  210. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/pptx.py +275 -0
  211. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/redlining.py +247 -0
  212. package/opencode/skills/KORTIX-xlsx/scripts/recalc.py +184 -0
  213. package/opencode/tools/image-gen.ts +342 -0
  214. package/opencode/tools/image-search.ts +190 -0
  215. package/opencode/tools/memory-get.ts +168 -0
  216. package/opencode/tools/memory-search.ts +247 -0
  217. package/opencode/tools/presentation-gen.ts +723 -0
  218. package/opencode/tools/scrape-webpage.ts +115 -0
  219. package/opencode/tools/scripts/.python-version +1 -0
  220. package/opencode/tools/scripts/convert_pdf.py +184 -0
  221. package/opencode/tools/scripts/convert_pptx.py +562 -0
  222. package/opencode/tools/scripts/pyproject.toml +11 -0
  223. package/opencode/tools/scripts/uv.lock +287 -0
  224. package/opencode/tools/scripts/validate_slide.py +74 -0
  225. package/opencode/tools/show-user.ts +217 -0
  226. package/opencode/tools/tests/e2e-presentation-fix.ts +277 -0
  227. package/opencode/tools/tests/image-gen.test.ts +215 -0
  228. package/opencode/tools/tests/image-search.test.ts +125 -0
  229. package/opencode/tools/tests/memory-system-benchmark.ts +1076 -0
  230. package/opencode/tools/tests/presentation-gen.test.ts +389 -0
  231. package/opencode/tools/tests/scrape-webpage.test.ts +74 -0
  232. package/opencode/tools/tests/show-user.test.ts +241 -0
  233. package/opencode/tools/tests/video-gen.test.ts +110 -0
  234. package/opencode/tools/tests/web-search.test.ts +106 -0
  235. package/opencode/tools/video-gen.ts +200 -0
  236. package/opencode/tools/web-search.ts +153 -0
  237. package/opencode/tsconfig.json +29 -0
  238. package/package.json +36 -0
  239. package/patch-agent-browser.js +100 -0
  240. package/postinstall.sh +88 -0
  241. package/services/KORTIX-presentation-viewer/run +37 -0
  242. package/services/agent-browser-viewer/run +48 -0
  243. package/services/kortix-master/run +16 -0
  244. package/services/lss-sync/run +22 -0
  245. package/services/opencode-serve/run +25 -0
  246. package/services/opencode-web/run +21 -0
@@ -0,0 +1,446 @@
1
+ """
2
+ Validator for Word document XML files against XSD schemas.
3
+ """
4
+
5
+ import random
6
+ import re
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import defusedxml.minidom
11
+ import lxml.etree
12
+
13
+ from .base import BaseSchemaValidator
14
+
15
+
16
class DOCXSchemaValidator(BaseSchemaValidator):
    """Validate the XML parts of an unpacked Word (.docx) package.

    Adds Word-specific checks on top of ``BaseSchemaValidator``: whitespace
    preservation on ``w:t``, tracked-change structure (``w:del`` / ``w:ins``),
    paraId/durableId range constraints and comment-marker pairing, plus a
    repair step that regenerates out-of-range ``w16cid:durableId`` values.
    """

    WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
    W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
    W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"

    ELEMENT_RELATIONSHIP_TYPES = {}

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _preview_text(text, limit: int = 50) -> str:
        """Return ``repr(text)``, cut to ``limit`` characters plus "..." when longer."""
        shown = repr(text or "")
        return shown[:limit] + "..." if len(shown) > limit else shown

    def _print_outcome(self, errors, failure_header: str, success_message: str) -> bool:
        """Print the result of one check and return True when ``errors`` is empty.

        Failures are always printed; the success line only when ``self.verbose``.
        """
        if errors:
            print(failure_header)
            for error in errors:
                print(error)
            return False
        if self.verbose:
            print(success_message)
        return True

    def _document_xml_files(self):
        """Return every entry of ``self.xml_files`` whose file name is document.xml."""
        return [f for f in self.xml_files if f.name == "document.xml"]

    def _marker_ids(self, root, local_name: str) -> set:
        """Collect the ``w:id`` values (possibly ``None``) of all ``w:<local_name>`` elements."""
        id_attr = f"{{{self.WORD_2006_NAMESPACE}}}id"
        found = root.xpath(
            f".//w:{local_name}", namespaces={"w": self.WORD_2006_NAMESPACE}
        )
        return {elem.get(id_attr) for elem in found}

    @staticmethod
    def _numeric_id_order(comment_id):
        """Sort key: numeric value for all-digit ids, 0 for anything else (incl. None)."""
        return int(comment_id) if comment_id and comment_id.isdigit() else 0

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate(self):
        """Run all checks and return True only if every one of them passes.

        If ``validate_xml()`` fails, nothing else is attempted. Otherwise every
        remaining check runs (no short-circuit) so that each one can print its
        own report. The paragraph-count comparison is informational and does
        not affect the result.
        """
        if not self.validate_xml():
            return False

        checks = (
            self.validate_namespaces,
            self.validate_unique_ids,
            self.validate_file_references,
            self.validate_content_types,
            self.validate_against_xsd,
            self.validate_whitespace_preservation,
            self.validate_deletions,
            self.validate_insertions,
            self.validate_all_relationship_ids,
            self.validate_id_constraints,
            self.validate_comment_markers,
        )
        # A list, not a generator: every check must execute even after a failure.
        outcomes = [bool(check()) for check in checks]

        self.compare_paragraph_counts()

        return all(outcomes)

    def validate_whitespace_preservation(self):
        """Check that ``w:t`` text with edge whitespace has ``xml:space="preserve"``.

        Only files named document.xml are inspected. Returns True when no
        violation (and no processing error) was recorded.
        """
        errors = []
        text_tag = f"{{{self.WORD_2006_NAMESPACE}}}t"

        for xml_file in self._document_xml_files():
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                for elem in root.iter(text_tag):
                    text = elem.text
                    # Only leading or trailing whitespace is at risk of being dropped.
                    if not text or not re.search(r"^[ \t\n\r]|[ \t\n\r]$", text):
                        continue
                    if elem.get(f"{{{self.XML_NAMESPACE}}}space") == "preserve":
                        continue

                    text_preview = self._preview_text(text)
                    errors.append(
                        f" {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}"
                    )

            except Exception as e:  # includes lxml.etree.XMLSyntaxError
                errors.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        return self._print_outcome(
            errors,
            f"FAILED - Found {len(errors)} whitespace preservation violations:",
            "PASSED - All whitespace is properly preserved",
        )

    def validate_deletions(self):
        """Check that ``w:del`` contains no non-empty ``w:t`` and no ``w:instrText``.

        Only files named document.xml are inspected. Returns True when no
        violation (and no processing error) was recorded.
        """
        errors = []
        namespaces = {"w": self.WORD_2006_NAMESPACE}

        for xml_file in self._document_xml_files():
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces):
                    if not t_elem.text:
                        continue
                    text_preview = self._preview_text(t_elem.text)
                    errors.append(
                        f" {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {t_elem.sourceline}: <w:t> found within <w:del>: {text_preview}"
                    )

                # Unlike w:t, an instrText inside w:del is flagged even when empty.
                for instr_elem in root.xpath(
                    ".//w:del//w:instrText", namespaces=namespaces
                ):
                    text_preview = self._preview_text(instr_elem.text)
                    errors.append(
                        f" {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {instr_elem.sourceline}: <w:instrText> found within <w:del> (use <w:delInstrText>): {text_preview}"
                    )

            except Exception as e:  # includes lxml.etree.XMLSyntaxError
                errors.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        return self._print_outcome(
            errors,
            f"FAILED - Found {len(errors)} deletion validation violations:",
            "PASSED - No w:t elements found within w:del elements",
        )

    def count_paragraphs_in_unpacked(self):
        """Return the number of ``w:p`` elements in the unpacked document.xml.

        If a file cannot be processed the problem is printed and the count
        gathered so far (0 by default) is kept. With several document.xml
        files the last successfully counted one wins.
        """
        count = 0
        paragraph_path = f".//{{{self.WORD_2006_NAMESPACE}}}p"

        for xml_file in self._document_xml_files():
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()
                count = len(root.findall(paragraph_path))
            except Exception as e:
                print(f"Error counting paragraphs in unpacked document: {e}")

        return count

    def count_paragraphs_in_original(self):
        """Return the number of ``w:p`` elements in the original file's word/document.xml.

        Returns 0 when no original file is set or when it cannot be read; in
        the latter case the error is printed.
        """
        original = self.original_file
        if original is None:
            return 0

        try:
            # Read the single member that is needed directly from the archive
            # instead of extracting the whole (possibly untrusted) package to disk.
            with zipfile.ZipFile(original, "r") as archive:
                document_bytes = archive.read("word/document.xml")
            root = lxml.etree.fromstring(document_bytes)
            return len(root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p"))
        except Exception as e:
            print(f"Error counting paragraphs in original document: {e}")
            return 0

    def validate_insertions(self):
        """Check that no ``w:delText`` sits inside ``w:ins`` without a ``w:del`` ancestor.

        Only files named document.xml are inspected. Returns True when no
        violation (and no processing error) was recorded.
        """
        errors = []
        namespaces = {"w": self.WORD_2006_NAMESPACE}

        for xml_file in self._document_xml_files():
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                invalid_elements = root.xpath(
                    ".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces
                )
                for elem in invalid_elements:
                    text_preview = self._preview_text(elem.text)
                    errors.append(
                        f" {xml_file.relative_to(self.unpacked_dir)}: "
                        f"Line {elem.sourceline}: <w:delText> within <w:ins>: {text_preview}"
                    )

            except Exception as e:  # includes lxml.etree.XMLSyntaxError
                errors.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        return self._print_outcome(
            errors,
            f"FAILED - Found {len(errors)} insertion validation violations:",
            "PASSED - No w:delText elements within w:ins elements",
        )

    def compare_paragraph_counts(self):
        """Print the paragraph count of the original and the unpacked document and their difference."""
        original_count = self.count_paragraphs_in_original()
        new_count = self.count_paragraphs_in_unpacked()

        diff = new_count - original_count
        # Only strictly positive differences get an explicit sign; zero prints as "0".
        diff_str = f"+{diff}" if diff > 0 else str(diff)
        print(f"\nParagraphs: {original_count} → {new_count} ({diff_str})")

    def _parse_id_value(self, val: str, base: int = 16) -> int:
        """Convert an id attribute string to int in the given base; raises ValueError if malformed."""
        return int(val, base)

    def validate_id_constraints(self):
        """Check ``w14:paraId`` and ``w16cid:durableId`` values in every XML file.

        ``paraId`` is read as hexadecimal and must be below 0x80000000.
        ``durableId`` must be below 0x7FFFFFFF; it is read as decimal in
        numbering.xml and as hexadecimal everywhere else. A value that cannot
        be parsed in the expected base is reported as a violation and the scan
        of the file continues, so one malformed id cannot hide later ones.

        Returns True when no violation was found.
        """
        errors = []
        para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId"
        durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId"

        for xml_file in self.xml_files:
            try:
                tree = lxml.etree.parse(str(xml_file))
            except Exception:
                # Best effort: a file that cannot be parsed is skipped by this
                # check (presumably validate_xml() reports it - confirm).
                continue

            in_numbering = xml_file.name == "numbering.xml"
            durable_base = 10 if in_numbering else 16

            for elem in tree.iter():
                if val := elem.get(para_id_attr):
                    try:
                        if self._parse_id_value(val, base=16) >= 0x80000000:
                            errors.append(
                                f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000"
                            )
                    except ValueError:
                        errors.append(
                            f" {xml_file.name}:{elem.sourceline}: "
                            f"paraId={val} must be hexadecimal"
                        )

                if val := elem.get(durable_id_attr):
                    try:
                        too_large = (
                            self._parse_id_value(val, base=durable_base) >= 0x7FFFFFFF
                        )
                    except ValueError:
                        expected = (
                            "decimal in numbering.xml" if in_numbering else "hexadecimal"
                        )
                        errors.append(
                            f" {xml_file.name}:{elem.sourceline}: "
                            f"durableId={val} must be {expected}"
                        )
                    else:
                        if too_large:
                            errors.append(
                                f" {xml_file.name}:{elem.sourceline}: "
                                f"durableId={val} >= 0x7FFFFFFF"
                            )

        return self._print_outcome(
            errors,
            f"FAILED - {len(errors)} ID constraint violations:",
            "PASSED - All paraId/durableId values within constraints",
        )

    def validate_comment_markers(self):
        """Check that comment range markers are paired and refer to existing comments.

        Reports ``commentRangeEnd`` ids without a start, ``commentRangeStart``
        ids without an end, and any marker or reference id that has no
        ``w:comment`` in comments.xml (every id is reported when comments.xml
        is absent). Returns True when nothing was reported, or when no
        document.xml is found.
        """
        errors = []

        document_xml = None
        comments_xml = None
        for xml_file in self.xml_files:
            # NOTE(review): the "word" test is a substring match on the whole
            # path, not a check of the parent directory - confirm this is intended.
            if xml_file.name == "document.xml" and "word" in str(xml_file):
                document_xml = xml_file
            elif xml_file.name == "comments.xml":
                comments_xml = xml_file

        if not document_xml:
            if self.verbose:
                print("PASSED - No document.xml found (skipping comment validation)")
            return True

        try:
            doc_root = lxml.etree.parse(str(document_xml)).getroot()

            range_starts = self._marker_ids(doc_root, "commentRangeStart")
            range_ends = self._marker_ids(doc_root, "commentRangeEnd")
            references = self._marker_ids(doc_root, "commentReference")

            for comment_id in sorted(
                range_ends - range_starts, key=self._numeric_id_order
            ):
                errors.append(
                    f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart'
                )

            for comment_id in sorted(
                range_starts - range_ends, key=self._numeric_id_order
            ):
                errors.append(
                    f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd'
                )

            comment_ids = set()
            if comments_xml and comments_xml.exists():
                comments_root = lxml.etree.parse(str(comments_xml)).getroot()
                comment_ids = self._marker_ids(comments_root, "comment")

            invalid_refs = (range_starts | range_ends | references) - comment_ids
            for comment_id in sorted(invalid_refs, key=self._numeric_id_order):
                # Markers without an id attribute (None / empty) are not reported here.
                if comment_id:
                    errors.append(
                        f' document.xml: marker id="{comment_id}" references non-existent comment'
                    )

        except Exception as e:  # includes lxml.etree.XMLSyntaxError
            errors.append(f" Error parsing XML: {e}")

        return self._print_outcome(
            errors,
            f"FAILED - {len(errors)} comment marker violations:",
            "PASSED - All comment markers properly paired",
        )

    # ------------------------------------------------------------------
    # Repair
    # ------------------------------------------------------------------

    def repair(self) -> int:
        """Run the base-class repairs, then the durableId repair; return the total count."""
        return super().repair() + self.repair_durableId()

    def repair_durableId(self) -> int:
        """Replace invalid ``w16cid:durableId`` attributes with fresh random ids.

        A value needs repair when it is >= 0x7FFFFFFF or cannot be parsed
        (decimal in numbering.xml, hexadecimal elsewhere). Replacements are
        drawn from 1..0x7FFFFFFE and written in the same notation the file
        expects. A modified file is rewritten as UTF-8. Returns the number of
        attributes changed.
        """
        repairs = 0

        for xml_file in self.xml_files:
            in_numbering = xml_file.name == "numbering.xml"
            base = 10 if in_numbering else 16

            try:
                content = xml_file.read_text(encoding="utf-8")
                dom = defusedxml.minidom.parseString(content)
                modified = False

                for elem in dom.getElementsByTagName("*"):
                    # The attribute is matched by its literal prefixed name.
                    if not elem.hasAttribute("w16cid:durableId"):
                        continue

                    durable_id = elem.getAttribute("w16cid:durableId")
                    try:
                        needs_repair = (
                            self._parse_id_value(durable_id, base=base) >= 0x7FFFFFFF
                        )
                    except ValueError:
                        needs_repair = True

                    if not needs_repair:
                        continue

                    value = random.randint(1, 0x7FFFFFFE)
                    new_id = str(value) if in_numbering else f"{value:08X}"

                    elem.setAttribute("w16cid:durableId", new_id)
                    print(
                        f" Repaired: {xml_file.name}: durableId {durable_id} → {new_id}"
                    )
                    repairs += 1
                    modified = True

                if modified:
                    xml_file.write_bytes(dom.toxml(encoding="UTF-8"))

            except Exception:
                # Deliberately best-effort: a file that cannot be read, parsed or
                # written is left untouched and the remaining files are still tried.
                pass

        return repairs
443
+
444
+
445
if __name__ == "__main__":
    # Import-only module (it uses a package-relative import), so direct execution is refused.
    direct_run_message = "This module should not be run directly."
    raise RuntimeError(direct_run_message)