@kortix/sandbox 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/config/customize.sh +143 -0
  2. package/config/kortix-env-setup.sh +25 -0
  3. package/kortix-master/package.json +22 -0
  4. package/kortix-master/src/config.ts +22 -0
  5. package/kortix-master/src/index.ts +44 -0
  6. package/kortix-master/src/routes/env.ts +65 -0
  7. package/kortix-master/src/routes/proxy.ts +108 -0
  8. package/kortix-master/src/routes/update.ts +185 -0
  9. package/kortix-master/src/services/proxy.ts +43 -0
  10. package/kortix-master/src/services/secret-store.ts +156 -0
  11. package/kortix-master/tsconfig.json +14 -0
  12. package/opencode/agents/kortix-browser.md +142 -0
  13. package/opencode/agents/kortix-build.md +62 -0
  14. package/opencode/agents/kortix-explore.md +66 -0
  15. package/opencode/agents/kortix-image-gen.md +33 -0
  16. package/opencode/agents/kortix-main.md +450 -0
  17. package/opencode/agents/kortix-plan.md +100 -0
  18. package/opencode/agents/kortix-research.md +84 -0
  19. package/opencode/agents/kortix-sheets.md +61 -0
  20. package/opencode/agents/kortix-slides.md +64 -0
  21. package/opencode/agents/kortix-web-dev.md +572 -0
  22. package/opencode/commands/email.md +36 -0
  23. package/opencode/commands/init.md +43 -0
  24. package/opencode/commands/journal.md +44 -0
  25. package/opencode/commands/memory-init.md +81 -0
  26. package/opencode/commands/memory-search.md +50 -0
  27. package/opencode/commands/memory-status.md +56 -0
  28. package/opencode/commands/research.md +36 -0
  29. package/opencode/commands/search.md +38 -0
  30. package/opencode/commands/slides.md +32 -0
  31. package/opencode/commands/spreadsheet.md +30 -0
  32. package/opencode/memory.json +37 -0
  33. package/opencode/ocx.jsonc +10 -0
  34. package/opencode/opencode.jsonc +103 -0
  35. package/opencode/package.json +25 -0
  36. package/opencode/patches/apply.sh +19 -0
  37. package/opencode/patches/opencode-pty-spawn.txt +49 -0
  38. package/opencode/plugin/background-agents.ts.disabled +483 -0
  39. package/opencode/plugin/kdco-primitives/get-project-id.ts +172 -0
  40. package/opencode/plugin/kdco-primitives/index.ts +26 -0
  41. package/opencode/plugin/kdco-primitives/log-warn.ts +51 -0
  42. package/opencode/plugin/kdco-primitives/mutex.ts +122 -0
  43. package/opencode/plugin/kdco-primitives/shell.ts +138 -0
  44. package/opencode/plugin/kdco-primitives/temp.ts +36 -0
  45. package/opencode/plugin/kdco-primitives/terminal-detect.ts +34 -0
  46. package/opencode/plugin/kdco-primitives/types.ts +13 -0
  47. package/opencode/plugin/kdco-primitives/with-timeout.ts +84 -0
  48. package/opencode/plugin/memory.ts +306 -0
  49. package/opencode/plugin/worktree/state.ts +412 -0
  50. package/opencode/plugin/worktree/terminal.ts +1002 -0
  51. package/opencode/plugin/worktree.ts +861 -0
  52. package/opencode/skills/KORTIX-browser/SKILL.md +478 -0
  53. package/opencode/skills/KORTIX-cron-triggers/SKILL.md +173 -0
  54. package/opencode/skills/KORTIX-deep-research/SKILL.md +278 -0
  55. package/opencode/skills/KORTIX-docx/SKILL.md +398 -0
  56. package/opencode/skills/KORTIX-docx/scripts/__init__.py +1 -0
  57. package/opencode/skills/KORTIX-docx/scripts/accept_changes.py +104 -0
  58. package/opencode/skills/KORTIX-docx/scripts/comment.py +244 -0
  59. package/opencode/skills/KORTIX-docx/scripts/office/helpers/__init__.py +0 -0
  60. package/opencode/skills/KORTIX-docx/scripts/office/helpers/merge_runs.py +199 -0
  61. package/opencode/skills/KORTIX-docx/scripts/office/helpers/simplify_redlines.py +197 -0
  62. package/opencode/skills/KORTIX-docx/scripts/office/pack.py +159 -0
  63. package/opencode/skills/KORTIX-docx/scripts/office/soffice.py +183 -0
  64. package/opencode/skills/KORTIX-docx/scripts/office/unpack.py +132 -0
  65. package/opencode/skills/KORTIX-docx/scripts/office/validate.py +111 -0
  66. package/opencode/skills/KORTIX-docx/scripts/office/validators/__init__.py +15 -0
  67. package/opencode/skills/KORTIX-docx/scripts/office/validators/base.py +847 -0
  68. package/opencode/skills/KORTIX-docx/scripts/office/validators/docx.py +446 -0
  69. package/opencode/skills/KORTIX-docx/scripts/office/validators/pptx.py +275 -0
  70. package/opencode/skills/KORTIX-docx/scripts/office/validators/redlining.py +247 -0
  71. package/opencode/skills/KORTIX-docx/scripts/render_docx.py +179 -0
  72. package/opencode/skills/KORTIX-docx/scripts/templates/comments.xml +3 -0
  73. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtended.xml +3 -0
  74. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtensible.xml +3 -0
  75. package/opencode/skills/KORTIX-docx/scripts/templates/commentsIds.xml +3 -0
  76. package/opencode/skills/KORTIX-docx/scripts/templates/people.xml +3 -0
  77. package/opencode/skills/KORTIX-domain-research/SKILL.md +96 -0
  78. package/opencode/skills/KORTIX-domain-research/scripts/domain-lookup.py +810 -0
  79. package/opencode/skills/KORTIX-elevenlabs/SKILL.md +230 -0
  80. package/opencode/skills/KORTIX-elevenlabs/scripts/tts.py +389 -0
  81. package/opencode/skills/KORTIX-email/SKILL.md +145 -0
  82. package/opencode/skills/KORTIX-legal-writer/SKILL.md +409 -0
  83. package/opencode/skills/KORTIX-legal-writer/references/bluebook.md +152 -0
  84. package/opencode/skills/KORTIX-legal-writer/references/document-types.md +416 -0
  85. package/opencode/skills/KORTIX-legal-writer/scripts/courtlistener.py +291 -0
  86. package/opencode/skills/KORTIX-legal-writer/scripts/ecfr_lookup.py +299 -0
  87. package/opencode/skills/KORTIX-legal-writer/scripts/verify-legal.py +507 -0
  88. package/opencode/skills/KORTIX-logo-creator/SKILL.md +293 -0
  89. package/opencode/skills/KORTIX-logo-creator/references/prompt-patterns.md +134 -0
  90. package/opencode/skills/KORTIX-logo-creator/scripts/compose_logo.py +406 -0
  91. package/opencode/skills/KORTIX-logo-creator/scripts/create_logo_sheet.py +258 -0
  92. package/opencode/skills/KORTIX-logo-creator/scripts/remove_bg.py +96 -0
  93. package/opencode/skills/KORTIX-memory/SKILL.md +261 -0
  94. package/opencode/skills/KORTIX-memory/scripts/export-sessions.py +409 -0
  95. package/opencode/skills/KORTIX-paper-creator/SKILL.md +549 -0
  96. package/opencode/skills/KORTIX-paper-creator/assets/template.tex +101 -0
  97. package/opencode/skills/KORTIX-paper-creator/scripts/compile.sh +177 -0
  98. package/opencode/skills/KORTIX-paper-creator/scripts/openalex_to_bibtex.py +220 -0
  99. package/opencode/skills/KORTIX-paper-creator/scripts/verify.sh +354 -0
  100. package/opencode/skills/KORTIX-paper-search/SKILL.md +418 -0
  101. package/opencode/skills/KORTIX-pdf/SKILL.md +232 -0
  102. package/opencode/skills/KORTIX-pdf/forms.md +36 -0
  103. package/opencode/skills/KORTIX-pdf/reference.md +105 -0
  104. package/opencode/skills/KORTIX-pdf/scripts/check_bounding_boxes.py +65 -0
  105. package/opencode/skills/KORTIX-pdf/scripts/check_fillable_fields.py +11 -0
  106. package/opencode/skills/KORTIX-pdf/scripts/convert_pdf_to_images.py +33 -0
  107. package/opencode/skills/KORTIX-pdf/scripts/create_validation_image.py +37 -0
  108. package/opencode/skills/KORTIX-pdf/scripts/extract_form_field_info.py +122 -0
  109. package/opencode/skills/KORTIX-pdf/scripts/extract_form_structure.py +115 -0
  110. package/opencode/skills/KORTIX-pdf/scripts/fill_fillable_fields.py +98 -0
  111. package/opencode/skills/KORTIX-pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  112. package/opencode/skills/KORTIX-plan/SKILL.md +228 -0
  113. package/opencode/skills/KORTIX-presentation-viewer/SKILL.md +87 -0
  114. package/opencode/skills/KORTIX-presentation-viewer/serve.ts +136 -0
  115. package/opencode/skills/KORTIX-presentation-viewer/viewer.html +559 -0
  116. package/opencode/skills/KORTIX-presentations/SKILL.md +344 -0
  117. package/opencode/skills/KORTIX-remotion/SKILL.md +56 -0
  118. package/opencode/skills/KORTIX-remotion/rules/3d.md +86 -0
  119. package/opencode/skills/KORTIX-remotion/rules/animations.md +29 -0
  120. package/opencode/skills/KORTIX-remotion/rules/assets.md +78 -0
  121. package/opencode/skills/KORTIX-remotion/rules/audio-visualization.md +198 -0
  122. package/opencode/skills/KORTIX-remotion/rules/audio.md +169 -0
  123. package/opencode/skills/KORTIX-remotion/rules/calculate-metadata.md +104 -0
  124. package/opencode/skills/KORTIX-remotion/rules/can-decode.md +75 -0
  125. package/opencode/skills/KORTIX-remotion/rules/charts.md +120 -0
  126. package/opencode/skills/KORTIX-remotion/rules/compositions.md +141 -0
  127. package/opencode/skills/KORTIX-remotion/rules/display-captions.md +184 -0
  128. package/opencode/skills/KORTIX-remotion/rules/extract-frames.md +229 -0
  129. package/opencode/skills/KORTIX-remotion/rules/ffmpeg.md +38 -0
  130. package/opencode/skills/KORTIX-remotion/rules/fonts.md +152 -0
  131. package/opencode/skills/KORTIX-remotion/rules/get-audio-duration.md +58 -0
  132. package/opencode/skills/KORTIX-remotion/rules/get-video-dimensions.md +68 -0
  133. package/opencode/skills/KORTIX-remotion/rules/get-video-duration.md +58 -0
  134. package/opencode/skills/KORTIX-remotion/rules/gifs.md +141 -0
  135. package/opencode/skills/KORTIX-remotion/rules/images.md +130 -0
  136. package/opencode/skills/KORTIX-remotion/rules/import-srt-captions.md +69 -0
  137. package/opencode/skills/KORTIX-remotion/rules/light-leaks.md +73 -0
  138. package/opencode/skills/KORTIX-remotion/rules/lottie.md +68 -0
  139. package/opencode/skills/KORTIX-remotion/rules/maps.md +401 -0
  140. package/opencode/skills/KORTIX-remotion/rules/measuring-dom-nodes.md +35 -0
  141. package/opencode/skills/KORTIX-remotion/rules/measuring-text.md +143 -0
  142. package/opencode/skills/KORTIX-remotion/rules/parameters.md +98 -0
  143. package/opencode/skills/KORTIX-remotion/rules/sequencing.md +118 -0
  144. package/opencode/skills/KORTIX-remotion/rules/subtitles.md +36 -0
  145. package/opencode/skills/KORTIX-remotion/rules/tailwind.md +11 -0
  146. package/opencode/skills/KORTIX-remotion/rules/text-animations.md +20 -0
  147. package/opencode/skills/KORTIX-remotion/rules/timing.md +179 -0
  148. package/opencode/skills/KORTIX-remotion/rules/transcribe-captions.md +70 -0
  149. package/opencode/skills/KORTIX-remotion/rules/transitions.md +197 -0
  150. package/opencode/skills/KORTIX-remotion/rules/transparent-videos.md +106 -0
  151. package/opencode/skills/KORTIX-remotion/rules/trimming.md +53 -0
  152. package/opencode/skills/KORTIX-remotion/rules/videos.md +171 -0
  153. package/opencode/skills/KORTIX-secrets/SKILL.md +280 -0
  154. package/opencode/skills/KORTIX-semantic-search/SKILL.md +213 -0
  155. package/opencode/skills/KORTIX-session-search/SKILL.md +807 -0
  156. package/opencode/skills/KORTIX-session-search/Untitled +1 -0
  157. package/opencode/skills/KORTIX-skill-creator/SKILL.md +163 -0
  158. package/opencode/skills/KORTIX-web-research/SKILL.md +69 -0
  159. package/opencode/skills/KORTIX-xlsx/LICENSE.txt +30 -0
  160. package/opencode/skills/KORTIX-xlsx/SKILL.md +549 -0
  161. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/__init__.py +0 -0
  162. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/merge_runs.py +199 -0
  163. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  164. package/opencode/skills/KORTIX-xlsx/scripts/office/pack.py +159 -0
  165. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  166. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  167. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  168. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  169. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  170. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  171. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  172. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  173. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  174. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  175. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  176. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  177. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  178. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  179. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  180. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  181. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  182. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  183. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  184. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  185. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  186. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  187. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  188. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  189. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  190. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  191. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  192. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  193. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  194. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  195. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  196. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  197. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  198. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  199. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  200. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  201. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  202. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  203. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  204. package/opencode/skills/KORTIX-xlsx/scripts/office/soffice.py +183 -0
  205. package/opencode/skills/KORTIX-xlsx/scripts/office/unpack.py +132 -0
  206. package/opencode/skills/KORTIX-xlsx/scripts/office/validate.py +111 -0
  207. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/__init__.py +15 -0
  208. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/base.py +847 -0
  209. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/docx.py +446 -0
  210. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/pptx.py +275 -0
  211. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/redlining.py +247 -0
  212. package/opencode/skills/KORTIX-xlsx/scripts/recalc.py +184 -0
  213. package/opencode/tools/image-gen.ts +342 -0
  214. package/opencode/tools/image-search.ts +190 -0
  215. package/opencode/tools/memory-get.ts +168 -0
  216. package/opencode/tools/memory-search.ts +247 -0
  217. package/opencode/tools/presentation-gen.ts +723 -0
  218. package/opencode/tools/scrape-webpage.ts +115 -0
  219. package/opencode/tools/scripts/.python-version +1 -0
  220. package/opencode/tools/scripts/convert_pdf.py +184 -0
  221. package/opencode/tools/scripts/convert_pptx.py +562 -0
  222. package/opencode/tools/scripts/pyproject.toml +11 -0
  223. package/opencode/tools/scripts/uv.lock +287 -0
  224. package/opencode/tools/scripts/validate_slide.py +74 -0
  225. package/opencode/tools/show-user.ts +217 -0
  226. package/opencode/tools/tests/e2e-presentation-fix.ts +277 -0
  227. package/opencode/tools/tests/image-gen.test.ts +215 -0
  228. package/opencode/tools/tests/image-search.test.ts +125 -0
  229. package/opencode/tools/tests/memory-system-benchmark.ts +1076 -0
  230. package/opencode/tools/tests/presentation-gen.test.ts +389 -0
  231. package/opencode/tools/tests/scrape-webpage.test.ts +74 -0
  232. package/opencode/tools/tests/show-user.test.ts +241 -0
  233. package/opencode/tools/tests/video-gen.test.ts +110 -0
  234. package/opencode/tools/tests/web-search.test.ts +106 -0
  235. package/opencode/tools/video-gen.ts +200 -0
  236. package/opencode/tools/web-search.ts +153 -0
  237. package/opencode/tsconfig.json +29 -0
  238. package/package.json +36 -0
  239. package/patch-agent-browser.js +100 -0
  240. package/postinstall.sh +88 -0
  241. package/services/KORTIX-presentation-viewer/run +37 -0
  242. package/services/agent-browser-viewer/run +48 -0
  243. package/services/kortix-master/run +16 -0
  244. package/services/lss-sync/run +22 -0
  245. package/services/opencode-serve/run +25 -0
  246. package/services/opencode-web/run +21 -0
@@ -0,0 +1,199 @@
1
+ """Merge adjacent runs with identical formatting in DOCX.
2
+
3
+ Merges adjacent <w:r> elements that have identical <w:rPr> properties.
4
+ Works on runs in paragraphs and inside tracked changes (<w:ins>, <w:del>).
5
+
6
+ Also:
7
+ - Removes rsid attributes from runs (revision metadata that doesn't affect rendering)
8
+ - Removes proofErr elements (spell/grammar markers that block merging)
9
+ """
10
+
11
+ from pathlib import Path
12
+
13
+ import defusedxml.minidom
14
+
15
+
16
def merge_runs(input_dir: str) -> tuple[int, str]:
    """Merge mergeable sibling runs in ``<input_dir>/word/document.xml`` in place.

    Before merging, every ``proofErr`` element is removed and rsid attributes
    are stripped from runs. The rewritten document is saved back to the same
    path as UTF-8.

    Returns:
        ``(merge_count, message)``. On any failure (missing file or an
        exception while processing) the count is 0 and the message starts
        with ``"Error: "``.
    """
    document_path = Path(input_dir, "word", "document.xml")
    if not document_path.exists():
        return 0, f"Error: {document_path} not found"

    try:
        xml_text = document_path.read_text(encoding="utf-8")
        document = defusedxml.minidom.parseString(xml_text)
        body = document.documentElement

        _remove_elements(body, "proofErr")
        _strip_run_rsid_attrs(body)

        # Each distinct parent of a run is processed exactly once.
        parents = {run.parentNode for run in _find_elements(body, "r")}
        total = sum(_merge_runs_in(parent) for parent in parents)

        document_path.write_bytes(document.toxml(encoding="UTF-8"))
    except Exception as exc:
        return 0, f"Error: {exc}"

    return total, f"Merged {total} runs"
40
+
41
+
42
+
43
+
44
+ def _find_elements(root, tag: str) -> list:
45
+ results = []
46
+
47
+ def traverse(node):
48
+ if node.nodeType == node.ELEMENT_NODE:
49
+ name = node.localName or node.tagName
50
+ if name == tag or name.endswith(f":{tag}"):
51
+ results.append(node)
52
+ for child in node.childNodes:
53
+ traverse(child)
54
+
55
+ traverse(root)
56
+ return results
57
+
58
+
59
+ def _get_child(parent, tag: str):
60
+ for child in parent.childNodes:
61
+ if child.nodeType == child.ELEMENT_NODE:
62
+ name = child.localName or child.tagName
63
+ if name == tag or name.endswith(f":{tag}"):
64
+ return child
65
+ return None
66
+
67
+
68
+ def _get_children(parent, tag: str) -> list:
69
+ results = []
70
+ for child in parent.childNodes:
71
+ if child.nodeType == child.ELEMENT_NODE:
72
+ name = child.localName or child.tagName
73
+ if name == tag or name.endswith(f":{tag}"):
74
+ results.append(child)
75
+ return results
76
+
77
+
78
+ def _is_adjacent(elem1, elem2) -> bool:
79
+ node = elem1.nextSibling
80
+ while node:
81
+ if node == elem2:
82
+ return True
83
+ if node.nodeType == node.ELEMENT_NODE:
84
+ return False
85
+ if node.nodeType == node.TEXT_NODE and node.data.strip():
86
+ return False
87
+ node = node.nextSibling
88
+ return False
89
+
90
+
91
+
92
+
93
def _remove_elements(root, tag: str):
    """Detach every element named ``tag`` found at or below ``root``.

    Elements that have no parent node are left alone.
    """
    for node in _find_elements(root, tag):
        owner = node.parentNode
        if owner:
            owner.removeChild(node)
97
+
98
+
99
def _strip_run_rsid_attrs(root):
    """Remove from each run every attribute whose name contains "rsid".

    The name comparison is case-insensitive.
    """
    for run in _find_elements(root, "r"):
        # Collect the names first so the attribute map is not mutated while
        # it is being iterated.
        rsid_names = [
            attribute.name
            for attribute in run.attributes.values()
            if "rsid" in attribute.name.lower()
        ]
        for attribute_name in rsid_names:
            run.removeAttribute(attribute_name)
104
+
105
+
106
+
107
+
108
def _merge_runs_in(container) -> int:
    """Merge consecutive mergeable runs among ``container``'s children.

    For each run, the following element sibling is absorbed repeatedly while
    it is a run that ``_can_merge`` accepts. The surviving run then has its
    text elements consolidated.

    Returns:
        The number of runs that were absorbed into a predecessor.
    """
    merged = 0
    current = _first_child_run(container)

    while current:
        neighbour = _next_element_sibling(current)
        while neighbour and _is_run(neighbour) and _can_merge(current, neighbour):
            _merge_run_content(current, neighbour)
            container.removeChild(neighbour)
            merged += 1
            # Look again: removing the neighbour exposes a new next sibling.
            neighbour = _next_element_sibling(current)

        _consolidate_text(current)
        current = _next_sibling_run(current)

    return merged
126
+
127
+
128
def _first_child_run(container):
    """Return the first direct child of ``container`` that is a run, or None."""
    runs = (
        node
        for node in container.childNodes
        if node.nodeType == node.ELEMENT_NODE and _is_run(node)
    )
    return next(runs, None)
133
+
134
+
135
+ def _next_element_sibling(node):
136
+ sibling = node.nextSibling
137
+ while sibling:
138
+ if sibling.nodeType == sibling.ELEMENT_NODE:
139
+ return sibling
140
+ sibling = sibling.nextSibling
141
+ return None
142
+
143
+
144
def _next_sibling_run(node):
    """Return the nearest following sibling that is a run element, or None.

    Unlike ``_next_element_sibling``, non-run elements are skipped too.
    """
    candidate = node.nextSibling
    while candidate:
        if candidate.nodeType == candidate.ELEMENT_NODE and _is_run(candidate):
            return candidate
        candidate = candidate.nextSibling
    return None
152
+
153
+
154
+ def _is_run(node) -> bool:
155
+ name = node.localName or node.tagName
156
+ return name == "r" or name.endswith(":r")
157
+
158
+
159
def _can_merge(run1, run2) -> bool:
    """Tell whether two runs carry the same ``rPr`` child.

    Two runs are mergeable when neither has an ``rPr`` child, or when both
    have one and the two serialize to identical XML.
    """
    props1 = _get_child(run1, "rPr")
    props2 = _get_child(run2, "rPr")

    if props1 is None or props2 is None:
        # Mergeable only when the properties are absent on both sides.
        return props1 is props2
    return props1.toxml() == props2.toxml()
168
+
169
+
170
+ def _merge_run_content(target, source):
171
+ for child in list(source.childNodes):
172
+ if child.nodeType == child.ELEMENT_NODE:
173
+ name = child.localName or child.tagName
174
+ if name != "rPr" and not name.endswith(":rPr"):
175
+ target.appendChild(child)
176
+
177
+
178
def _consolidate_text(run):
    """Fold adjacent ``t`` children of ``run`` into the earlier element.

    Pairs are visited from the last to the first, so a chain of adjacent text
    elements collapses into its first member. After each fold,
    ``xml:space="preserve"`` is set on the kept element when the combined text
    starts or ends with a space, and removed otherwise.
    """
    texts = _get_children(run, "t")

    for index in reversed(range(1, len(texts))):
        left, right = texts[index - 1], texts[index]
        if not _is_adjacent(left, right):
            continue

        left_node = left.firstChild
        right_node = right.firstChild
        combined = (left_node.data if left_node else "") + (
            right_node.data if right_node else ""
        )

        if left_node:
            left_node.data = combined
        else:
            left.appendChild(run.ownerDocument.createTextNode(combined))

        if combined[:1] == " " or combined[-1:] == " ":
            left.setAttribute("xml:space", "preserve")
        elif left.hasAttribute("xml:space"):
            left.removeAttribute("xml:space")

        run.removeChild(right)
@@ -0,0 +1,197 @@
1
+ """Simplify tracked changes by merging adjacent w:ins or w:del elements.
2
+
3
+ Merges adjacent <w:ins> elements from the same author into a single element.
4
+ Same for <w:del> elements. This makes heavily-redlined documents easier to
5
+ work with by reducing the number of tracked change wrappers.
6
+
7
+ Rules:
8
+ - Only merges w:ins with w:ins, w:del with w:del (same element type)
9
+ - Only merges if same author (ignores timestamp differences)
10
+ - Only merges if truly adjacent (only whitespace between them)
11
+ """
12
+
13
+ import xml.etree.ElementTree as ET
14
+ import zipfile
15
+ from pathlib import Path
16
+
17
+ import defusedxml.minidom
18
+
19
+ WORD_NS = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
20
+
21
+
22
def simplify_redlines(input_dir: str) -> tuple[int, str]:
    """Merge adjacent tracked-change wrappers in ``<input_dir>/word/document.xml``.

    Every ``p`` and ``tc`` element is processed, first for ``ins`` wrappers and
    then for ``del`` wrappers. The document is rewritten in place as UTF-8.

    Returns:
        ``(merge_count, message)``. On any failure (missing file or an
        exception while processing) the count is 0 and the message starts
        with ``"Error: "``.
    """
    document_path = Path(input_dir, "word", "document.xml")
    if not document_path.exists():
        return 0, f"Error: {document_path} not found"

    try:
        xml_text = document_path.read_text(encoding="utf-8")
        document = defusedxml.minidom.parseString(xml_text)
        body = document.documentElement

        total = 0
        for container in _find_elements(body, "p") + _find_elements(body, "tc"):
            for kind in ("ins", "del"):
                total += _merge_tracked_changes_in(container, kind)

        document_path.write_bytes(document.toxml(encoding="UTF-8"))
    except Exception as exc:
        return 0, f"Error: {exc}"

    return total, f"Simplified {total} tracked changes"
45
+
46
+
47
def _merge_tracked_changes_in(container, tag: str) -> int:
    """Merge mergeable neighbouring ``tag`` wrappers that are direct children.

    Each wrapper is compared with the next one in child order. When
    ``_can_merge_tracked`` accepts the pair, the later wrapper's content is
    moved into the earlier one and the later wrapper is removed.

    Returns:
        The number of wrappers removed by merging.
    """
    wrappers = [
        node
        for node in container.childNodes
        if node.nodeType == node.ELEMENT_NODE and _is_element(node, tag)
    ]
    if len(wrappers) < 2:
        return 0

    merged = 0
    anchor = wrappers[0]
    for follower in wrappers[1:]:
        if _can_merge_tracked(anchor, follower):
            _merge_tracked_content(anchor, follower)
            container.removeChild(follower)
            merged += 1
        else:
            # Not mergeable: the follower becomes the next merge target.
            anchor = follower
    return merged
73
+
74
+
75
+ def _is_element(node, tag: str) -> bool:
76
+ name = node.localName or node.tagName
77
+ return name == tag or name.endswith(f":{tag}")
78
+
79
+
80
+ def _get_author(elem) -> str:
81
+ author = elem.getAttribute("w:author")
82
+ if not author:
83
+ for attr in elem.attributes.values():
84
+ if attr.localName == "author" or attr.name.endswith(":author"):
85
+ return attr.value
86
+ return author
87
+
88
+
89
def _can_merge_tracked(elem1, elem2) -> bool:
    """Tell whether two tracked-change wrappers may be merged.

    The wrappers must have the same author, and no element or
    non-whitespace text may lie between ``elem1`` and ``elem2``.
    """
    if _get_author(elem1) != _get_author(elem2):
        return False

    between = elem1.nextSibling
    while between and between != elem2:
        is_element = between.nodeType == between.ELEMENT_NODE
        is_real_text = between.nodeType == between.TEXT_NODE and between.data.strip()
        if is_element or is_real_text:
            return False
        between = between.nextSibling

    return True
102
+
103
+
104
+ def _merge_tracked_content(target, source):
105
+ while source.firstChild:
106
+ child = source.firstChild
107
+ source.removeChild(child)
108
+ target.appendChild(child)
109
+
110
+
111
+ def _find_elements(root, tag: str) -> list:
112
+ results = []
113
+
114
+ def traverse(node):
115
+ if node.nodeType == node.ELEMENT_NODE:
116
+ name = node.localName or node.tagName
117
+ if name == tag or name.endswith(f":{tag}"):
118
+ results.append(node)
119
+ for child in node.childNodes:
120
+ traverse(child)
121
+
122
+ traverse(root)
123
+ return results
124
+
125
+
126
def get_tracked_change_authors(doc_xml_path: Path) -> dict[str, int]:
    """Count tracked changes per author in a document.xml file.

    All ``w:ins`` descendants are counted first, then all ``w:del``
    descendants. Changes without an author value are ignored.

    Returns:
        A mapping of author name to number of tracked changes. It is empty
        when the file does not exist or cannot be parsed as XML.
    """
    if not doc_xml_path.exists():
        return {}

    # NOTE(review): this parses with the stdlib ElementTree, while the rest of
    # the module parses documents with defusedxml - confirm that is intended.
    try:
        root = ET.parse(doc_xml_path).getroot()
    except ET.ParseError:
        return {}

    author_key = f"{{{WORD_NS}}}author"
    counts: dict[str, int] = {}
    for kind in ("ins", "del"):
        for change in root.findall(f".//w:{kind}", {"w": WORD_NS}):
            name = change.get(author_key)
            if name:
                counts[name] = counts.get(name, 0) + 1

    return counts
147
+
148
+
149
+ def _get_authors_from_docx(docx_path: Path) -> dict[str, int]:
150
+ try:
151
+ with zipfile.ZipFile(docx_path, "r") as zf:
152
+ if "word/document.xml" not in zf.namelist():
153
+ return {}
154
+ with zf.open("word/document.xml") as f:
155
+ tree = ET.parse(f)
156
+ root = tree.getroot()
157
+
158
+ namespaces = {"w": WORD_NS}
159
+ author_attr = f"{{{WORD_NS}}}author"
160
+
161
+ authors: dict[str, int] = {}
162
+ for tag in ["ins", "del"]:
163
+ for elem in root.findall(f".//w:{tag}", namespaces):
164
+ author = elem.get(author_attr)
165
+ if author:
166
+ authors[author] = authors.get(author, 0) + 1
167
+ return authors
168
+ except (zipfile.BadZipFile, ET.ParseError):
169
+ return {}
170
+
171
+
172
def infer_author(modified_dir: Path, original_docx: Path, default: str = "Claude") -> str:
    """Work out which author added tracked changes relative to the original.

    Per-author tracked-change counts in ``modified_dir/word/document.xml`` are
    compared with the counts in ``original_docx``. An author counts as having
    added changes when their count grew.

    Returns:
        The single author whose count grew, or ``default`` when the modified
        document has no authored tracked changes or no count grew.

    Raises:
        ValueError: If more than one author's count grew.
    """
    after = get_tracked_change_authors(modified_dir / "word" / "document.xml")
    if not after:
        return default

    before = _get_authors_from_docx(original_docx)
    gained: dict[str, int] = {
        name: count - before.get(name, 0)
        for name, count in after.items()
        if count - before.get(name, 0) > 0
    }

    if not gained:
        return default
    if len(gained) > 1:
        raise ValueError(
            f"Multiple authors added new changes: {gained}. "
            "Cannot infer which author to validate."
        )
    return next(iter(gained))
@@ -0,0 +1,159 @@
1
+ """Pack a directory into a DOCX, PPTX, or XLSX file.
2
+
3
+ Validates with auto-repair, condenses XML formatting, and creates the Office file.
4
+
5
+ Usage:
6
+ python pack.py <input_directory> <output_file> [--original <file>] [--validate true|false]
7
+
8
+ Examples:
9
+ python pack.py unpacked/ output.docx --original input.docx
10
+ python pack.py unpacked/ output.pptx --validate false
11
+ """
12
+
13
+ import argparse
14
+ import sys
15
+ import shutil
16
+ import tempfile
17
+ import zipfile
18
+ from pathlib import Path
19
+
20
+ import defusedxml.minidom
21
+
22
+ from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
23
+
24
+ def pack(
25
+ input_directory: str,
26
+ output_file: str,
27
+ original_file: str | None = None,
28
+ validate: bool = True,
29
+ infer_author_func=None,
30
+ ) -> tuple[None, str]:
31
+ input_dir = Path(input_directory)
32
+ output_path = Path(output_file)
33
+ suffix = output_path.suffix.lower()
34
+
35
+ if not input_dir.is_dir():
36
+ return None, f"Error: {input_dir} is not a directory"
37
+
38
+ if suffix not in {".docx", ".pptx", ".xlsx"}:
39
+ return None, f"Error: {output_file} must be a .docx, .pptx, or .xlsx file"
40
+
41
+ if validate and original_file:
42
+ original_path = Path(original_file)
43
+ if original_path.exists():
44
+ success, output = _run_validation(
45
+ input_dir, original_path, suffix, infer_author_func
46
+ )
47
+ if output:
48
+ print(output)
49
+ if not success:
50
+ return None, f"Error: Validation failed for {input_dir}"
51
+
52
+ with tempfile.TemporaryDirectory() as temp_dir:
53
+ temp_content_dir = Path(temp_dir) / "content"
54
+ shutil.copytree(input_dir, temp_content_dir)
55
+
56
+ for pattern in ["*.xml", "*.rels"]:
57
+ for xml_file in temp_content_dir.rglob(pattern):
58
+ _condense_xml(xml_file)
59
+
60
+ output_path.parent.mkdir(parents=True, exist_ok=True)
61
+ with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zf:
62
+ for f in temp_content_dir.rglob("*"):
63
+ if f.is_file():
64
+ zf.write(f, f.relative_to(temp_content_dir))
65
+
66
+ return None, f"Successfully packed {input_dir} to {output_file}"
67
+
68
+
69
+ def _run_validation(
70
+ unpacked_dir: Path,
71
+ original_file: Path,
72
+ suffix: str,
73
+ infer_author_func=None,
74
+ ) -> tuple[bool, str | None]:
75
+ output_lines = []
76
+ validators = []
77
+
78
+ if suffix == ".docx":
79
+ author = "Claude"
80
+ if infer_author_func:
81
+ try:
82
+ author = infer_author_func(unpacked_dir, original_file)
83
+ except ValueError as e:
84
+ print(f"Warning: {e} Using default author 'Claude'.", file=sys.stderr)
85
+
86
+ validators = [
87
+ DOCXSchemaValidator(unpacked_dir, original_file),
88
+ RedliningValidator(unpacked_dir, original_file, author=author),
89
+ ]
90
+ elif suffix == ".pptx":
91
+ validators = [PPTXSchemaValidator(unpacked_dir, original_file)]
92
+
93
+ if not validators:
94
+ return True, None
95
+
96
+ total_repairs = sum(v.repair() for v in validators)
97
+ if total_repairs:
98
+ output_lines.append(f"Auto-repaired {total_repairs} issue(s)")
99
+
100
+ success = all(v.validate() for v in validators)
101
+
102
+ if success:
103
+ output_lines.append("All validations PASSED!")
104
+
105
+ return success, "\n".join(output_lines) if output_lines else None
106
+
107
+
108
def _condense_xml(xml_file: Path) -> None:
    """Strip whitespace-only text nodes and comments from an XML file in place.

    Text elements are left untouched so that significant whitespace survives.
    An element counts as a text element when its local name is ``t``, which
    covers both prefixed forms (``w:t``, ``a:t``) and the unprefixed ``<t>``
    used when the main namespace is the default one, as in XLSX shared
    strings. The result is written back as UTF-8.

    Raises:
        Exception: Whatever parsing or writing raised, re-raised after the
            failure is reported on stderr.
    """
    try:
        with open(xml_file, encoding="utf-8") as f:
            dom = defusedxml.minidom.parse(f)

        for element in dom.getElementsByTagName("*"):
            # Match on the local name: checking only for a ":t" suffix missed
            # unprefixed <t> elements and erased whitespace-only cell text.
            name = element.localName or element.tagName
            if name == "t" or name.endswith(":t"):
                continue

            for child in list(element.childNodes):
                is_blank_text = (
                    child.nodeType == child.TEXT_NODE
                    and child.nodeValue
                    and child.nodeValue.strip() == ""
                )
                if is_blank_text or child.nodeType == child.COMMENT_NODE:
                    element.removeChild(child)

        xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
    except Exception as e:
        print(f"ERROR: Failed to parse {xml_file.name}: {e}", file=sys.stderr)
        raise
129
+
130
+
131
if __name__ == "__main__":
    # Command-line entry point: parse arguments, pack, and report the outcome.
    parser = argparse.ArgumentParser(
        description="Pack a directory into a DOCX, PPTX, or XLSX file"
    )
    parser.add_argument("input_directory", help="Unpacked Office document directory")
    parser.add_argument("output_file", help="Output Office file (.docx/.pptx/.xlsx)")
    parser.add_argument(
        "--original",
        help="Original file for validation comparison",
    )
    parser.add_argument(
        "--validate",
        # Only the literal "true" (any letter case) enables validation.
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Run validation with auto-repair (default: true)",
    )
    args = parser.parse_args()

    _, message = pack(
        args.input_directory,
        args.output_file,
        original_file=args.original,
        validate=args.validate,
    )
    print(message)

    # Every failure message from pack() begins with "Error:". A substring test
    # would also fire on a successful run whose paths contain "Error".
    if message.startswith("Error"):
        sys.exit(1)