@kortix/sandbox 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (246) hide show
  1. package/config/customize.sh +143 -0
  2. package/config/kortix-env-setup.sh +25 -0
  3. package/kortix-master/package.json +22 -0
  4. package/kortix-master/src/config.ts +22 -0
  5. package/kortix-master/src/index.ts +44 -0
  6. package/kortix-master/src/routes/env.ts +65 -0
  7. package/kortix-master/src/routes/proxy.ts +108 -0
  8. package/kortix-master/src/routes/update.ts +185 -0
  9. package/kortix-master/src/services/proxy.ts +43 -0
  10. package/kortix-master/src/services/secret-store.ts +156 -0
  11. package/kortix-master/tsconfig.json +14 -0
  12. package/opencode/agents/kortix-browser.md +142 -0
  13. package/opencode/agents/kortix-build.md +62 -0
  14. package/opencode/agents/kortix-explore.md +66 -0
  15. package/opencode/agents/kortix-image-gen.md +33 -0
  16. package/opencode/agents/kortix-main.md +450 -0
  17. package/opencode/agents/kortix-plan.md +100 -0
  18. package/opencode/agents/kortix-research.md +84 -0
  19. package/opencode/agents/kortix-sheets.md +61 -0
  20. package/opencode/agents/kortix-slides.md +64 -0
  21. package/opencode/agents/kortix-web-dev.md +572 -0
  22. package/opencode/commands/email.md +36 -0
  23. package/opencode/commands/init.md +43 -0
  24. package/opencode/commands/journal.md +44 -0
  25. package/opencode/commands/memory-init.md +81 -0
  26. package/opencode/commands/memory-search.md +50 -0
  27. package/opencode/commands/memory-status.md +56 -0
  28. package/opencode/commands/research.md +36 -0
  29. package/opencode/commands/search.md +38 -0
  30. package/opencode/commands/slides.md +32 -0
  31. package/opencode/commands/spreadsheet.md +30 -0
  32. package/opencode/memory.json +37 -0
  33. package/opencode/ocx.jsonc +10 -0
  34. package/opencode/opencode.jsonc +103 -0
  35. package/opencode/package.json +25 -0
  36. package/opencode/patches/apply.sh +19 -0
  37. package/opencode/patches/opencode-pty-spawn.txt +49 -0
  38. package/opencode/plugin/background-agents.ts.disabled +483 -0
  39. package/opencode/plugin/kdco-primitives/get-project-id.ts +172 -0
  40. package/opencode/plugin/kdco-primitives/index.ts +26 -0
  41. package/opencode/plugin/kdco-primitives/log-warn.ts +51 -0
  42. package/opencode/plugin/kdco-primitives/mutex.ts +122 -0
  43. package/opencode/plugin/kdco-primitives/shell.ts +138 -0
  44. package/opencode/plugin/kdco-primitives/temp.ts +36 -0
  45. package/opencode/plugin/kdco-primitives/terminal-detect.ts +34 -0
  46. package/opencode/plugin/kdco-primitives/types.ts +13 -0
  47. package/opencode/plugin/kdco-primitives/with-timeout.ts +84 -0
  48. package/opencode/plugin/memory.ts +306 -0
  49. package/opencode/plugin/worktree/state.ts +412 -0
  50. package/opencode/plugin/worktree/terminal.ts +1002 -0
  51. package/opencode/plugin/worktree.ts +861 -0
  52. package/opencode/skills/KORTIX-browser/SKILL.md +478 -0
  53. package/opencode/skills/KORTIX-cron-triggers/SKILL.md +173 -0
  54. package/opencode/skills/KORTIX-deep-research/SKILL.md +278 -0
  55. package/opencode/skills/KORTIX-docx/SKILL.md +398 -0
  56. package/opencode/skills/KORTIX-docx/scripts/__init__.py +1 -0
  57. package/opencode/skills/KORTIX-docx/scripts/accept_changes.py +104 -0
  58. package/opencode/skills/KORTIX-docx/scripts/comment.py +244 -0
  59. package/opencode/skills/KORTIX-docx/scripts/office/helpers/__init__.py +0 -0
  60. package/opencode/skills/KORTIX-docx/scripts/office/helpers/merge_runs.py +199 -0
  61. package/opencode/skills/KORTIX-docx/scripts/office/helpers/simplify_redlines.py +197 -0
  62. package/opencode/skills/KORTIX-docx/scripts/office/pack.py +159 -0
  63. package/opencode/skills/KORTIX-docx/scripts/office/soffice.py +183 -0
  64. package/opencode/skills/KORTIX-docx/scripts/office/unpack.py +132 -0
  65. package/opencode/skills/KORTIX-docx/scripts/office/validate.py +111 -0
  66. package/opencode/skills/KORTIX-docx/scripts/office/validators/__init__.py +15 -0
  67. package/opencode/skills/KORTIX-docx/scripts/office/validators/base.py +847 -0
  68. package/opencode/skills/KORTIX-docx/scripts/office/validators/docx.py +446 -0
  69. package/opencode/skills/KORTIX-docx/scripts/office/validators/pptx.py +275 -0
  70. package/opencode/skills/KORTIX-docx/scripts/office/validators/redlining.py +247 -0
  71. package/opencode/skills/KORTIX-docx/scripts/render_docx.py +179 -0
  72. package/opencode/skills/KORTIX-docx/scripts/templates/comments.xml +3 -0
  73. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtended.xml +3 -0
  74. package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtensible.xml +3 -0
  75. package/opencode/skills/KORTIX-docx/scripts/templates/commentsIds.xml +3 -0
  76. package/opencode/skills/KORTIX-docx/scripts/templates/people.xml +3 -0
  77. package/opencode/skills/KORTIX-domain-research/SKILL.md +96 -0
  78. package/opencode/skills/KORTIX-domain-research/scripts/domain-lookup.py +810 -0
  79. package/opencode/skills/KORTIX-elevenlabs/SKILL.md +230 -0
  80. package/opencode/skills/KORTIX-elevenlabs/scripts/tts.py +389 -0
  81. package/opencode/skills/KORTIX-email/SKILL.md +145 -0
  82. package/opencode/skills/KORTIX-legal-writer/SKILL.md +409 -0
  83. package/opencode/skills/KORTIX-legal-writer/references/bluebook.md +152 -0
  84. package/opencode/skills/KORTIX-legal-writer/references/document-types.md +416 -0
  85. package/opencode/skills/KORTIX-legal-writer/scripts/courtlistener.py +291 -0
  86. package/opencode/skills/KORTIX-legal-writer/scripts/ecfr_lookup.py +299 -0
  87. package/opencode/skills/KORTIX-legal-writer/scripts/verify-legal.py +507 -0
  88. package/opencode/skills/KORTIX-logo-creator/SKILL.md +293 -0
  89. package/opencode/skills/KORTIX-logo-creator/references/prompt-patterns.md +134 -0
  90. package/opencode/skills/KORTIX-logo-creator/scripts/compose_logo.py +406 -0
  91. package/opencode/skills/KORTIX-logo-creator/scripts/create_logo_sheet.py +258 -0
  92. package/opencode/skills/KORTIX-logo-creator/scripts/remove_bg.py +96 -0
  93. package/opencode/skills/KORTIX-memory/SKILL.md +261 -0
  94. package/opencode/skills/KORTIX-memory/scripts/export-sessions.py +409 -0
  95. package/opencode/skills/KORTIX-paper-creator/SKILL.md +549 -0
  96. package/opencode/skills/KORTIX-paper-creator/assets/template.tex +101 -0
  97. package/opencode/skills/KORTIX-paper-creator/scripts/compile.sh +177 -0
  98. package/opencode/skills/KORTIX-paper-creator/scripts/openalex_to_bibtex.py +220 -0
  99. package/opencode/skills/KORTIX-paper-creator/scripts/verify.sh +354 -0
  100. package/opencode/skills/KORTIX-paper-search/SKILL.md +418 -0
  101. package/opencode/skills/KORTIX-pdf/SKILL.md +232 -0
  102. package/opencode/skills/KORTIX-pdf/forms.md +36 -0
  103. package/opencode/skills/KORTIX-pdf/reference.md +105 -0
  104. package/opencode/skills/KORTIX-pdf/scripts/check_bounding_boxes.py +65 -0
  105. package/opencode/skills/KORTIX-pdf/scripts/check_fillable_fields.py +11 -0
  106. package/opencode/skills/KORTIX-pdf/scripts/convert_pdf_to_images.py +33 -0
  107. package/opencode/skills/KORTIX-pdf/scripts/create_validation_image.py +37 -0
  108. package/opencode/skills/KORTIX-pdf/scripts/extract_form_field_info.py +122 -0
  109. package/opencode/skills/KORTIX-pdf/scripts/extract_form_structure.py +115 -0
  110. package/opencode/skills/KORTIX-pdf/scripts/fill_fillable_fields.py +98 -0
  111. package/opencode/skills/KORTIX-pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
  112. package/opencode/skills/KORTIX-plan/SKILL.md +228 -0
  113. package/opencode/skills/KORTIX-presentation-viewer/SKILL.md +87 -0
  114. package/opencode/skills/KORTIX-presentation-viewer/serve.ts +136 -0
  115. package/opencode/skills/KORTIX-presentation-viewer/viewer.html +559 -0
  116. package/opencode/skills/KORTIX-presentations/SKILL.md +344 -0
  117. package/opencode/skills/KORTIX-remotion/SKILL.md +56 -0
  118. package/opencode/skills/KORTIX-remotion/rules/3d.md +86 -0
  119. package/opencode/skills/KORTIX-remotion/rules/animations.md +29 -0
  120. package/opencode/skills/KORTIX-remotion/rules/assets.md +78 -0
  121. package/opencode/skills/KORTIX-remotion/rules/audio-visualization.md +198 -0
  122. package/opencode/skills/KORTIX-remotion/rules/audio.md +169 -0
  123. package/opencode/skills/KORTIX-remotion/rules/calculate-metadata.md +104 -0
  124. package/opencode/skills/KORTIX-remotion/rules/can-decode.md +75 -0
  125. package/opencode/skills/KORTIX-remotion/rules/charts.md +120 -0
  126. package/opencode/skills/KORTIX-remotion/rules/compositions.md +141 -0
  127. package/opencode/skills/KORTIX-remotion/rules/display-captions.md +184 -0
  128. package/opencode/skills/KORTIX-remotion/rules/extract-frames.md +229 -0
  129. package/opencode/skills/KORTIX-remotion/rules/ffmpeg.md +38 -0
  130. package/opencode/skills/KORTIX-remotion/rules/fonts.md +152 -0
  131. package/opencode/skills/KORTIX-remotion/rules/get-audio-duration.md +58 -0
  132. package/opencode/skills/KORTIX-remotion/rules/get-video-dimensions.md +68 -0
  133. package/opencode/skills/KORTIX-remotion/rules/get-video-duration.md +58 -0
  134. package/opencode/skills/KORTIX-remotion/rules/gifs.md +141 -0
  135. package/opencode/skills/KORTIX-remotion/rules/images.md +130 -0
  136. package/opencode/skills/KORTIX-remotion/rules/import-srt-captions.md +69 -0
  137. package/opencode/skills/KORTIX-remotion/rules/light-leaks.md +73 -0
  138. package/opencode/skills/KORTIX-remotion/rules/lottie.md +68 -0
  139. package/opencode/skills/KORTIX-remotion/rules/maps.md +401 -0
  140. package/opencode/skills/KORTIX-remotion/rules/measuring-dom-nodes.md +35 -0
  141. package/opencode/skills/KORTIX-remotion/rules/measuring-text.md +143 -0
  142. package/opencode/skills/KORTIX-remotion/rules/parameters.md +98 -0
  143. package/opencode/skills/KORTIX-remotion/rules/sequencing.md +118 -0
  144. package/opencode/skills/KORTIX-remotion/rules/subtitles.md +36 -0
  145. package/opencode/skills/KORTIX-remotion/rules/tailwind.md +11 -0
  146. package/opencode/skills/KORTIX-remotion/rules/text-animations.md +20 -0
  147. package/opencode/skills/KORTIX-remotion/rules/timing.md +179 -0
  148. package/opencode/skills/KORTIX-remotion/rules/transcribe-captions.md +70 -0
  149. package/opencode/skills/KORTIX-remotion/rules/transitions.md +197 -0
  150. package/opencode/skills/KORTIX-remotion/rules/transparent-videos.md +106 -0
  151. package/opencode/skills/KORTIX-remotion/rules/trimming.md +53 -0
  152. package/opencode/skills/KORTIX-remotion/rules/videos.md +171 -0
  153. package/opencode/skills/KORTIX-secrets/SKILL.md +280 -0
  154. package/opencode/skills/KORTIX-semantic-search/SKILL.md +213 -0
  155. package/opencode/skills/KORTIX-session-search/SKILL.md +807 -0
  156. package/opencode/skills/KORTIX-session-search/Untitled +1 -0
  157. package/opencode/skills/KORTIX-skill-creator/SKILL.md +163 -0
  158. package/opencode/skills/KORTIX-web-research/SKILL.md +69 -0
  159. package/opencode/skills/KORTIX-xlsx/LICENSE.txt +30 -0
  160. package/opencode/skills/KORTIX-xlsx/SKILL.md +549 -0
  161. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/__init__.py +0 -0
  162. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/merge_runs.py +199 -0
  163. package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
  164. package/opencode/skills/KORTIX-xlsx/scripts/office/pack.py +159 -0
  165. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
  166. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
  167. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
  168. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
  169. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
  170. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
  171. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
  172. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
  173. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
  174. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
  175. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
  176. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
  177. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
  178. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
  179. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
  180. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
  181. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
  182. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
  183. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
  184. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
  185. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
  186. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
  187. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
  188. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
  189. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
  190. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
  191. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
  192. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
  193. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
  194. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
  195. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
  196. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
  197. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
  198. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
  199. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
  200. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
  201. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
  202. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
  203. package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
  204. package/opencode/skills/KORTIX-xlsx/scripts/office/soffice.py +183 -0
  205. package/opencode/skills/KORTIX-xlsx/scripts/office/unpack.py +132 -0
  206. package/opencode/skills/KORTIX-xlsx/scripts/office/validate.py +111 -0
  207. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/__init__.py +15 -0
  208. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/base.py +847 -0
  209. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/docx.py +446 -0
  210. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/pptx.py +275 -0
  211. package/opencode/skills/KORTIX-xlsx/scripts/office/validators/redlining.py +247 -0
  212. package/opencode/skills/KORTIX-xlsx/scripts/recalc.py +184 -0
  213. package/opencode/tools/image-gen.ts +342 -0
  214. package/opencode/tools/image-search.ts +190 -0
  215. package/opencode/tools/memory-get.ts +168 -0
  216. package/opencode/tools/memory-search.ts +247 -0
  217. package/opencode/tools/presentation-gen.ts +723 -0
  218. package/opencode/tools/scrape-webpage.ts +115 -0
  219. package/opencode/tools/scripts/.python-version +1 -0
  220. package/opencode/tools/scripts/convert_pdf.py +184 -0
  221. package/opencode/tools/scripts/convert_pptx.py +562 -0
  222. package/opencode/tools/scripts/pyproject.toml +11 -0
  223. package/opencode/tools/scripts/uv.lock +287 -0
  224. package/opencode/tools/scripts/validate_slide.py +74 -0
  225. package/opencode/tools/show-user.ts +217 -0
  226. package/opencode/tools/tests/e2e-presentation-fix.ts +277 -0
  227. package/opencode/tools/tests/image-gen.test.ts +215 -0
  228. package/opencode/tools/tests/image-search.test.ts +125 -0
  229. package/opencode/tools/tests/memory-system-benchmark.ts +1076 -0
  230. package/opencode/tools/tests/presentation-gen.test.ts +389 -0
  231. package/opencode/tools/tests/scrape-webpage.test.ts +74 -0
  232. package/opencode/tools/tests/show-user.test.ts +241 -0
  233. package/opencode/tools/tests/video-gen.test.ts +110 -0
  234. package/opencode/tools/tests/web-search.test.ts +106 -0
  235. package/opencode/tools/video-gen.ts +200 -0
  236. package/opencode/tools/web-search.ts +153 -0
  237. package/opencode/tsconfig.json +29 -0
  238. package/package.json +36 -0
  239. package/patch-agent-browser.js +100 -0
  240. package/postinstall.sh +88 -0
  241. package/services/KORTIX-presentation-viewer/run +37 -0
  242. package/services/agent-browser-viewer/run +48 -0
  243. package/services/kortix-master/run +16 -0
  244. package/services/lss-sync/run +22 -0
  245. package/services/opencode-serve/run +25 -0
  246. package/services/opencode-web/run +21 -0
@@ -0,0 +1,507 @@
1
+ #!/usr/bin/env python3
2
+ """TDD Verification Suite for Legal Documents (DOCX).
3
+
4
+ Runs a comprehensive checklist against a legal document directory and reports pass/fail.
5
+ Designed to be run after every section is written.
6
+
7
+ Usage:
8
+ python3 verify-legal.py <document-dir>
9
+ python3 verify-legal.py legal/contract-acme/
10
+ python3 verify-legal.py legal/memo-smith/ --strict # treat warnings as failures
11
+
12
+ Expects:
13
+ <document-dir>/document.docx (the main document)
14
+ <document-dir>/metadata.json (document metadata: type, parties, jurisdiction, etc.)
15
+
16
+ Exit codes: 0 = all pass, 1 = failures found
17
+ """
18
+
19
+ import sys
20
+ import os
21
+ import re
22
+ import json
23
+ from pathlib import Path
24
+
25
+ # ─── Results tracking ───────────────────────────────────────────────────────
26
+
27
+ PASS_COUNT = 0
28
+ FAIL_COUNT = 0
29
+ WARN_COUNT = 0
30
+ STRICT = False
31
+
32
def check_pass(msg):
    """Print a PASS line for *msg* and add one to the module-wide pass tally."""
    global PASS_COUNT
    print(f" PASS: {msg}")
    PASS_COUNT = PASS_COUNT + 1
36
+
37
def check_fail(msg):
    """Print a FAIL line for *msg* and add one to the module-wide failure tally."""
    global FAIL_COUNT
    print(f" FAIL: {msg}")
    FAIL_COUNT = FAIL_COUNT + 1
41
+
42
def check_warn(msg):
    """Print a WARN line for *msg* and add one to the warning tally.

    When the module-level ``STRICT`` flag is truthy the warning is also
    counted as a failure.
    """
    global WARN_COUNT, FAIL_COUNT
    print(f" WARN: {msg}")
    WARN_COUNT = WARN_COUNT + 1
    # Strict mode: every warning is tallied as a failure too.
    FAIL_COUNT = FAIL_COUNT + (1 if STRICT else 0)
48
+
49
+
50
+ # ─── Text extraction from DOCX ─────────────────────────────────────────────
51
+
52
def extract_docx_text(docx_path):
    """Extract paragraph text from a DOCX file.

    Uses python-docx when it can be imported; otherwise falls back to parsing
    ``word/document.xml`` directly from the DOCX zip archive.

    Args:
        docx_path: Path to the ``.docx`` file.

    Returns:
        A list of ``{"text": str, "style": str}`` dicts. With python-docx the
        body paragraphs come first (``style`` is the paragraph style name, or
        ``""``), followed by one entry per table cell with style
        ``"TableCell"``. In the XML fallback there is one entry per ``w:p``
        element and ``style`` is always ``""``.
    """
    # Only the import is guarded: an ImportError means "python-docx is not
    # installed", anything raised while reading the document should surface.
    try:
        from docx import Document
    except ImportError:
        Document = None

    if Document is not None:
        doc = Document(docx_path)
        paragraphs = [
            {"text": para.text, "style": para.style.name if para.style else ""}
            for para in doc.paragraphs
        ]
        # Also extract from tables
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    paragraphs.append({"text": cell.text, "style": "TableCell"})
        return paragraphs

    # Fallback: extract text from DOCX XML directly
    import zipfile
    import xml.etree.ElementTree as ET

    ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    with zipfile.ZipFile(docx_path) as z:
        with z.open("word/document.xml") as f:
            tree = ET.parse(f)

    paragraphs = []
    for para in tree.findall(".//w:p", ns):
        # Word splits a paragraph into runs at arbitrary points (often in the
        # middle of a word), so run texts are concatenated with no separator,
        # the same way python-docx builds ``Paragraph.text``.
        text = "".join(t.text for t in para.findall(".//w:t", ns) if t.text)
        paragraphs.append({"text": text, "style": ""})
    return paragraphs
82
+
83
+
84
def get_full_text(paragraphs):
    """Return the non-blank paragraph texts joined with newlines.

    Entries whose ``"text"`` is empty or whitespace-only are skipped; kept
    texts are emitted unchanged (not stripped).
    """
    kept = []
    for entry in paragraphs:
        text = entry["text"]
        if text.strip():
            kept.append(text)
    return "\n".join(kept)
87
+
88
+
89
+ # ─── Check: Document exists and is non-trivial ─────────────────────────────
90
+
91
def check_document_exists(doc_dir):
    """Locate the DOCX to verify inside *doc_dir* and sanity-check its size.

    ``document.docx`` is preferred when present; otherwise the
    alphabetically first ``.docx`` is used so the choice is deterministic.
    Word lock files (``~$name.docx``), which Word creates next to an open
    document, are ignored.

    Args:
        doc_dir: Directory expected to contain the document.

    Returns:
        The document path as a string, or ``None`` when no usable ``.docx``
        exists or the file is smaller than 500 bytes (a failure is recorded
        in both cases).
    """
    print("--- Document Status ---")
    candidates = sorted(
        p for p in Path(doc_dir).glob("*.docx")
        if not p.name.startswith("~$")
    )
    if not candidates:
        check_fail("No .docx file found in document directory")
        return None

    docx_path = next(
        (p for p in candidates if p.name == "document.docx"),
        candidates[0],
    )
    size = docx_path.stat().st_size
    if size < 500:
        check_fail(f"Document suspiciously small ({size} bytes)")
        return None

    check_pass(f"Document exists: {docx_path.name} ({size:,} bytes)")
    return str(docx_path)
104
+
105
+
106
+ # ─── Check: Defined terms consistency ───────────────────────────────────────
107
+
108
def check_defined_terms(full_text, metadata):
    """Check that every defined term is used somewhere besides its definition.

    A term counts as defined when it appears in quotes followed by a
    definition phrase (``"Term" means ...``) or as a quoted parenthetical
    (``("Term")``). Both straight quotes and the curly quotes that Word
    substitutes by default are recognised.

    Args:
        full_text: The whole document text.
        metadata: Document metadata; ``metadata["type"]`` selects whether the
            absence of defined terms is a warning (contract-type documents)
            or a pass.

    Returns:
        The set of defined terms, or ``None`` when none were detected.
    """
    print("\n--- Defined Terms ---")
    doc_type = metadata.get("type", "").lower()

    # Pattern 1: "Term" means / shall mean / is defined as / has the meaning
    defined_pattern = r'["“]([A-Z][A-Za-z\s]+?)["”](?:\s+(?:means?|shall mean|is defined as|has the meaning))'
    defined_terms = set(re.findall(defined_pattern, full_text))

    # Pattern 2: ("Term") — parenthetical definition
    paren_pattern = r'\(["“]([A-Z][A-Za-z\s]+?)["”]\)'
    defined_terms.update(re.findall(paren_pattern, full_text))

    if not defined_terms and doc_type in ("contract", "agreement", "nda", "settlement"):
        check_warn("No defined terms detected in a contract-type document")
        return

    if not defined_terms:
        check_pass("No defined terms expected for this document type")
        return

    # A term is "used" when it occurs at least once beyond the single
    # occurrence inside its own definition. Sorted so the report is stable
    # from run to run (set iteration order is not).
    unused = sorted(term for term in defined_terms if full_text.count(term) - 1 <= 0)

    if unused:
        check_warn(f"{len(unused)} defined term(s) never used: {', '.join(unused[:5])}")
    else:
        check_pass(f"All {len(defined_terms)} defined terms are used in the document")

    return defined_terms
161
+
162
+
163
+ # ─── Check: Cross-reference integrity ───────────────────────────────────────
164
+
165
def check_cross_references(full_text):
    """Check that ``Section X.Y`` references point at a heading in the text.

    Headings are recognised heuristically as numbers at the start of a line
    (``1.2``, ``1.2.3``, or ``3. Title``). A reference is considered broken
    when neither the exact number nor its top-level section is among them.
    Article/Exhibit/Schedule references are only counted, not verified.

    If no numbered headings can be found at all (for example because the
    numbering is not part of the extracted text), references cannot be
    verified and the check passes with a note instead of reporting every
    reference as broken.

    Args:
        full_text: The whole document text.
    """
    print("\n--- Cross-References ---")

    # Find section references: "Section X.Y", "Article X", "Exhibit A"
    section_refs = set(re.findall(r'Section\s+(\d+(?:\.\d+)*)', full_text))
    article_refs = set(re.findall(r'Article\s+(\w+)', full_text))
    exhibit_refs = set(re.findall(r'Exhibit\s+([A-Z](?:-\d+)?)', full_text))
    schedule_refs = set(re.findall(r'Schedule\s+(\d+|[A-Z])', full_text))

    # Find actual section headings: "1.2" or "1.2.3" at the start of a line
    actual_sections = set(re.findall(r'(?:^|\n)\s*(\d+(?:\.\d+)+)[.\s]', full_text))
    # Add top-level sections ("3. Title" / "3 Title")
    actual_sections.update(re.findall(r'(?:^|\n)\s*(\d+)[.\s]+[A-Z]', full_text))

    # A reference is tolerated when its top-level section exists, even if the
    # exact subsection heading was not detected.
    broken_refs = sorted(
        f"Section {ref}"
        for ref in section_refs
        if ref not in actual_sections and ref.split(".")[0] not in actual_sections
    )

    if broken_refs and not actual_sections:
        # Nothing to compare against, regardless of how many references exist.
        check_pass("Cross-references present (section structure not parseable for verification)")
    elif broken_refs and len(broken_refs) <= 3:
        check_warn(f"Potentially broken references: {', '.join(broken_refs)}")
    elif broken_refs:
        check_fail(f"{len(broken_refs)} potentially broken section reference(s)")
    else:
        total = len(section_refs) + len(article_refs) + len(exhibit_refs) + len(schedule_refs)
        if total > 0:
            check_pass(f"All {total} cross-references appear valid")
        else:
            check_pass("No cross-references to check")
202
+
203
+
204
+ # ─── Check: Citation format (Bluebook) ─────────────────────────────────────
205
+
206
def check_citations(full_text, metadata):
    """Run heuristic citation-format checks on litigation/research documents.

    The check only applies when ``metadata["type"]`` contains one of the
    litigation/research keywords (memo, brief, motion, ...). It counts case,
    U.S.C. and C.F.R. citations, warns when there are none, and then warns
    about ``vs.`` usage and about a complete lack of pinpoint page cites
    when more than five case citations are present.

    Args:
        full_text: The whole document text.
        metadata: Document metadata; only ``"type"`` is read.
    """
    print("\n--- Legal Citations ---")
    doc_type = metadata.get("type", "").lower()

    # Only check citations in litigation/research documents
    citation_types = ("memo", "memorandum", "brief", "motion", "complaint", "opinion", "petition")
    if not any(t in doc_type for t in citation_types):
        check_pass("Citation format not applicable for this document type")
        return

    # Case citations: Party v. Party
    case_cites = re.findall(r'[A-Z][a-z]+\s+v\.\s+[A-Z][a-z]+', full_text)
    # Statute citations: ## U.S.C. § ##
    statute_cites = re.findall(r'\d+\s+U\.S\.C\.\s+§\s*\d+', full_text)
    # CFR citations: ## C.F.R. § ##
    cfr_cites = re.findall(r'\d+\s+C\.F\.R\.\s+§\s*\d+', full_text)

    total_cites = len(case_cites) + len(statute_cites) + len(cfr_cites)

    if total_cites == 0:
        check_warn("No legal citations found in a litigation/research document")
        return

    check_pass(f"Found {total_cites} citation(s): {len(case_cites)} cases, {len(statute_cites)} statutes, {len(cfr_cites)} regulations")

    # Check for common citation format errors
    errors = []

    # Check: "v." not "vs."
    vs_errors = len(re.findall(r'\bvs\.\s', full_text))
    if vs_errors:
        errors.append(f'{vs_errors} instance(s) of "vs." (should be "v.")')

    # Note: "Id." usage and italics are not checked — formatting is not
    # available in the extracted text.

    # Pinpoint citations: a page number after the first page, e.g.
    # "418 U.S. 241, 258 (1974)". A bare "418 U.S. 241 (1974)" is a valid
    # full citation, so the absence of pinpoints is only flagged when the
    # document cites many cases. The reporter abbreviation "S. Ct." ends in
    # a period, which must be consumed before the page number.
    proper_pinpoint = len(re.findall(
        r'\d+\s+(?:F\.\d+d|F\.\d+th|S\.\s*Ct\.?|U\.S\.)\s+\d+,\s*\d+', full_text))

    if proper_pinpoint == 0 and len(case_cites) > 5:
        errors.append("No pinpoint page references found — consider adding specific page cites for key propositions")

    if errors:
        for e in errors:
            check_warn(e)
    else:
        check_pass("Citation format appears correct")
262
+
263
+
264
+ # ─── Check: Placeholder / draft artifact detection ─────────────────────────
265
+
266
def check_placeholders(full_text):
    """Fail the document if it still contains placeholders or draft artifacts.

    Looks for bracketed fill-ins (``[INSERT ...]``, ``[TBD]``), runs of four
    or more underscores that are not part of a signature block, TODO-style
    markers, review markers (``[REVIEW]``, ``[CHECK]``, ...) and the word
    ``DRAFT`` in capitals. Any hit is reported as a single failure with a
    per-category breakdown.

    Args:
        full_text: The whole document text.
    """
    print("\n--- Completeness ---")

    # A blank is a signature line only if *that occurrence* directly follows
    # a signature label; judging by the underscore string alone would let one
    # "By: ____" hide every other blank of the same length.
    signature_label = re.compile(r'(?:By|Name|Title|Date|Signature):\s*$')
    underscores = [
        m.group(0)
        for m in re.finditer(r'_{4,}', full_text)
        if not signature_label.search(full_text[max(0, m.start() - 40):m.start()])
    ]

    placeholders = {
        "brackets": re.findall(r'\[(?:INSERT|TBD|TODO|FILL IN|______|NAME|DATE|AMOUNT|ADDRESS|NUMBER|TO BE)[^\]]*\]', full_text, re.I),
        "underscores": underscores,
        # Word boundaries keep ordinary words ("shackle") and longer Roman
        # numerals ("XXXIV") from being counted as markers.
        "todo_comments": re.findall(r'\b(?:TODO|FIXME|X{3,}|HACK|TBD|PLACEHOLDER)S?\b', full_text, re.I),
        "highlight_markers": re.findall(r'\[HIGHLIGHT\]|\[REVIEW\]|\[CHECK\]|\[VERIFY\]', full_text, re.I),
        "draft_watermarks": re.findall(r'\bDRAFT\b', full_text),
    }

    total = sum(len(v) for v in placeholders.values())
    if total == 0:
        check_pass("No placeholders, TODOs, or draft artifacts found")
        return

    details = []
    if placeholders["brackets"]:
        details.append(f'{len(placeholders["brackets"])} [INSERT/TBD] bracket(s)')
    if placeholders["underscores"]:
        details.append(f'{len(placeholders["underscores"])} blank line(s) (____)')
    if placeholders["todo_comments"]:
        details.append(f'{len(placeholders["todo_comments"])} TODO/TBD marker(s)')
    if placeholders["highlight_markers"]:
        details.append(f'{len(placeholders["highlight_markers"])} [REVIEW/CHECK] marker(s)')
    if placeholders["draft_watermarks"]:
        details.append(f'{len(placeholders["draft_watermarks"])} DRAFT watermark(s)')
    check_fail(f"{total} placeholder(s)/draft artifact(s): {'; '.join(details)}")
295
+
296
+
297
+ # ─── Check: Party name consistency ──────────────────────────────────────────
298
+
299
def check_party_consistency(full_text, metadata):
    """Check how the parties listed in the metadata are named in the text.

    For a party with both ``name`` and ``short_name``: warn if the short name
    never appears, and warn if the full name appears more than five times
    even though the short name is in use. For a party with only ``name``:
    pass if the name appears in the text, warn if it does not.

    Args:
        full_text: The whole document text.
        metadata: Document metadata; ``metadata["parties"]`` is read as a
            list of dicts with optional ``"name"`` and ``"short_name"`` keys.
    """
    print("\n--- Party Names ---")

    parties = metadata.get("parties", [])
    if not parties:
        check_pass("No parties specified in metadata (skipping)")
        return

    for party in parties:
        name = party.get("name", "")
        short = party.get("short_name", "")
        if name and short:
            # The short name should actually be used once it has been defined.
            uses = full_text.count(short)
            if uses == 0:
                check_warn(f'Short name "{short}" for party "{name}" never used')
            # Heuristic: once a short name is in use, the full name should
            # not keep appearing (more than 5 occurrences is flagged).
            full_uses = full_text.count(name)
            if full_uses > 5 and uses > 0:
                check_warn(f'Full name "{name}" used {full_uses} times (consider using "{short}" consistently)')
        elif name:
            # Only claim the party is referenced if the name is really there.
            if name in full_text:
                check_pass(f'Party "{name}" referenced in document')
            else:
                check_warn(f'Party "{name}" not found in document')
322
+
323
+
324
+ # ─── Check: Boilerplate provisions (contracts) ─────────────────────────────
325
+
326
def check_boilerplate(full_text, metadata):
    """Look for standard boilerplate provisions in contract-type documents.

    The check only runs when the lower-cased ``metadata["type"]`` is one of
    the contract-like types listed below; otherwise it records a pass and
    returns. Each provision is detected with a case-insensitive regex search
    over the whole text.

    Args:
        full_text: Document text to search.
        metadata: Metadata mapping; only the ``"type"`` key is read.

    Returns:
        None. Missing provisions are reported with ``check_warn`` and the
        provisions that were found with ``check_pass``.
    """
    print("\n--- Required Provisions ---")

    contract_like_types = {
        "contract",
        "agreement",
        "nda",
        "settlement",
        "terms of service",
        "employment agreement",
        "services agreement",
        "license agreement",
    }
    if metadata.get("type", "").lower() not in contract_like_types:
        check_pass("Boilerplate check not applicable for this document type")
        return

    provision_patterns = {
        "governing law": r'(?:governing\s+law|choice\s+of\s+law|governed\s+by.*laws\s+of)',
        "entire agreement": r'(?:entire\s+agreement|constitutes?\s+the\s+entire)',
        "severability": r'(?:severab|invalid.*unenforceab|unenforceab.*sever)',
        "amendment": r'(?:amend(?:ment|ed).*(?:writ(?:ten|ing)|signed)|(?:not\s+be\s+)?modif(?:y|ied|ication).*(?:except|writ))',
        "notices": r'(?:notice.*(?:shall|must|will|be)\s+.*(?:writ(?:ten|ing)|deliver|given|sent)|all\s+notices?\s+under)',
        "assignment": r'(?:assign(?:ment)?.*(?:without|prior|consent)|neither\s+party\s+may\s+assign)',
    }

    # One search per provision; dict order keeps the report order stable.
    hits = {
        label: re.search(regex, full_text, re.I) is not None
        for label, regex in provision_patterns.items()
    }
    present = [label for label, hit in hits.items() if hit]
    absent = [label for label, hit in hits.items() if not hit]

    if absent:
        absent_list = ', '.join(absent)
        check_warn(f"Missing standard provisions: {absent_list}")
    if present:
        present_list = ', '.join(present)
        check_pass(f"Found {len(present)}/{len(provision_patterns)} standard provisions: {present_list}")
356
+
357
+
358
+ # ─── Check: Shall/May/Must consistency ─────────────────────────────────────
359
+
360
def check_modal_verbs(full_text, metadata):
    """Report modal-verb usage and flag mixed "shall"/"must" drafting.

    The check only runs when the lower-cased ``metadata["type"]`` is one of
    the document types listed below; otherwise it records a pass and returns.
    Counting is case-insensitive and matches whole words.

    Args:
        full_text: Document text to search.
        metadata: Metadata mapping; only the ``"type"`` key is read.

    Returns:
        None. The counts are reported with ``check_pass``; a ``check_warn``
        is added when both "shall" and "must" occur more than 3 times.
    """
    print("\n--- Language Precision ---")
    doc_type = metadata.get("type", "").lower()

    if doc_type not in ("contract", "agreement", "nda", "settlement", "regulation",
                        "terms of service", "employment agreement"):
        check_pass("Modal verb check not applicable for this document type")
        return

    def count(pattern):
        # Case-insensitive number of non-overlapping matches in the text.
        return len(re.findall(pattern, full_text, re.I))

    shall_count = count(r'\bshall\b')
    must_count = count(r'\bmust\b')
    # Because matching ignores case, a bare \bmay\b would also count the
    # month in long-form dates such as "May 5, 2024". The lookahead skips
    # "May" when it is followed by a day and a four-digit year.
    may_count = count(r'\bmay\b(?!\s+\d{1,2},?\s+\d{4})')
    will_count = count(r'\bwill\b')

    check_pass(f"Modal verbs: shall={shall_count}, must={must_count}, may={may_count}, will={will_count}")

    # Warn if both "shall" and "must" are used (inconsistent — pick one style)
    if shall_count > 3 and must_count > 3:
        check_warn('Both "shall" and "must" used frequently — consider standardizing to one')
382
+
383
+
384
+ # ─── Check: Word count / length ─────────────────────────────────────────────
385
+
386
def check_length(full_text, metadata):
    """Report the document's word count and enforce a word limit for briefs.

    Words are counted by splitting the text on whitespace. For document
    types containing "brief" or "motion", ``metadata["word_limit"]`` is
    enforced when present. For all other types the document is only flagged
    when it has fewer than 50 words.

    Args:
        full_text: Document text to measure.
        metadata: Metadata mapping; reads ``"type"``, ``"word_limit"`` and
            ``"page_limit"``. ``word_limit`` is compared numerically against
            the word count.

    Returns:
        None. Findings are reported via ``check_pass`` / ``check_warn`` /
        ``check_fail``.
    """
    print("\n--- Document Length ---")
    word_count = len(full_text.split())
    doc_type = metadata.get("type", "").lower()

    # Limit checks for briefs and motions
    if "brief" in doc_type or "motion" in doc_type:
        page_limit = metadata.get("page_limit")
        word_limit = metadata.get("word_limit")
        if word_limit and word_count > word_limit:
            check_fail(f"Word count {word_count:,} exceeds limit of {word_limit:,}")
        elif word_limit:
            check_pass(f"Word count {word_count:,} within limit of {word_limit:,}")
        elif page_limit:
            # Only the extracted text is available here, so a page limit
            # cannot be verified. Say so instead of claiming that no limit
            # was specified.
            check_pass(f"Word count: {word_count:,} (page limit of {page_limit} not checked; no word limit specified)")
        else:
            check_pass(f"Word count: {word_count:,} (no limit specified)")
    elif word_count < 50:
        check_warn(f"Document very short ({word_count} words)")
    else:
        check_pass(f"Word count: {word_count:,}")
405
+
406
+
407
+ # ─── Check: Date consistency ────────────────────────────────────────────────
408
+
409
def check_date_consistency(full_text):
    """Check whether the document sticks to a single date notation.

    Three notations are recognised: long form ("January 5, 2024"), slash
    form ("1/5/2024") and ISO form ("2024-01-05"). Using more than one of
    them yields a warning.

    Args:
        full_text: Document text to scan.

    Returns:
        None. The outcome is reported via ``check_pass`` / ``check_warn``.
    """
    print("\n--- Date Format ---")

    # Number of matches per notation.
    n_long = len(re.findall(r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}', full_text))
    n_slash = len(re.findall(r'\d{1,2}/\d{1,2}/\d{2,4}', full_text))
    n_iso = len(re.findall(r'\d{4}-\d{2}-\d{2}', full_text))

    notations_in_use = sum(1 for n in (n_long, n_slash, n_iso) if n)

    if notations_in_use == 0:
        check_pass("No dates to check")
        return

    if notations_in_use == 1:
        date_total = n_long + n_slash + n_iso
        check_pass(f"Consistent date format ({date_total} dates found)")
        return

    check_warn(f"Inconsistent date formats: {n_long} long, {n_slash} slash, {n_iso} ISO")
429
+
430
+
431
+ # ─── Main ───────────────────────────────────────────────────────────────────
432
+
433
def _print_summary(result=None):
    """Print the PASS/WARN/FAIL tally and, when given, the RESULT line."""
    bar = "=" * 35
    print(f"\n{bar}")
    print(f" PASS: {PASS_COUNT}")
    print(f" WARN: {WARN_COUNT}")
    print(f" FAIL: {FAIL_COUNT}")
    print(bar)
    if result is not None:
        print(f" RESULT: {result}")


def main():
    """Command-line entry point: verify the legal document in a directory.

    Usage: ``verify-legal.py <document-dir> [--strict]``

    Loads ``metadata.json`` from the directory when present, locates the
    document, extracts its text, runs every check and prints a summary.

    Exits with status 1 when the arguments are missing, the document cannot
    be found or read, or any check failed; exits with status 0 otherwise.
    Sets the module-level ``STRICT`` flag when ``--strict`` is passed.
    """
    global STRICT

    # Accept "--strict" in any position: the document directory is the first
    # argument that is not the flag.
    positional = [arg for arg in sys.argv[1:] if arg != "--strict"]
    if not positional:
        print("Usage: verify-legal.py <document-dir> [--strict]", file=sys.stderr)
        sys.exit(1)

    # Drop trailing slashes, but keep the argument as given if stripping
    # would leave nothing (e.g. "/").
    doc_dir = positional[0].rstrip("/") or positional[0]
    STRICT = "--strict" in sys.argv

    print(f"=== Legal Document Verification: {doc_dir} ===\n")

    # Load metadata
    metadata = {}
    metadata_path = os.path.join(doc_dir, "metadata.json")
    if os.path.exists(metadata_path):
        try:
            with open(metadata_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            # Report the problem as a failed check instead of crashing, then
            # continue with empty metadata so the other checks still run.
            check_fail(f"Could not read metadata.json: {e}")
        else:
            if isinstance(loaded, dict):
                metadata = loaded
            else:
                # The checks call metadata.get(...), so only an object works.
                check_fail("metadata.json must contain a JSON object")
    else:
        print(" NOTE: No metadata.json found, running with defaults\n")

    # Check 1: Document exists
    docx_path = check_document_exists(doc_dir)
    if not docx_path:
        _print_summary("FAILED")
        sys.exit(1)

    # Extract text
    try:
        paragraphs = extract_docx_text(docx_path)
        full_text = get_full_text(paragraphs)
    except Exception as e:
        # Top-level boundary: any extraction error becomes a failed check.
        check_fail(f"Could not read document: {e}")
        _print_summary("FAILED")
        sys.exit(1)

    # Run all checks
    check_defined_terms(full_text, metadata)
    check_cross_references(full_text)
    check_citations(full_text, metadata)
    check_placeholders(full_text)
    check_party_consistency(full_text, metadata)
    check_boilerplate(full_text, metadata)
    check_modal_verbs(full_text, metadata)
    check_length(full_text, metadata)
    check_date_consistency(full_text)

    # Summary
    if FAIL_COUNT > 0:
        _print_summary("FAILED")
        sys.exit(1)
    elif WARN_COUNT > 0:
        _print_summary("PASSED (with warnings)")
    else:
        _print_summary("ALL CHECKS PASSED")
    sys.exit(0)
504
+
505
+
506
# Run the verifier only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()