@kortix/sandbox 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/customize.sh +143 -0
- package/config/kortix-env-setup.sh +25 -0
- package/kortix-master/package.json +22 -0
- package/kortix-master/src/config.ts +22 -0
- package/kortix-master/src/index.ts +44 -0
- package/kortix-master/src/routes/env.ts +65 -0
- package/kortix-master/src/routes/proxy.ts +108 -0
- package/kortix-master/src/routes/update.ts +185 -0
- package/kortix-master/src/services/proxy.ts +43 -0
- package/kortix-master/src/services/secret-store.ts +156 -0
- package/kortix-master/tsconfig.json +14 -0
- package/opencode/agents/kortix-browser.md +142 -0
- package/opencode/agents/kortix-build.md +62 -0
- package/opencode/agents/kortix-explore.md +66 -0
- package/opencode/agents/kortix-image-gen.md +33 -0
- package/opencode/agents/kortix-main.md +450 -0
- package/opencode/agents/kortix-plan.md +100 -0
- package/opencode/agents/kortix-research.md +84 -0
- package/opencode/agents/kortix-sheets.md +61 -0
- package/opencode/agents/kortix-slides.md +64 -0
- package/opencode/agents/kortix-web-dev.md +572 -0
- package/opencode/commands/email.md +36 -0
- package/opencode/commands/init.md +43 -0
- package/opencode/commands/journal.md +44 -0
- package/opencode/commands/memory-init.md +81 -0
- package/opencode/commands/memory-search.md +50 -0
- package/opencode/commands/memory-status.md +56 -0
- package/opencode/commands/research.md +36 -0
- package/opencode/commands/search.md +38 -0
- package/opencode/commands/slides.md +32 -0
- package/opencode/commands/spreadsheet.md +30 -0
- package/opencode/memory.json +37 -0
- package/opencode/ocx.jsonc +10 -0
- package/opencode/opencode.jsonc +103 -0
- package/opencode/package.json +25 -0
- package/opencode/patches/apply.sh +19 -0
- package/opencode/patches/opencode-pty-spawn.txt +49 -0
- package/opencode/plugin/background-agents.ts.disabled +483 -0
- package/opencode/plugin/kdco-primitives/get-project-id.ts +172 -0
- package/opencode/plugin/kdco-primitives/index.ts +26 -0
- package/opencode/plugin/kdco-primitives/log-warn.ts +51 -0
- package/opencode/plugin/kdco-primitives/mutex.ts +122 -0
- package/opencode/plugin/kdco-primitives/shell.ts +138 -0
- package/opencode/plugin/kdco-primitives/temp.ts +36 -0
- package/opencode/plugin/kdco-primitives/terminal-detect.ts +34 -0
- package/opencode/plugin/kdco-primitives/types.ts +13 -0
- package/opencode/plugin/kdco-primitives/with-timeout.ts +84 -0
- package/opencode/plugin/memory.ts +306 -0
- package/opencode/plugin/worktree/state.ts +412 -0
- package/opencode/plugin/worktree/terminal.ts +1002 -0
- package/opencode/plugin/worktree.ts +861 -0
- package/opencode/skills/KORTIX-browser/SKILL.md +478 -0
- package/opencode/skills/KORTIX-cron-triggers/SKILL.md +173 -0
- package/opencode/skills/KORTIX-deep-research/SKILL.md +278 -0
- package/opencode/skills/KORTIX-docx/SKILL.md +398 -0
- package/opencode/skills/KORTIX-docx/scripts/__init__.py +1 -0
- package/opencode/skills/KORTIX-docx/scripts/accept_changes.py +104 -0
- package/opencode/skills/KORTIX-docx/scripts/comment.py +244 -0
- package/opencode/skills/KORTIX-docx/scripts/office/helpers/__init__.py +0 -0
- package/opencode/skills/KORTIX-docx/scripts/office/helpers/merge_runs.py +199 -0
- package/opencode/skills/KORTIX-docx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/opencode/skills/KORTIX-docx/scripts/office/pack.py +159 -0
- package/opencode/skills/KORTIX-docx/scripts/office/soffice.py +183 -0
- package/opencode/skills/KORTIX-docx/scripts/office/unpack.py +132 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validate.py +111 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validators/__init__.py +15 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validators/base.py +847 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validators/docx.py +446 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validators/pptx.py +275 -0
- package/opencode/skills/KORTIX-docx/scripts/office/validators/redlining.py +247 -0
- package/opencode/skills/KORTIX-docx/scripts/render_docx.py +179 -0
- package/opencode/skills/KORTIX-docx/scripts/templates/comments.xml +3 -0
- package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtended.xml +3 -0
- package/opencode/skills/KORTIX-docx/scripts/templates/commentsExtensible.xml +3 -0
- package/opencode/skills/KORTIX-docx/scripts/templates/commentsIds.xml +3 -0
- package/opencode/skills/KORTIX-docx/scripts/templates/people.xml +3 -0
- package/opencode/skills/KORTIX-domain-research/SKILL.md +96 -0
- package/opencode/skills/KORTIX-domain-research/scripts/domain-lookup.py +810 -0
- package/opencode/skills/KORTIX-elevenlabs/SKILL.md +230 -0
- package/opencode/skills/KORTIX-elevenlabs/scripts/tts.py +389 -0
- package/opencode/skills/KORTIX-email/SKILL.md +145 -0
- package/opencode/skills/KORTIX-legal-writer/SKILL.md +409 -0
- package/opencode/skills/KORTIX-legal-writer/references/bluebook.md +152 -0
- package/opencode/skills/KORTIX-legal-writer/references/document-types.md +416 -0
- package/opencode/skills/KORTIX-legal-writer/scripts/courtlistener.py +291 -0
- package/opencode/skills/KORTIX-legal-writer/scripts/ecfr_lookup.py +299 -0
- package/opencode/skills/KORTIX-legal-writer/scripts/verify-legal.py +507 -0
- package/opencode/skills/KORTIX-logo-creator/SKILL.md +293 -0
- package/opencode/skills/KORTIX-logo-creator/references/prompt-patterns.md +134 -0
- package/opencode/skills/KORTIX-logo-creator/scripts/compose_logo.py +406 -0
- package/opencode/skills/KORTIX-logo-creator/scripts/create_logo_sheet.py +258 -0
- package/opencode/skills/KORTIX-logo-creator/scripts/remove_bg.py +96 -0
- package/opencode/skills/KORTIX-memory/SKILL.md +261 -0
- package/opencode/skills/KORTIX-memory/scripts/export-sessions.py +409 -0
- package/opencode/skills/KORTIX-paper-creator/SKILL.md +549 -0
- package/opencode/skills/KORTIX-paper-creator/assets/template.tex +101 -0
- package/opencode/skills/KORTIX-paper-creator/scripts/compile.sh +177 -0
- package/opencode/skills/KORTIX-paper-creator/scripts/openalex_to_bibtex.py +220 -0
- package/opencode/skills/KORTIX-paper-creator/scripts/verify.sh +354 -0
- package/opencode/skills/KORTIX-paper-search/SKILL.md +418 -0
- package/opencode/skills/KORTIX-pdf/SKILL.md +232 -0
- package/opencode/skills/KORTIX-pdf/forms.md +36 -0
- package/opencode/skills/KORTIX-pdf/reference.md +105 -0
- package/opencode/skills/KORTIX-pdf/scripts/check_bounding_boxes.py +65 -0
- package/opencode/skills/KORTIX-pdf/scripts/check_fillable_fields.py +11 -0
- package/opencode/skills/KORTIX-pdf/scripts/convert_pdf_to_images.py +33 -0
- package/opencode/skills/KORTIX-pdf/scripts/create_validation_image.py +37 -0
- package/opencode/skills/KORTIX-pdf/scripts/extract_form_field_info.py +122 -0
- package/opencode/skills/KORTIX-pdf/scripts/extract_form_structure.py +115 -0
- package/opencode/skills/KORTIX-pdf/scripts/fill_fillable_fields.py +98 -0
- package/opencode/skills/KORTIX-pdf/scripts/fill_pdf_form_with_annotations.py +107 -0
- package/opencode/skills/KORTIX-plan/SKILL.md +228 -0
- package/opencode/skills/KORTIX-presentation-viewer/SKILL.md +87 -0
- package/opencode/skills/KORTIX-presentation-viewer/serve.ts +136 -0
- package/opencode/skills/KORTIX-presentation-viewer/viewer.html +559 -0
- package/opencode/skills/KORTIX-presentations/SKILL.md +344 -0
- package/opencode/skills/KORTIX-remotion/SKILL.md +56 -0
- package/opencode/skills/KORTIX-remotion/rules/3d.md +86 -0
- package/opencode/skills/KORTIX-remotion/rules/animations.md +29 -0
- package/opencode/skills/KORTIX-remotion/rules/assets.md +78 -0
- package/opencode/skills/KORTIX-remotion/rules/audio-visualization.md +198 -0
- package/opencode/skills/KORTIX-remotion/rules/audio.md +169 -0
- package/opencode/skills/KORTIX-remotion/rules/calculate-metadata.md +104 -0
- package/opencode/skills/KORTIX-remotion/rules/can-decode.md +75 -0
- package/opencode/skills/KORTIX-remotion/rules/charts.md +120 -0
- package/opencode/skills/KORTIX-remotion/rules/compositions.md +141 -0
- package/opencode/skills/KORTIX-remotion/rules/display-captions.md +184 -0
- package/opencode/skills/KORTIX-remotion/rules/extract-frames.md +229 -0
- package/opencode/skills/KORTIX-remotion/rules/ffmpeg.md +38 -0
- package/opencode/skills/KORTIX-remotion/rules/fonts.md +152 -0
- package/opencode/skills/KORTIX-remotion/rules/get-audio-duration.md +58 -0
- package/opencode/skills/KORTIX-remotion/rules/get-video-dimensions.md +68 -0
- package/opencode/skills/KORTIX-remotion/rules/get-video-duration.md +58 -0
- package/opencode/skills/KORTIX-remotion/rules/gifs.md +141 -0
- package/opencode/skills/KORTIX-remotion/rules/images.md +130 -0
- package/opencode/skills/KORTIX-remotion/rules/import-srt-captions.md +69 -0
- package/opencode/skills/KORTIX-remotion/rules/light-leaks.md +73 -0
- package/opencode/skills/KORTIX-remotion/rules/lottie.md +68 -0
- package/opencode/skills/KORTIX-remotion/rules/maps.md +401 -0
- package/opencode/skills/KORTIX-remotion/rules/measuring-dom-nodes.md +35 -0
- package/opencode/skills/KORTIX-remotion/rules/measuring-text.md +143 -0
- package/opencode/skills/KORTIX-remotion/rules/parameters.md +98 -0
- package/opencode/skills/KORTIX-remotion/rules/sequencing.md +118 -0
- package/opencode/skills/KORTIX-remotion/rules/subtitles.md +36 -0
- package/opencode/skills/KORTIX-remotion/rules/tailwind.md +11 -0
- package/opencode/skills/KORTIX-remotion/rules/text-animations.md +20 -0
- package/opencode/skills/KORTIX-remotion/rules/timing.md +179 -0
- package/opencode/skills/KORTIX-remotion/rules/transcribe-captions.md +70 -0
- package/opencode/skills/KORTIX-remotion/rules/transitions.md +197 -0
- package/opencode/skills/KORTIX-remotion/rules/transparent-videos.md +106 -0
- package/opencode/skills/KORTIX-remotion/rules/trimming.md +53 -0
- package/opencode/skills/KORTIX-remotion/rules/videos.md +171 -0
- package/opencode/skills/KORTIX-secrets/SKILL.md +280 -0
- package/opencode/skills/KORTIX-semantic-search/SKILL.md +213 -0
- package/opencode/skills/KORTIX-session-search/SKILL.md +807 -0
- package/opencode/skills/KORTIX-session-search/Untitled +1 -0
- package/opencode/skills/KORTIX-skill-creator/SKILL.md +163 -0
- package/opencode/skills/KORTIX-web-research/SKILL.md +69 -0
- package/opencode/skills/KORTIX-xlsx/LICENSE.txt +30 -0
- package/opencode/skills/KORTIX-xlsx/SKILL.md +549 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/__init__.py +0 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/merge_runs.py +199 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/helpers/simplify_redlines.py +197 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/pack.py +159 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/mce/mc.xsd +75 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/soffice.py +183 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/unpack.py +132 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validate.py +111 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validators/__init__.py +15 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validators/base.py +847 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validators/docx.py +446 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validators/pptx.py +275 -0
- package/opencode/skills/KORTIX-xlsx/scripts/office/validators/redlining.py +247 -0
- package/opencode/skills/KORTIX-xlsx/scripts/recalc.py +184 -0
- package/opencode/tools/image-gen.ts +342 -0
- package/opencode/tools/image-search.ts +190 -0
- package/opencode/tools/memory-get.ts +168 -0
- package/opencode/tools/memory-search.ts +247 -0
- package/opencode/tools/presentation-gen.ts +723 -0
- package/opencode/tools/scrape-webpage.ts +115 -0
- package/opencode/tools/scripts/.python-version +1 -0
- package/opencode/tools/scripts/convert_pdf.py +184 -0
- package/opencode/tools/scripts/convert_pptx.py +562 -0
- package/opencode/tools/scripts/pyproject.toml +11 -0
- package/opencode/tools/scripts/uv.lock +287 -0
- package/opencode/tools/scripts/validate_slide.py +74 -0
- package/opencode/tools/show-user.ts +217 -0
- package/opencode/tools/tests/e2e-presentation-fix.ts +277 -0
- package/opencode/tools/tests/image-gen.test.ts +215 -0
- package/opencode/tools/tests/image-search.test.ts +125 -0
- package/opencode/tools/tests/memory-system-benchmark.ts +1076 -0
- package/opencode/tools/tests/presentation-gen.test.ts +389 -0
- package/opencode/tools/tests/scrape-webpage.test.ts +74 -0
- package/opencode/tools/tests/show-user.test.ts +241 -0
- package/opencode/tools/tests/video-gen.test.ts +110 -0
- package/opencode/tools/tests/web-search.test.ts +106 -0
- package/opencode/tools/video-gen.ts +200 -0
- package/opencode/tools/web-search.ts +153 -0
- package/opencode/tsconfig.json +29 -0
- package/package.json +36 -0
- package/patch-agent-browser.js +100 -0
- package/postinstall.sh +88 -0
- package/services/KORTIX-presentation-viewer/run +37 -0
- package/services/agent-browser-viewer/run +48 -0
- package/services/kortix-master/run +16 -0
- package/services/lss-sync/run +22 -0
- package/services/opencode-serve/run +25 -0
- package/services/opencode-web/run +21 -0
|
@@ -0,0 +1,507 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""TDD Verification Suite for Legal Documents (DOCX).
|
|
3
|
+
|
|
4
|
+
Runs a comprehensive checklist against a legal document directory and reports pass/fail.
|
|
5
|
+
Designed to be run after every section is written.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python3 verify-legal.py <document-dir>
|
|
9
|
+
python3 verify-legal.py legal/contract-acme/
|
|
10
|
+
python3 verify-legal.py legal/memo-smith/ --strict # treat warnings as failures
|
|
11
|
+
|
|
12
|
+
Expects:
|
|
13
|
+
<document-dir>/document.docx (the main document)
|
|
14
|
+
<document-dir>/metadata.json (document metadata: type, parties, jurisdiction, etc.)
|
|
15
|
+
|
|
16
|
+
Exit codes: 0 = all pass, 1 = failures found
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import sys
|
|
20
|
+
import os
|
|
21
|
+
import re
|
|
22
|
+
import json
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
|
|
25
|
+
# ─── Results tracking ───────────────────────────────────────────────────────
|
|
26
|
+
|
|
27
|
+
# Number of checks reported through check_pass().
PASS_COUNT = 0
|
|
28
|
+
# Number of failures: bumped by check_fail(), and by check_warn() when STRICT is set.
FAIL_COUNT = 0
|
|
29
|
+
# Number of warnings reported through check_warn().
WARN_COUNT = 0
|
|
30
|
+
# When true, every warning is also counted as a failure (see check_warn()).
# NOTE(review): presumably switched on by the --strict CLI flag; the code that sets it is not in view.
STRICT = False
|
|
31
|
+
|
|
32
|
+
def check_pass(msg):
    """Print a PASS line for *msg* and add one to the module-wide pass tally."""
    global PASS_COUNT
    print(" PASS: {}".format(msg))
    PASS_COUNT = PASS_COUNT + 1
|
|
36
|
+
|
|
37
|
+
def check_fail(msg):
    """Print a FAIL line for *msg* and add one to the module-wide failure tally."""
    global FAIL_COUNT
    print(" FAIL: {}".format(msg))
    FAIL_COUNT = FAIL_COUNT + 1
|
|
41
|
+
|
|
42
|
+
def check_warn(msg):
    """Print a WARN line for *msg* and count it.

    The warning tally always grows by one; when the module-level ``STRICT``
    flag is truthy the failure tally grows by one as well.
    """
    global WARN_COUNT, FAIL_COUNT
    print(" WARN: {}".format(msg))
    WARN_COUNT = WARN_COUNT + 1
    if not STRICT:
        return
    FAIL_COUNT = FAIL_COUNT + 1
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ─── Text extraction from DOCX ─────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
def extract_docx_text(docx_path):
    """Extract paragraph text from a DOCX file.

    Uses python-docx when it can be imported; otherwise falls back to reading
    ``word/document.xml`` straight out of the DOCX zip container.

    Args:
        docx_path: Path to the ``.docx`` file.

    Returns:
        A list of ``{"text": str, "style": str}`` dicts. With python-docx the
        body paragraphs come first (carrying their style name, or ``""`` when
        the paragraph has no style), followed by one entry per table cell
        tagged ``"TableCell"``. In the XML fallback every ``w:p`` element
        yields one entry with an empty style.
    """
    # Keep the try body to the import alone so that only a missing
    # python-docx triggers the fallback.
    try:
        from docx import Document
    except ImportError:
        return _extract_docx_text_from_xml(docx_path)

    doc = Document(docx_path)
    paragraphs = [
        {"text": para.text, "style": para.style.name if para.style else ""}
        for para in doc.paragraphs
    ]
    # Also extract from tables
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                paragraphs.append({"text": cell.text, "style": "TableCell"})
    return paragraphs


def _extract_docx_text_from_xml(docx_path):
    """Read paragraph text directly from ``word/document.xml`` in the DOCX zip."""
    import zipfile
    import xml.etree.ElementTree as ET

    ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    with zipfile.ZipFile(docx_path) as z:
        with z.open("word/document.xml") as f:
            tree = ET.parse(f)

    paragraphs = []
    for para in tree.findall(".//w:p", ns):
        # A paragraph's text is split across runs at arbitrary points (often
        # mid-word, wherever formatting changes), so the pieces are joined
        # with no separator; a space here would break words apart.
        text = "".join(t.text for t in para.findall(".//w:t", ns) if t.text)
        paragraphs.append({"text": text, "style": ""})
    return paragraphs
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_full_text(paragraphs):
    """Return the text of every non-blank paragraph, newline-separated.

    Entries whose ``"text"`` is empty or whitespace-only are dropped; kept
    texts are emitted unmodified.
    """
    kept = []
    for entry in paragraphs:
        body = entry["text"]
        if body.strip():
            kept.append(body)
    return "\n".join(kept)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ─── Check: Document exists and is non-trivial ─────────────────────────────
|
|
90
|
+
|
|
91
|
+
def check_document_exists(doc_dir):
    """Locate the main DOCX in *doc_dir* and sanity-check its size.

    Selection is deterministic: ``document.docx`` is used when present (the
    file name the module docstring says is expected), otherwise the
    alphabetically first ``*.docx``. Word lock files (``~$name.docx``) are
    ignored because they are not real documents.

    Args:
        doc_dir: Directory expected to contain the document.

    Returns:
        The chosen document path as a string, or ``None`` after recording a
        failure when no document is found or it is smaller than 500 bytes.
    """
    print("--- Document Status ---")
    # glob() yields entries in arbitrary order; sort so repeated runs agree.
    candidates = sorted(
        p for p in Path(doc_dir).glob("*.docx")
        if p.is_file() and not p.name.startswith("~$")
    )
    if not candidates:
        check_fail("No .docx file found in document directory")
        return None

    docx_path = next((p for p in candidates if p.name == "document.docx"), candidates[0])
    size = docx_path.stat().st_size
    if size < 500:
        check_fail(f"Document suspiciously small ({size} bytes)")
        return None

    check_pass(f"Document exists: {docx_path.name} ({size:,} bytes)")
    return str(docx_path)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# ─── Check: Defined terms consistency ───────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
def check_defined_terms(full_text, metadata):
    """Check that every defined term is used somewhere besides its definition.

    Defined terms are detected heuristically from two shapes:
    ``"Term" means / shall mean / is defined as / has the meaning`` and the
    parenthetical ``("Term")``. Both straight and curly double quotes are
    accepted.

    Args:
        full_text: The document text as one string.
        metadata: Document metadata dict; only ``"type"`` is read here.

    Returns:
        The set of detected defined terms, or ``None`` when none were
        detected (a warning is recorded for contract-type documents, a pass
        otherwise).
    """
    print("\n--- Defined Terms ---")
    # `or ""` tolerates an explicit null type in metadata.json.
    doc_type = (metadata.get("type") or "").lower()

    # Word's autocorrect turns straight quotes into curly ones, so accept either.
    open_q = '["\u201c]'
    close_q = '["\u201d]'
    term = r'([A-Z][A-Za-z\s]+?)'

    # Pattern 1: "Term" means / shall mean / is defined as
    defined_pattern = (
        open_q + term + close_q
        + r'(?:\s+(?:means?|shall mean|is defined as|has the meaning))'
    )
    defined_terms = set(re.findall(defined_pattern, full_text))

    # Pattern 2: ("Term") — parenthetical definition
    paren_pattern = r'\(' + open_q + term + close_q + r'\)'
    defined_terms.update(re.findall(paren_pattern, full_text))

    if not defined_terms and doc_type in ("contract", "agreement", "nda", "settlement"):
        check_warn("No defined terms detected in a contract-type document")
        return

    if not defined_terms:
        check_pass("No defined terms expected for this document type")
        return

    # A term counts as used when it occurs at least once beyond its definition.
    # Sorted so the reported sample is the same on every run.
    unused = sorted(
        t for t in defined_terms
        if full_text.count(t) - 1 <= 0  # subtract the definition itself
    )

    if unused:
        check_warn(f"{len(unused)} defined term(s) never used: {', '.join(unused[:5])}")
    else:
        check_pass(f"All {len(defined_terms)} defined terms are used in the document")

    return defined_terms
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# ─── Check: Cross-reference integrity ───────────────────────────────────────
|
|
164
|
+
|
|
165
|
+
def check_cross_references(full_text):
    """Check that ``Section X.Y`` references point at headings found in the text.

    Headings are recognised only when their number is literal text at the
    start of a line (``1.2 ...`` or ``3. Title``). When no such heading is
    found at all the references cannot be verified, so the check passes with
    a note instead of flagging every reference as broken. A reference is also
    accepted when only its top-level section (the part before the first dot)
    is found. Article, Exhibit and Schedule references are counted but not
    verified.

    Args:
        full_text: The document text as one string.

    Returns:
        None. The outcome is recorded through check_pass / check_warn /
        check_fail: up to three unresolved references warn, more fail.
    """
    print("\n--- Cross-References ---")

    # Find section references: "Section X.Y", "Article X", "Exhibit A"
    section_refs = set(re.findall(r'Section\s+(\d+(?:\.\d+)*)', full_text))
    article_refs = set(re.findall(r'Article\s+(\w+)', full_text))
    exhibit_refs = set(re.findall(r'Exhibit\s+([A-Z](?:-\d+)?)', full_text))
    schedule_refs = set(re.findall(r'Schedule\s+(\d+|[A-Z])', full_text))

    # Find actual section/article headings
    # Pattern: "1.2" or "1.2.3" at start of a line-like context
    actual_sections = set(re.findall(r'(?:^|\n)\s*(\d+(?:\.\d+)+)[.\s]', full_text))
    # Add top-level sections
    actual_sections.update(re.findall(r'(?:^|\n)\s*(\d+)[.\s]+[A-Z]', full_text))

    # Numeric ordering keeps the reported list stable and human-friendly.
    unresolved = sorted(
        (
            ref for ref in section_refs
            if ref not in actual_sections and ref.split(".")[0] not in actual_sections
        ),
        key=lambda ref: tuple(int(part) for part in ref.split(".")),
    )
    broken_refs = [f"Section {ref}" for ref in unresolved]

    if not broken_refs:
        total = len(section_refs) + len(article_refs) + len(exhibit_refs) + len(schedule_refs)
        if total > 0:
            check_pass(f"All {total} cross-references appear valid")
        else:
            check_pass("No cross-references to check")
    elif not actual_sections:
        # No heading numbers were found in the text, so nothing can be
        # verified; this must not turn into a failure however many
        # references exist.
        check_pass("Cross-references present (section structure not parseable for verification)")
    elif len(broken_refs) <= 3:
        check_warn(f"Potentially broken references: {', '.join(broken_refs[:5])}")
    else:
        check_fail(f"{len(broken_refs)} potentially broken section reference(s)")
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ─── Check: Citation format (Bluebook) ─────────────────────────────────────
|
|
205
|
+
|
|
206
|
+
def check_citations(full_text, metadata):
    """Run heuristic Bluebook-style citation checks on litigation/research documents.

    The check applies only when the metadata ``"type"`` contains one of the
    litigation/research keywords; other document types pass as not
    applicable. It counts case (``Party v. Party``), U.S.C. and C.F.R.
    citations, then warns about ``vs.`` usage and, when more than five case
    citations exist, about a total absence of pinpoint page references.

    Args:
        full_text: The document text as one string.
        metadata: Document metadata dict; only ``"type"`` is read here.

    Returns:
        None. Outcomes are recorded through check_pass / check_warn.
    """
    print("\n--- Legal Citations ---")
    # `or ""` tolerates an explicit null type in metadata.json.
    doc_type = (metadata.get("type") or "").lower()

    # Only check citations in litigation/research documents
    citation_types = ("memo", "memorandum", "brief", "motion", "complaint", "opinion", "petition")
    if not any(t in doc_type for t in citation_types):
        check_pass("Citation format not applicable for this document type")
        return

    # Find case citations: Party v. Party, Vol Reporter Page (Court Year)
    case_cites = re.findall(r'[A-Z][a-z]+\s+v\.\s+[A-Z][a-z]+', full_text)

    # Find statute citations: ## U.S.C. § ##
    statute_cites = re.findall(r'\d+\s+U\.S\.C\.\s+§\s*\d+', full_text)

    # Find CFR citations: ## C.F.R. § ##
    cfr_cites = re.findall(r'\d+\s+C\.F\.R\.\s+§\s*\d+', full_text)

    total_cites = len(case_cites) + len(statute_cites) + len(cfr_cites)

    if total_cites == 0:
        check_warn("No legal citations found in a litigation/research document")
        return

    check_pass(f"Found {total_cites} citation(s): {len(case_cites)} cases, {len(statute_cites)} statutes, {len(cfr_cites)} regulations")

    # Check for common citation format errors
    errors = []

    # Check: "v." not "vs."
    vs_errors = len(re.findall(r'\bvs\.\s', full_text))
    if vs_errors:
        errors.append(f'{vs_errors} instance(s) of "vs." (should be "v.")')

    # Check: pinpoint citations present (page number after first page)
    # A citation like "418 U.S. 241 (1974)" is fine — it's the full case citation.
    # A pinpoint looks like "418 U.S. 241, 258 (1974)" — citing a specific page.
    proper_pinpoint = len(re.findall(r'\d+\s+(?:F\.\d+d|F\.\d+th|S\.\s*Ct|U\.S\.)\s+\d+,\s*\d+', full_text))

    # Only worth flagging once the document cites a fair number of cases.
    if proper_pinpoint == 0 and len(case_cites) > 5:
        errors.append("No pinpoint page references found — consider adding specific page cites for key propositions")

    if errors:
        for e in errors:
            check_warn(e)
    else:
        check_pass("Citation format appears correct")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# ─── Check: Placeholder / draft artifact detection ─────────────────────────
|
|
265
|
+
|
|
266
|
+
def check_placeholders(full_text):
    """Fail the document if placeholders or draft artifacts remain in the text.

    Detects bracketed placeholders (``[INSERT ...]``, ``[TBD]``, ...), runs of
    four or more underscores that are not signature-block fill lines,
    TODO-style markers, review markers (``[REVIEW]``, ``[CHECK]``, ...) and
    the word ``DRAFT`` in capitals.

    Args:
        full_text: The document text as one string.

    Returns:
        None. A single pass or a single failure (with a per-category
        breakdown) is recorded.
    """
    print("\n--- Completeness ---")

    # Each underscore run is judged by what immediately precedes *that* run:
    # if a signature label (By:/Name:/Title:/Date:/Signature:) leads into it,
    # the optional group captures the label and the run is skipped. Judging
    # by a document-wide search instead would let one signature line hide
    # every blank of the same length elsewhere.
    blank_fills = [
        m.group(2)
        for m in re.finditer(r'((?:By|Name|Title|Date|Signature):\s*)?(_{4,})', full_text)
        if not m.group(1)
    ]

    placeholders = {
        "brackets": re.findall(r'\[(?:INSERT|TBD|TODO|FILL IN|______|NAME|DATE|AMOUNT|ADDRESS|NUMBER|TO BE)[^\]]*\]', full_text, re.I),
        "underscores": blank_fills,
        # Word boundaries keep ordinary words and names that merely contain a
        # marker (e.g. "Hackensack", "shack") from failing the document.
        "todo_comments": re.findall(r'\b(?:TODO|FIXME|XXX|HACK|TBD|PLACEHOLDER)\b', full_text, re.I),
        "highlight_markers": re.findall(r'\[HIGHLIGHT\]|\[REVIEW\]|\[CHECK\]|\[VERIFY\]', full_text, re.I),
        "draft_watermarks": re.findall(r'\bDRAFT\b', full_text),
    }

    total = sum(len(v) for v in placeholders.values())
    if total == 0:
        check_pass("No placeholders, TODOs, or draft artifacts found")
        return

    details = []
    if placeholders["brackets"]:
        details.append(f'{len(placeholders["brackets"])} [INSERT/TBD] bracket(s)')
    if placeholders["underscores"]:
        details.append(f'{len(placeholders["underscores"])} blank line(s) (____)')
    if placeholders["todo_comments"]:
        details.append(f'{len(placeholders["todo_comments"])} TODO/TBD marker(s)')
    if placeholders["highlight_markers"]:
        details.append(f'{len(placeholders["highlight_markers"])} [REVIEW/CHECK] marker(s)')
    if placeholders["draft_watermarks"]:
        details.append(f'{len(placeholders["draft_watermarks"])} DRAFT watermark(s)')
    check_fail(f"{total} placeholder(s)/draft artifact(s): {'; '.join(details)}")
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
# ─── Check: Party name consistency ──────────────────────────────────────────
|
|
298
|
+
|
|
299
|
+
def check_party_consistency(full_text, metadata):
    """Check that each party listed in the metadata is named consistently.

    For a party with both a full name and a short name, warns when the short
    name is never used on its own, or when the full name keeps appearing even
    though the short name is in use. For a party with only a full name,
    verifies that the name actually occurs in the text.

    Args:
        full_text: The document text, as a single string.
        metadata: Parsed metadata mapping; its optional "parties" entry is a
            list of mappings with "name" and optional "short_name" keys.
            Plain string entries are treated as a full name with no short name.
    """
    print("\n--- Party Names ---")

    parties = metadata.get("parties", [])
    if not parties:
        check_pass("No parties specified in metadata (skipping)")
        return

    # Heuristic: once a short name is in use, the full name should be rare.
    max_full_name_uses = 5

    for party in parties:
        if isinstance(party, dict):
            name = party.get("name", "")
            short = party.get("short_name", "")
        else:
            name, short = str(party), ""
        if not name:
            continue

        full_uses = full_text.count(name)

        if not short:
            # Only claim the party is referenced if the text really contains it.
            if full_uses:
                check_pass(f'Party "{name}" referenced in document')
            else:
                check_warn(f'Party "{name}" not found in document')
            continue

        # When the short name is part of the full name ("Acme" in "Acme Corp."),
        # blank out full-name occurrences first so they are not mistaken for
        # standalone uses of the short name.
        searchable = full_text.replace(name, "\0") if short in name else full_text
        uses = searchable.count(short)

        if uses == 0:
            check_warn(f'Short name "{short}" for party "{name}" never used')
        elif full_uses > max_full_name_uses:
            check_warn(f'Full name "{name}" used {full_uses} times (consider using "{short}" consistently)')
        else:
            check_pass(f'Party "{name}" referred to as "{short}" ({uses} use(s))')
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
# ─── Check: Boilerplate provisions (contracts) ─────────────────────────────
|
|
325
|
+
|
|
326
|
+
def check_boilerplate(full_text, metadata):
    """Check contract-type documents for the standard boilerplate provisions.

    Skips (with a pass) when the metadata "type" is not one of the contract
    types listed below. Otherwise searches the text case-insensitively for
    each provision, warns with the list of missing ones, and passes with the
    list of those found.

    Args:
        full_text: The document text, as a single string.
        metadata: Parsed metadata mapping; its "type" entry selects whether
            the check applies.
    """
    print("\n--- Required Provisions ---")

    # "type" may be missing, null, or not a string in metadata.json; normalise
    # all of those to a plain lower-case string instead of crashing on .lower().
    doc_type = str(metadata.get("type") or "").strip().lower()

    contract_types = (
        "contract", "agreement", "nda", "settlement", "terms of service",
        "employment agreement", "services agreement", "license agreement",
    )
    if doc_type not in contract_types:
        check_pass("Boilerplate check not applicable for this document type")
        return

    # "." does not match newlines here, so each pattern must be satisfied
    # within a single line of the text.
    required_provisions = {
        "governing law": r'(?:governing\s+law|choice\s+of\s+law|governed\s+by.*laws\s+of)',
        "entire agreement": r'(?:entire\s+agreement|constitutes?\s+the\s+entire)',
        "severability": r'(?:severab|invalid.*unenforceab|unenforceab.*sever)',
        "amendment": r'(?:amend(?:ment|ed).*(?:writ(?:ten|ing)|signed)|(?:not\s+be\s+)?modif(?:y|ied|ication).*(?:except|writ))',
        "notices": r'(?:notice.*(?:shall|must|will|be)\s+.*(?:writ(?:ten|ing)|deliver|given|sent)|all\s+notices?\s+under)',
        "assignment": r'(?:assign(?:ment)?.*(?:without|prior|consent)|neither\s+party\s+may\s+assign)',
    }

    found = [
        provision
        for provision, pattern in required_provisions.items()
        if re.search(pattern, full_text, re.I)
    ]
    missing = [provision for provision in required_provisions if provision not in found]

    if missing:
        check_warn(f"Missing standard provisions: {', '.join(missing)}")
    if found:
        check_pass(f"Found {len(found)}/{len(required_provisions)} standard provisions: {', '.join(found)}")
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
# ─── Check: Shall/May/Must consistency ─────────────────────────────────────
|
|
359
|
+
|
|
360
|
+
def check_modal_verbs(full_text, metadata):
    """Report modal-verb usage and warn when "shall" and "must" are mixed.

    Skips (with a pass) when the metadata "type" is not one of the document
    types listed below. Otherwise counts whole-word, case-insensitive
    occurrences of shall/must/may/will, reports them, and warns when both
    "shall" and "must" occur more than three times each.

    Args:
        full_text: The document text, as a single string.
        metadata: Parsed metadata mapping; its "type" entry selects whether
            the check applies.
    """
    print("\n--- Language Precision ---")

    # "type" may be missing, null, or not a string in metadata.json.
    doc_type = str(metadata.get("type") or "").strip().lower()

    # Same contract types as the boilerplate check, plus regulations.
    applicable_types = (
        "contract", "agreement", "nda", "settlement", "regulation",
        "terms of service", "employment agreement", "services agreement",
        "license agreement",
    )
    if doc_type not in applicable_types:
        check_pass("Modal verb check not applicable for this document type")
        return

    def count_word(word):
        """Count whole-word, case-insensitive occurrences of *word*."""
        return len(re.findall(r'\b' + word + r'\b', full_text, re.I))

    shall_count = count_word("shall")
    must_count = count_word("must")
    may_count = count_word("may")
    will_count = count_word("will")

    check_pass(f"Modal verbs: shall={shall_count}, must={must_count}, may={may_count}, will={will_count}")

    # Warn if both "shall" and "must" are used (inconsistent — pick one style)
    if shall_count > 3 and must_count > 3:
        check_warn('Both "shall" and "must" used frequently — consider standardizing to one')
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# ─── Check: Word count / length ─────────────────────────────────────────────
|
|
385
|
+
|
|
386
|
+
def check_length(full_text, metadata):
    """Check the document's word count against any configured limit.

    For briefs and motions (metadata "type" containing "brief" or "motion"),
    compares the whitespace-delimited word count with metadata "word_limit".
    For every other type, only warns when the document is under 50 words.

    Args:
        full_text: The document text, as a single string.
        metadata: Parsed metadata mapping; reads "type", "word_limit" and
            "page_limit".
    """
    print("\n--- Document Length ---")
    word_count = len(full_text.split())
    # "type" may be missing, null, or not a string in metadata.json.
    doc_type = str(metadata.get("type") or "").lower()

    if "brief" not in doc_type and "motion" not in doc_type:
        if word_count < 50:
            check_warn(f"Document very short ({word_count} words)")
        else:
            check_pass(f"Word count: {word_count:,}")
        return

    page_limit = metadata.get("page_limit")
    word_limit = metadata.get("word_limit")

    # JSON authors sometimes quote numbers; accept "5000" as well as 5000 and
    # report anything unusable instead of failing on the comparison below.
    if word_limit:
        try:
            word_limit = int(word_limit)
        except (TypeError, ValueError):
            check_warn(f"Ignoring non-numeric word_limit in metadata: {word_limit!r}")
            word_limit = None

    if word_limit and word_count > word_limit:
        check_fail(f"Word count {word_count:,} exceeds limit of {word_limit:,}")
    elif word_limit:
        check_pass(f"Word count {word_count:,} within limit of {word_limit:,}")
    elif page_limit:
        # Pagination cannot be derived from plain text, so say so rather than
        # claiming that no limit was specified.
        check_pass(f"Word count: {word_count:,} (page limit of {page_limit} not checked; set word_limit to enforce)")
    else:
        check_pass(f"Word count: {word_count:,} (no limit specified)")
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ─── Check: Date consistency ────────────────────────────────────────────────
|
|
408
|
+
|
|
409
|
+
def check_date_consistency(full_text):
    """Warn when the document mixes more than one date format.

    Recognises three formats: long ("January 5, 2024"), slash ("1/5/2024")
    and ISO ("2024-01-05"). Passes when zero or one format is present.

    Args:
        full_text: The document text, as a single string.
    """
    print("\n--- Date Format ---")

    # The numeric patterns are anchored so they cannot match a slice of a
    # longer digit/separator run (IDs, phone numbers, docket numbers).
    long_dates = re.findall(
        r'\b(?:January|February|March|April|May|June|July|August|September|October|November|December)\s+\d{1,2},?\s+\d{4}\b',
        full_text)
    slash_dates = re.findall(r'(?<![\d/])\d{1,2}/\d{1,2}/\d{2,4}(?![\d/])', full_text)
    dash_dates = re.findall(r'(?<![\d-])\d{4}-\d{2}-\d{2}(?![\d-])', full_text)

    date_groups = (long_dates, slash_dates, dash_dates)
    formats_used = sum(1 for group in date_groups if group)

    if formats_used > 1:
        check_warn(f"Inconsistent date formats: {len(long_dates)} long, {len(slash_dates)} slash, {len(dash_dates)} ISO")
    elif formats_used == 1:
        total = sum(len(group) for group in date_groups)
        check_pass(f"Consistent date format ({total} dates found)")
    else:
        check_pass("No dates to check")
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# ─── Main ───────────────────────────────────────────────────────────────────
|
|
432
|
+
|
|
433
|
+
def _print_summary():
    """Print the PASS/WARN/FAIL tallies between two separator rules."""
    print(f"\n{'='*35}")
    print(f" PASS: {PASS_COUNT}")
    print(f" WARN: {WARN_COUNT}")
    print(f" FAIL: {FAIL_COUNT}")
    print(f"{'='*35}")


def main():
    """Command-line entry point: verify the legal document in a directory.

    Usage: verify-legal.py <document-dir> [--strict]

    Loads the optional metadata.json from the directory, locates and reads
    the document, runs every check, prints a summary, and exits with
    status 1 if any check failed and 0 otherwise.
    """
    global STRICT

    cli_args = sys.argv[1:]
    # Separate flags from the directory so "--strict <dir>" works as well as
    # "<dir> --strict".
    positional = [arg for arg in cli_args if not arg.startswith("--")]
    if not positional:
        print("Usage: verify-legal.py <document-dir> [--strict]", file=sys.stderr)
        sys.exit(1)

    # Keep "/" itself intact when stripping trailing slashes.
    doc_dir = positional[0].rstrip("/") or positional[0]
    STRICT = "--strict" in cli_args

    print(f"=== Legal Document Verification: {doc_dir} ===\n")

    # Load metadata
    metadata = {}
    metadata_path = os.path.join(doc_dir, "metadata.json")
    if os.path.exists(metadata_path):
        try:
            with open(metadata_path, encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            # Record the failure and carry on with defaults so the remaining
            # checks still run; the final result is FAILED either way.
            check_fail(f"Could not read metadata.json: {e}")
        else:
            if isinstance(loaded, dict):
                metadata = loaded
            else:
                check_fail("metadata.json must contain a JSON object")
    else:
        print(" NOTE: No metadata.json found, running with defaults\n")

    # Check 1: Document exists
    docx_path = check_document_exists(doc_dir)
    if not docx_path:
        _print_summary()
        print(" RESULT: FAILED")
        sys.exit(1)

    # Extract text
    try:
        paragraphs = extract_docx_text(docx_path)
        full_text = get_full_text(paragraphs)
    except Exception as e:
        # Top-level boundary: any extraction error is reported as a failed
        # check rather than a traceback.
        check_fail(f"Could not read document: {e}")
        _print_summary()
        print(" RESULT: FAILED")
        sys.exit(1)

    # Run all checks
    check_defined_terms(full_text, metadata)
    check_cross_references(full_text)
    check_citations(full_text, metadata)
    check_placeholders(full_text)
    check_party_consistency(full_text, metadata)
    check_boilerplate(full_text, metadata)
    check_modal_verbs(full_text, metadata)
    check_length(full_text, metadata)
    check_date_consistency(full_text)

    # Summary
    _print_summary()

    if FAIL_COUNT > 0:
        print(" RESULT: FAILED")
        sys.exit(1)

    if WARN_COUNT > 0:
        print(" RESULT: PASSED (with warnings)")
    else:
        print(" RESULT: ALL CHECKS PASSED")
    sys.exit(0)
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
# Run the verifier only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|