extract-cli 0.1.12__tar.gz → 0.1.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. {extract_cli-0.1.12 → extract_cli-0.1.13}/CHANGELOG.md +22 -0
  2. {extract_cli-0.1.12 → extract_cli-0.1.13}/Makefile +3 -0
  3. {extract_cli-0.1.12 → extract_cli-0.1.13}/PKG-INFO +22 -2
  4. {extract_cli-0.1.12 → extract_cli-0.1.13}/README.md +21 -1
  5. {extract_cli-0.1.12 → extract_cli-0.1.13}/extract_cli.py +28 -10
  6. {extract_cli-0.1.12 → extract_cli-0.1.13}/pyproject.toml +1 -1
  7. extract_cli-0.1.13/tests/eval/ATTRIBUTION.md +20 -0
  8. extract_cli-0.1.13/tests/eval/corpus/consulting_mtm.htm +980 -0
  9. extract_cli-0.1.13/tests/eval/corpus/emp_arcp.htm +18 -0
  10. extract_cli-0.1.13/tests/eval/corpus/emp_celsci.txt +494 -0
  11. extract_cli-0.1.13/tests/eval/corpus/emp_quadgraphics.htm +1318 -0
  12. extract_cli-0.1.13/tests/eval/corpus/msa_kpmg.txt +754 -0
  13. extract_cli-0.1.13/tests/eval/corpus/services_visteon.txt +1054 -0
  14. extract_cli-0.1.13/tests/eval/evaluate.py +123 -0
  15. extract_cli-0.1.13/tests/eval/gold.json +51 -0
  16. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/employment_docx.docx.expected.json +1 -1
  17. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/heading_docx.docx.expected.json +1 -1
  18. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/lease_allcaps.txt.expected.json +1 -1
  19. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/license_pdf.pdf.expected.json +1 -1
  20. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/nda_h2.md.expected.json +1 -1
  21. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/numbered_docx.docx.expected.json +1 -1
  22. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/scanned.pdf.expected.json +1 -1
  23. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/services_bold.txt.expected.json +1 -1
  24. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/services_html.html.expected.json +1 -1
  25. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_deterministic.py +17 -0
  26. extract_cli-0.1.13/tests/test_eval.py +26 -0
  27. {extract_cli-0.1.12 → extract_cli-0.1.13}/.gitignore +0 -0
  28. {extract_cli-0.1.12 → extract_cli-0.1.13}/AGENTS.md +0 -0
  29. {extract_cli-0.1.12 → extract_cli-0.1.13}/ARCHITECTURE.md +0 -0
  30. {extract_cli-0.1.12 → extract_cli-0.1.13}/CONTRIBUTING.md +0 -0
  31. {extract_cli-0.1.12 → extract_cli-0.1.13}/LICENSE +0 -0
  32. {extract_cli-0.1.12 → extract_cli-0.1.13}/config/llm.json.example +0 -0
  33. {extract_cli-0.1.12 → extract_cli-0.1.13}/docs/INTEROP.md +0 -0
  34. {extract_cli-0.1.12 → extract_cli-0.1.13}/docs/spec/extract-output.schema.json +0 -0
  35. {extract_cli-0.1.12 → extract_cli-0.1.13}/llms.txt +0 -0
  36. {extract_cli-0.1.12 → extract_cli-0.1.13}/scripts/release.py +0 -0
  37. {extract_cli-0.1.12 → extract_cli-0.1.13}/scripts/validate_against_spec.py +0 -0
  38. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/_fixtures_build.py +0 -0
  39. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/_make_goldens.py +0 -0
  40. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/_schema_validator.py +0 -0
  41. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/conftest.py +0 -0
  42. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/employment_docx.docx +0 -0
  43. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/heading_docx.docx +0 -0
  44. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/lease_allcaps.txt +0 -0
  45. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/license_pdf.pdf +0 -0
  46. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/nda_h2.md +0 -0
  47. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/numbered_docx.docx +0 -0
  48. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/scanned.pdf +0 -0
  49. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/services_bold.txt +0 -0
  50. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/fixtures/services_html.html +0 -0
  51. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_clause_map.py +0 -0
  52. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_cli.py +0 -0
  53. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_coverage.py +0 -0
  54. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_llm.py +0 -0
  55. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_misc.py +0 -0
  56. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_property.py +0 -0
  57. {extract_cli-0.1.12 → extract_cli-0.1.13}/tests/test_schema_conformance.py +0 -0
@@ -6,6 +6,27 @@ to [Semantic Versioning](https://semver.org/). Per the suite convention
6
6
  (see [`docs/INTEROP.md`](docs/INTEROP.md)), **backward-incompatible changes to
7
7
  the output schema require a major version bump**; new optional fields are minor.
8
8
 
9
+ ## [0.1.13] - 2026-05-22
10
+
11
+ ### Added
12
+ - **Accuracy benchmark** (`tests/eval/`, `make eval`). Scores the deterministic
13
+ tier against a small corpus of real, executed SEC-EDGAR contracts with
14
+ hand-verified ground truth, reporting precision/recall/F1 per field — turning
15
+ "best-effort" into a measured number. Current: parties F1 0.96, effective
16
+ date / governing law / jurisdiction 1.00, clause recall 0.45 (heading
17
+ detection on dense HTML is the known weak spot). `tests/test_eval.py` gates it
18
+ so accuracy can't silently regress.
19
+
20
+ ### Fixed / improved (surfaced by the benchmark)
21
+ - **Governing-law detection** now covers the common connector phrasings beyond
22
+ "governed by the laws of X": "governed by, **and enforced in accordance
23
+ with,** the laws of X", "**interpreted and enforced in accordance with** the
24
+ laws of X", "**construed under** the laws of X". (Benchmark: governing law
25
+ 0.67 → 1.00.)
26
+ - **Jurisdiction normalization** now maps **all 50 US states + DC** (plus more
27
+ Canadian provinces / UK nations / countries), not just a dozen. (Benchmark:
28
+ jurisdiction 0.67 → 1.00.)
29
+
9
30
  ## [0.1.12] - 2026-05-22
10
31
 
11
32
  ### Security
@@ -333,6 +354,7 @@ Initial release — the open-loop front door of the contract-ops CLI suite.
333
354
  intentionally *not* governed by the output schema (the schema describes the
334
355
  full default output).
335
356
 
357
+ [0.1.13]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.13
336
358
  [0.1.12]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.12
337
359
  [0.1.11]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.11
338
360
  [0.1.10]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.10
@@ -40,6 +40,9 @@ coverage:
40
40
  typecheck:
41
41
  $(PYTHON) -m mypy --strict extract_cli.py
42
42
 
43
+ eval:
44
+ $(PYTHON) tests/eval/evaluate.py
45
+
43
46
  build: clean
44
47
  $(PYTHON) -m build
45
48
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: extract-cli
3
- Version: 0.1.12
3
+ Version: 0.1.13
4
4
  Summary: Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON.
5
5
  Project-URL: Homepage, https://cli.drbaher.com/
6
6
  Project-URL: Repository, https://github.com/DrBaher/extract-cli
@@ -256,13 +256,33 @@ paths. Configure it once and every suite tool that adopts the same lookup gets
256
256
  LLM features for free. Without it, `--llm` just warns and returns the
257
257
  deterministic output.
258
258
 
259
+ ## Accuracy
260
+
261
+ Line coverage tells you the code runs; it doesn't tell you the extraction is
262
+ *correct*. `make eval` scores the deterministic tier against a small corpus of
263
+ **real, executed contracts** (SEC EDGAR filings) with hand-verified ground truth
264
+ ([`tests/eval/`](tests/eval/)), reporting precision/recall per field:
265
+
266
+ | Field | Score |
267
+ |---|---|
268
+ | parties | P 1.00 · R 0.92 · F1 0.96 |
269
+ | effective date | accuracy 1.00 |
270
+ | governing law | accuracy 1.00 |
271
+ | jurisdiction (normalized) | accuracy 1.00 |
272
+ | clauses (recall on verified sections) | 0.45 |
273
+
274
+ Clause recall is the honest weak spot — heading detection on dense HTML
275
+ exhibits still misses sections. A test (`tests/test_eval.py`) gates these so
276
+ accuracy can't silently regress.
277
+
259
278
  ## Development
260
279
 
261
280
  ```bash
262
281
  make install # editable install with the [dev] extra
263
282
  make test # full suite
264
- make coverage # suite + coverage report
283
+ make coverage # suite + coverage report (installs extras; fails under 100%)
265
284
  make typecheck # mypy --strict
285
+ make eval # accuracy benchmark vs the labeled corpus
266
286
  make build # wheel + sdist
267
287
  make smoke # build, install the wheel in a clean venv, run it
268
288
  make spec-check # assert docs/spec schema == `extract schema`
@@ -218,13 +218,33 @@ paths. Configure it once and every suite tool that adopts the same lookup gets
218
218
  LLM features for free. Without it, `--llm` just warns and returns the
219
219
  deterministic output.
220
220
 
221
+ ## Accuracy
222
+
223
+ Line coverage tells you the code runs; it doesn't tell you the extraction is
224
+ *correct*. `make eval` scores the deterministic tier against a small corpus of
225
+ **real, executed contracts** (SEC EDGAR filings) with hand-verified ground truth
226
+ ([`tests/eval/`](tests/eval/)), reporting precision/recall per field:
227
+
228
+ | Field | Score |
229
+ |---|---|
230
+ | parties | P 1.00 · R 0.92 · F1 0.96 |
231
+ | effective date | accuracy 1.00 |
232
+ | governing law | accuracy 1.00 |
233
+ | jurisdiction (normalized) | accuracy 1.00 |
234
+ | clauses (recall on verified sections) | 0.45 |
235
+
236
+ Clause recall is the honest weak spot — heading detection on dense HTML
237
+ exhibits still misses sections. A test (`tests/test_eval.py`) gates these so
238
+ accuracy can't silently regress.
239
+
221
240
  ## Development
222
241
 
223
242
  ```bash
224
243
  make install # editable install with the [dev] extra
225
244
  make test # full suite
226
- make coverage # suite + coverage report
245
+ make coverage # suite + coverage report (installs extras; fails under 100%)
227
246
  make typecheck # mypy --strict
247
+ make eval # accuracy benchmark vs the labeled corpus
228
248
  make build # wheel + sdist
229
249
  make smoke # build, install the wheel in a clean venv, run it
230
250
  make spec-check # assert docs/spec schema == `extract schema`
@@ -43,11 +43,11 @@ import urllib.request
43
43
  from pathlib import Path
44
44
  from typing import Any, Dict, List, Optional, Tuple
45
45
 
46
- __version__ = "0.1.12"
46
+ __version__ = "0.1.13"
47
47
 
48
48
  # Bumped independently of the package version when the *extraction logic*
49
49
  # changes in a way downstream consumers should notice. Embedded in `_meta`.
50
- EXTRACTOR_VERSION = "0.1.12"
50
+ EXTRACTOR_VERSION = "0.1.13"
51
51
 
52
52
  # JSON Schema version of the output contract (docs/spec/extract-output.schema.json).
53
53
  SCHEMA_VERSION = 1
@@ -616,8 +616,11 @@ _ROLE_PAREN_RE = re.compile(
616
616
  # enforces a capitalized proper noun (a global re.IGNORECASE would defeat that
617
617
  # and over-capture trailing lowercase clauses like ", without regard to ...").
618
618
  _GOV_LAW_RE = re.compile(
619
- r"(?i:governed\s+by(?:\s+and\s+construed\s+in\s+accordance\s+with)?\s+"
620
- r"(?:the\s+)?laws?\s+of\s+(?:the\s+)?)"
619
+ # Allow a short same-sentence gap between the trigger verb (governed, construed,
620
+ # interpreted, enforced) and "laws of" to cover the real connector phrasings:
621
+ # "...and construed in accordance with...", "...and enforced in accordance
622
+ # with...", "the internal laws of", etc. (bounded + lazy so it stays within the clause).
623
+ r"(?i:(?:governed|construed|interpreted|enforced)\b[^.\n]{0,60}?\blaws?\s+of\s+(?:the\s+)?)"
621
624
  r"([A-Z][A-Za-z\.\- ]+?(?:,\s*[A-Z][A-Za-z\.\- ]+?)?)"
622
625
  r"(?=[\.,;\n)]|\s+and\b|\s+without\b|$)",
623
626
  )
@@ -889,16 +892,31 @@ def extract_signatories(text: str) -> List[JSON]:
889
892
  return out
890
893
 
891
894
 
892
- # Free-text jurisdiction -> a normalized ISO-ish code (best-effort, common only).
895
+ # Free-text jurisdiction -> a normalized ISO 3166-2 / ISO 3166-1 code. All 50 US
896
+ # states + DC, common Canadian provinces, UK nations, and frequent countries.
897
+ _US_STATES: Dict[str, str] = {
898
+ "alabama": "AL", "alaska": "AK", "arizona": "AZ", "arkansas": "AR",
899
+ "california": "CA", "colorado": "CO", "connecticut": "CT", "delaware": "DE",
900
+ "florida": "FL", "georgia": "GA", "hawaii": "HI", "idaho": "ID",
901
+ "illinois": "IL", "indiana": "IN", "iowa": "IA", "kansas": "KS",
902
+ "kentucky": "KY", "louisiana": "LA", "maine": "ME", "maryland": "MD",
903
+ "massachusetts": "MA", "michigan": "MI", "minnesota": "MN", "mississippi": "MS",
904
+ "missouri": "MO", "montana": "MT", "nebraska": "NE", "nevada": "NV",
905
+ "new hampshire": "NH", "new jersey": "NJ", "new mexico": "NM", "new york": "NY",
906
+ "north carolina": "NC", "north dakota": "ND", "ohio": "OH", "oklahoma": "OK",
907
+ "oregon": "OR", "pennsylvania": "PA", "rhode island": "RI", "south carolina": "SC",
908
+ "south dakota": "SD", "tennessee": "TN", "texas": "TX", "utah": "UT",
909
+ "vermont": "VT", "virginia": "VA", "washington": "WA", "west virginia": "WV",
910
+ "wisconsin": "WI", "wyoming": "WY", "district of columbia": "DC",
911
+ }
893
912
  _JURISDICTION_CODES: Dict[str, str] = {
894
- "delaware": "US-DE", "new york": "US-NY", "california": "US-CA",
895
- "texas": "US-TX", "illinois": "US-IL", "massachusetts": "US-MA",
896
- "washington": "US-WA", "florida": "US-FL", "nevada": "US-NV",
897
- "new jersey": "US-NJ", "pennsylvania": "US-PA", "michigan": "US-MI",
913
+ **{name: f"US-{code}" for name, code in _US_STATES.items()},
898
914
  "ontario": "CA-ON", "quebec": "CA-QC", "british columbia": "CA-BC",
899
- "england and wales": "GB-EAW", "england": "GB-ENG", "scotland": "GB-SCT",
915
+ "alberta": "CA-AB", "england and wales": "GB-EAW", "england": "GB-ENG",
916
+ "scotland": "GB-SCT", "wales": "GB-WLS", "northern ireland": "GB-NIR",
900
917
  "united kingdom": "GB", "france": "FR", "germany": "DE", "ireland": "IE",
901
918
  "singapore": "SG", "australia": "AU", "india": "IN", "netherlands": "NL",
919
+ "switzerland": "CH", "japan": "JP",
902
920
  }
903
921
 
904
922
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "extract-cli"
7
- version = "0.1.12"
7
+ version = "0.1.13"
8
8
  description = "Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -0,0 +1,20 @@
1
+ # Benchmark corpus — sources & licensing
2
+
3
+ The accuracy benchmark (`tests/eval/`) scores extract-cli against a small set of
4
+ **real, executed contracts** filed publicly with the U.S. Securities and
5
+ Exchange Commission (SEC EDGAR). SEC filings are public records; these exhibits
6
+ are reproduced here, unmodified, solely as a regression/accuracy test fixture.
7
+
8
+ | File | Source (SEC EDGAR) |
9
+ |---|---|
10
+ | `emp_celsci.txt` | CEL-SCI Corporation — Exhibit 10(ooo), employment agreement |
11
+ | `msa_kpmg.txt` | Blade Internet Ventures / KPMG Consulting — master services agreement |
12
+ | `services_visteon.txt` | Visteon Corporation — salaried employee lease agreement |
13
+ | `consulting_mtm.htm` | MTM Technologies — consulting agreement |
14
+ | `emp_arcp.htm` | American Realty Capital Properties — employment agreement |
15
+ | `emp_quadgraphics.htm` | Quad/Graphics, Inc. — employment agreement |
16
+
17
+ Ground truth (`gold.json`) was hand-verified against each document's text — the
18
+ parties, effective date, governing law, normalized jurisdiction, and a
19
+ verified subset of section headings. It is intentionally independent of what the
20
+ extractor currently produces.