extract-cli 0.1.5__tar.gz → 0.1.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {extract_cli-0.1.5 → extract_cli-0.1.6}/CHANGELOG.md +18 -0
  2. {extract_cli-0.1.5 → extract_cli-0.1.6}/PKG-INFO +35 -24
  3. {extract_cli-0.1.5 → extract_cli-0.1.6}/README.md +34 -23
  4. {extract_cli-0.1.5 → extract_cli-0.1.6}/extract_cli.py +2 -2
  5. {extract_cli-0.1.5 → extract_cli-0.1.6}/pyproject.toml +1 -1
  6. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/employment_docx.docx.expected.json +1 -1
  7. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/heading_docx.docx.expected.json +1 -1
  8. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/lease_allcaps.txt.expected.json +1 -1
  9. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/license_pdf.pdf.expected.json +1 -1
  10. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/nda_h2.md.expected.json +1 -1
  11. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/scanned.pdf.expected.json +1 -1
  12. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_bold.txt.expected.json +1 -1
  13. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_html.html.expected.json +1 -1
  14. {extract_cli-0.1.5 → extract_cli-0.1.6}/.gitignore +0 -0
  15. {extract_cli-0.1.5 → extract_cli-0.1.6}/ARCHITECTURE.md +0 -0
  16. {extract_cli-0.1.5 → extract_cli-0.1.6}/CONTRIBUTING.md +0 -0
  17. {extract_cli-0.1.5 → extract_cli-0.1.6}/LICENSE +0 -0
  18. {extract_cli-0.1.5 → extract_cli-0.1.6}/Makefile +0 -0
  19. {extract_cli-0.1.5 → extract_cli-0.1.6}/config/llm.json.example +0 -0
  20. {extract_cli-0.1.5 → extract_cli-0.1.6}/docs/INTEROP.md +0 -0
  21. {extract_cli-0.1.5 → extract_cli-0.1.6}/docs/spec/extract-output.schema.json +0 -0
  22. {extract_cli-0.1.5 → extract_cli-0.1.6}/scripts/release.py +0 -0
  23. {extract_cli-0.1.5 → extract_cli-0.1.6}/scripts/validate_against_spec.py +0 -0
  24. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_fixtures_build.py +0 -0
  25. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_make_goldens.py +0 -0
  26. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_schema_validator.py +0 -0
  27. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/conftest.py +0 -0
  28. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/employment_docx.docx +0 -0
  29. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/heading_docx.docx +0 -0
  30. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/lease_allcaps.txt +0 -0
  31. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/license_pdf.pdf +0 -0
  32. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/nda_h2.md +0 -0
  33. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/scanned.pdf +0 -0
  34. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_bold.txt +0 -0
  35. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_html.html +0 -0
  36. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_clause_map.py +0 -0
  37. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_cli.py +0 -0
  38. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_deterministic.py +0 -0
  39. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_llm.py +0 -0
  40. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_misc.py +0 -0
  41. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_property.py +0 -0
  42. {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_schema_conformance.py +0 -0
@@ -6,6 +6,23 @@ to [Semantic Versioning](https://semver.org/). Per the suite convention
6
6
  (see [`docs/INTEROP.md`](docs/INTEROP.md)), **backward-incompatible changes to
7
7
  the output schema require a major version bump**; new optional fields are minor.
8
8
 
9
+ ## [0.1.6] - 2026-05-21
10
+
11
+ ### Docs
12
+ - **Rewrote the README composability section to use verified, runnable examples.**
13
+ Testing extract-cli against the real sibling CLIs (`template-vault-cli`,
14
+ `nda-review-cli`) showed the previous pipes were aspirational — the siblings
15
+ expose no `--from-extract`/`--stdin` flag (`nda-review review` takes
16
+ `--file`/`--text`; `template-vault` reads its own vault). The integration
17
+ contract is the **output schema + the shared canonical clause vocabulary**,
18
+ glued by stdout JSON and standard tools (`jq`, `comm`): `extract`'s
19
+ `canonical_title` values are the same names template-vault detects and
20
+ nda-review keys policy on, so a foreign document's clauses line up with the
21
+ suite's with no bespoke adapter. New examples cover clause-coverage gap
22
+ analysis against a vault template and a combined extract+nda-review intake
23
+ report — all runnable today. (Also fixed a broken `jq input_filename` in the
24
+ folder-triage example.) No code or schema change.
25
+
9
26
  ## [0.1.5] - 2026-05-21
10
27
 
11
28
  ### Added
@@ -181,6 +198,7 @@ Initial release — the open-loop front door of the contract-ops CLI suite.
181
198
  intentionally *not* governed by the output schema (the schema describes the
182
199
  full default output).
183
200
 
201
+ [0.1.6]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.6
184
202
  [0.1.5]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.5
185
203
  [0.1.4]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.4
186
204
  [0.1.3]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.3
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: extract-cli
3
- Version: 0.1.5
3
+ Version: 0.1.6
4
4
  Summary: Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON.
5
5
  Project-URL: Homepage, https://cli.drbaher.com/
6
6
  Project-URL: Repository, https://github.com/DrBaher/extract-cli
@@ -171,37 +171,48 @@ extract counterparty.pdf | jq '.clauses[] | {canonical_title, detected_title, ma
171
171
 
172
172
  ## Composability — piping into the rest of the suite
173
173
 
174
- `extract-cli` is built to be the first stage of a Unix pipe. Its JSON is the
175
- contract every downstream tool reads.
174
+ `extract-cli` is built to be the first stage of a Unix pipe. The glue is its
175
+ **stdout JSON + standard tools** (`jq`, `comm`) and the **shared clause
176
+ vocabulary** — `extract`'s `canonical_title` values are the same names
177
+ `template-vault-cli` detects and `nda-review-cli` keys policy on, so a foreign
178
+ document's clauses line up with the suite's with no bespoke adapter. Every
179
+ example below is runnable today (verified against the real sibling CLIs).
176
180
 
177
181
  ```bash
178
- # 1) Foreign NDA review. extract normalizes clauses; nda-review runs policy.
179
- extract counterparty_nda.pdf | nda-review review --from-extract -
180
-
181
- # 2) Pull just the clause map and feed compare-cli to diff a foreign doc
182
- # against your canonical template's structure.
183
- extract their_msa.docx --fields clauses | compare-cli align --stdin \
184
- --against msa/standard
185
-
186
- # 3) Archive structured metadata for any inbound paper into the post-signature
187
- # vault, keyed by content hash.
188
- extract signed_contract.pdf | contract-vault put --from-extract - \
189
- --id "$(extract signed_contract.pdf | jq -r .document.sha256)"
190
-
191
- # 4) Triage a folder of inbound contracts: list governing law + parties.
192
- for f in inbox/*.pdf; do
182
+ # 1) Inspect any contract's structure (.md/.txt/.html/.docx/.pdf, one tool).
183
+ extract counterparty.docx | jq '{parties: [.parties[].name],
184
+ governing_law: .governing_law.value, clauses: [.clauses[].canonical_title]}'
185
+
186
+ # 2) Clause-coverage gap vs your canonical template in template-vault-cli.
187
+ # extract normalizes the counterparty's *foreign* headings onto the same
188
+ # clause vocabulary template-vault detects, so a plain `comm` diffs them.
189
+ template-vault info nda/mutual-standard --json | jq -r '.clauses[].title' | sort > ours.txt
190
+ extract counterparty_nda.docx | jq -r '.clauses[].canonical_title' | sort -u > theirs.txt
191
+ comm -23 ours.txt theirs.txt # clauses in OUR standard that THEY are missing
192
+ comm -13 ours.txt theirs.txt # clauses THEY added that we don't have
193
+
194
+ # 3) Intake: extract for structure, nda-review-cli for a policy verdict on the
195
+ # same foreign doc; merge both views with jq.
196
+ extract counterparty_nda.docx > extract.json
197
+ nda-review review --file counterparty_nda.docx --playbook output/nda_playbook.json \
198
+ --out-json review.json
199
+ jq -n --slurpfile e extract.json --slurpfile r review.json \
200
+ '{parties: [$e[0].parties[].name], governing_law: $e[0].governing_law.value,
201
+ clauses: ($e[0].clauses | length), decision: $r[0].decision, risk: $r[0].risk_score}'
202
+
203
+ # 4) Triage a folder of inbound contracts: governing law + parties per file.
204
+ for f in inbox/*; do
193
205
  extract "$f" --fields parties,governing_law --no-confidence \
194
- | jq -c '{file: input_filename, gov: .governing_law, parties: [.parties[].name]}'
206
+ | jq -c --arg f "$f" '{file: $f, gov: .governing_law, parties: [.parties[].name]}'
195
207
  done
196
208
 
197
- # 5) Gate a workflow on extraction confidence.
209
+ # 5) Gate a workflow on extraction confidence (non-zero exit if any clause is shaky).
198
210
  extract draft.docx | jq -e '.clauses | all(.confidence > 0.7)' && echo "ok to review"
199
211
  ```
200
212
 
201
- > The `--from-extract`/`--stdin` flags above are the consumption points the
202
- > sibling CLIs expose (or are adopting) for this contract; see
203
- > [`docs/INTEROP.md`](docs/INTEROP.md) for the shared conventions and the
204
- > versioning commitment on the schema.
213
+ > The integration contract is the **output schema** and the **canonical clause
214
+ > vocabulary**, not per-tool flags. See [`docs/INTEROP.md`](docs/INTEROP.md) for
215
+ > the shared conventions and the schema's versioning commitment.
205
216
 
206
217
  ## LLM configuration (opt-in)
207
218
 
@@ -133,37 +133,48 @@ extract counterparty.pdf | jq '.clauses[] | {canonical_title, detected_title, ma
133
133
 
134
134
  ## Composability — piping into the rest of the suite
135
135
 
136
- `extract-cli` is built to be the first stage of a Unix pipe. Its JSON is the
137
- contract every downstream tool reads.
136
+ `extract-cli` is built to be the first stage of a Unix pipe. The glue is its
137
+ **stdout JSON + standard tools** (`jq`, `comm`) and the **shared clause
138
+ vocabulary** — `extract`'s `canonical_title` values are the same names
139
+ `template-vault-cli` detects and `nda-review-cli` keys policy on, so a foreign
140
+ document's clauses line up with the suite's with no bespoke adapter. Every
141
+ example below is runnable today (verified against the real sibling CLIs).
138
142
 
139
143
  ```bash
140
- # 1) Foreign NDA review. extract normalizes clauses; nda-review runs policy.
141
- extract counterparty_nda.pdf | nda-review review --from-extract -
142
-
143
- # 2) Pull just the clause map and feed compare-cli to diff a foreign doc
144
- # against your canonical template's structure.
145
- extract their_msa.docx --fields clauses | compare-cli align --stdin \
146
- --against msa/standard
147
-
148
- # 3) Archive structured metadata for any inbound paper into the post-signature
149
- # vault, keyed by content hash.
150
- extract signed_contract.pdf | contract-vault put --from-extract - \
151
- --id "$(extract signed_contract.pdf | jq -r .document.sha256)"
152
-
153
- # 4) Triage a folder of inbound contracts: list governing law + parties.
154
- for f in inbox/*.pdf; do
144
+ # 1) Inspect any contract's structure (.md/.txt/.html/.docx/.pdf, one tool).
145
+ extract counterparty.docx | jq '{parties: [.parties[].name],
146
+ governing_law: .governing_law.value, clauses: [.clauses[].canonical_title]}'
147
+
148
+ # 2) Clause-coverage gap vs your canonical template in template-vault-cli.
149
+ # extract normalizes the counterparty's *foreign* headings onto the same
150
+ # clause vocabulary template-vault detects, so a plain `comm` diffs them.
151
+ template-vault info nda/mutual-standard --json | jq -r '.clauses[].title' | sort > ours.txt
152
+ extract counterparty_nda.docx | jq -r '.clauses[].canonical_title' | sort -u > theirs.txt
153
+ comm -23 ours.txt theirs.txt # clauses in OUR standard that THEY are missing
154
+ comm -13 ours.txt theirs.txt # clauses THEY added that we don't have
155
+
156
+ # 3) Intake: extract for structure, nda-review-cli for a policy verdict on the
157
+ # same foreign doc; merge both views with jq.
158
+ extract counterparty_nda.docx > extract.json
159
+ nda-review review --file counterparty_nda.docx --playbook output/nda_playbook.json \
160
+ --out-json review.json
161
+ jq -n --slurpfile e extract.json --slurpfile r review.json \
162
+ '{parties: [$e[0].parties[].name], governing_law: $e[0].governing_law.value,
163
+ clauses: ($e[0].clauses | length), decision: $r[0].decision, risk: $r[0].risk_score}'
164
+
165
+ # 4) Triage a folder of inbound contracts: governing law + parties per file.
166
+ for f in inbox/*; do
155
167
  extract "$f" --fields parties,governing_law --no-confidence \
156
- | jq -c '{file: input_filename, gov: .governing_law, parties: [.parties[].name]}'
168
+ | jq -c --arg f "$f" '{file: $f, gov: .governing_law, parties: [.parties[].name]}'
157
169
  done
158
170
 
159
- # 5) Gate a workflow on extraction confidence.
171
+ # 5) Gate a workflow on extraction confidence (non-zero exit if any clause is shaky).
160
172
  extract draft.docx | jq -e '.clauses | all(.confidence > 0.7)' && echo "ok to review"
161
173
  ```
162
174
 
163
- > The `--from-extract`/`--stdin` flags above are the consumption points the
164
- > sibling CLIs expose (or are adopting) for this contract; see
165
- > [`docs/INTEROP.md`](docs/INTEROP.md) for the shared conventions and the
166
- > versioning commitment on the schema.
175
+ > The integration contract is the **output schema** and the **canonical clause
176
+ > vocabulary**, not per-tool flags. See [`docs/INTEROP.md`](docs/INTEROP.md) for
177
+ > the shared conventions and the schema's versioning commitment.
167
178
 
168
179
  ## LLM configuration (opt-in)
169
180
 
@@ -43,11 +43,11 @@ import urllib.request
43
43
  from pathlib import Path
44
44
  from typing import Any, Dict, List, Optional, Tuple
45
45
 
46
- __version__ = "0.1.5"
46
+ __version__ = "0.1.6"
47
47
 
48
48
  # Bumped independently of the package version when the *extraction logic*
49
49
  # changes in a way downstream consumers should notice. Embedded in `_meta`.
50
- EXTRACTOR_VERSION = "0.1.5"
50
+ EXTRACTOR_VERSION = "0.1.6"
51
51
 
52
52
  # JSON Schema version of the output contract (docs/spec/extract-output.schema.json).
53
53
  SCHEMA_VERSION = 1
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "extract-cli"
7
- version = "0.1.5"
7
+ version = "0.1.6"
8
8
  description = "Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -138,7 +138,7 @@
138
138
  "source": "deterministic"
139
139
  },
140
140
  "_meta": {
141
- "extractor_version": "0.1.5",
141
+ "extractor_version": "0.1.6",
142
142
  "tiers_used": [
143
143
  "deterministic"
144
144
  ],
@@ -133,7 +133,7 @@
133
133
  "source": "none"
134
134
  },
135
135
  "_meta": {
136
- "extractor_version": "0.1.5",
136
+ "extractor_version": "0.1.6",
137
137
  "tiers_used": [
138
138
  "deterministic"
139
139
  ],
@@ -133,7 +133,7 @@
133
133
  "source": "deterministic"
134
134
  },
135
135
  "_meta": {
136
- "extractor_version": "0.1.5",
136
+ "extractor_version": "0.1.6",
137
137
  "tiers_used": [
138
138
  "deterministic"
139
139
  ],
@@ -133,7 +133,7 @@
133
133
  "source": "deterministic"
134
134
  },
135
135
  "_meta": {
136
- "extractor_version": "0.1.5",
136
+ "extractor_version": "0.1.6",
137
137
  "tiers_used": [
138
138
  "deterministic"
139
139
  ],
@@ -143,7 +143,7 @@
143
143
  "source": "none"
144
144
  },
145
145
  "_meta": {
146
- "extractor_version": "0.1.5",
146
+ "extractor_version": "0.1.6",
147
147
  "tiers_used": [
148
148
  "deterministic"
149
149
  ],
@@ -48,7 +48,7 @@
48
48
  "source": "none"
49
49
  },
50
50
  "_meta": {
51
- "extractor_version": "0.1.5",
51
+ "extractor_version": "0.1.6",
52
52
  "tiers_used": [
53
53
  "deterministic"
54
54
  ],
@@ -133,7 +133,7 @@
133
133
  "source": "deterministic"
134
134
  },
135
135
  "_meta": {
136
- "extractor_version": "0.1.5",
136
+ "extractor_version": "0.1.6",
137
137
  "tiers_used": [
138
138
  "deterministic"
139
139
  ],
@@ -148,7 +148,7 @@
148
148
  "source": "deterministic"
149
149
  },
150
150
  "_meta": {
151
- "extractor_version": "0.1.5",
151
+ "extractor_version": "0.1.6",
152
152
  "tiers_used": [
153
153
  "deterministic"
154
154
  ],
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes