extract-cli 0.1.5__tar.gz → 0.1.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {extract_cli-0.1.5 → extract_cli-0.1.6}/CHANGELOG.md +18 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/PKG-INFO +35 -24
- {extract_cli-0.1.5 → extract_cli-0.1.6}/README.md +34 -23
- {extract_cli-0.1.5 → extract_cli-0.1.6}/extract_cli.py +2 -2
- {extract_cli-0.1.5 → extract_cli-0.1.6}/pyproject.toml +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/employment_docx.docx.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/heading_docx.docx.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/lease_allcaps.txt.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/license_pdf.pdf.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/nda_h2.md.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/scanned.pdf.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_bold.txt.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_html.html.expected.json +1 -1
- {extract_cli-0.1.5 → extract_cli-0.1.6}/.gitignore +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/ARCHITECTURE.md +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/CONTRIBUTING.md +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/LICENSE +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/Makefile +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/config/llm.json.example +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/docs/INTEROP.md +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/docs/spec/extract-output.schema.json +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/scripts/release.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/scripts/validate_against_spec.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_fixtures_build.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_make_goldens.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/_schema_validator.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/conftest.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/employment_docx.docx +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/heading_docx.docx +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/lease_allcaps.txt +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/license_pdf.pdf +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/nda_h2.md +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/scanned.pdf +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_bold.txt +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/fixtures/services_html.html +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_clause_map.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_cli.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_deterministic.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_llm.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_misc.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_property.py +0 -0
- {extract_cli-0.1.5 → extract_cli-0.1.6}/tests/test_schema_conformance.py +0 -0
|
@@ -6,6 +6,23 @@ to [Semantic Versioning](https://semver.org/). Per the suite convention
|
|
|
6
6
|
(see [`docs/INTEROP.md`](docs/INTEROP.md)), **backward-incompatible changes to
|
|
7
7
|
the output schema require a major version bump**; new optional fields are minor.
|
|
8
8
|
|
|
9
|
+
## [0.1.6] - 2026-05-21
|
|
10
|
+
|
|
11
|
+
### Docs
|
|
12
|
+
- **Rewrote the README composability section to verified, runnable examples.**
|
|
13
|
+
Testing extract-cli against the real sibling CLIs (`template-vault-cli`,
|
|
14
|
+
`nda-review-cli`) showed the previous pipes were aspirational — the siblings
|
|
15
|
+
expose no `--from-extract`/`--stdin` flag (`nda-review review` takes
|
|
16
|
+
`--file`/`--text`; `template-vault` reads its own vault). The integration
|
|
17
|
+
contract is the **output schema + the shared canonical clause vocabulary**,
|
|
18
|
+
glued by stdout JSON and standard tools (`jq`, `comm`): `extract`'s
|
|
19
|
+
`canonical_title` values are the same names template-vault detects and
|
|
20
|
+
nda-review keys policy on, so a foreign document's clauses line up with the
|
|
21
|
+
suite's with no bespoke adapter. New examples cover clause-coverage gap
|
|
22
|
+
analysis against a vault template and a combined extract+nda-review intake
|
|
23
|
+
report — all runnable today. (Also fixed a broken `jq input_filename` in the
|
|
24
|
+
folder-triage example.) No code or schema change.
|
|
25
|
+
|
|
9
26
|
## [0.1.5] - 2026-05-21
|
|
10
27
|
|
|
11
28
|
### Added
|
|
@@ -181,6 +198,7 @@ Initial release — the open-loop front door of the contract-ops CLI suite.
|
|
|
181
198
|
intentionally *not* governed by the output schema (the schema describes the
|
|
182
199
|
full default output).
|
|
183
200
|
|
|
201
|
+
[0.1.6]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.6
|
|
184
202
|
[0.1.5]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.5
|
|
185
203
|
[0.1.4]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.4
|
|
186
204
|
[0.1.3]: https://github.com/DrBaher/extract-cli/releases/tag/v0.1.3
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: extract-cli
|
|
3
|
-
Version: 0.1.5
|
|
3
|
+
Version: 0.1.6
|
|
4
4
|
Summary: Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON.
|
|
5
5
|
Project-URL: Homepage, https://cli.drbaher.com/
|
|
6
6
|
Project-URL: Repository, https://github.com/DrBaher/extract-cli
|
|
@@ -171,37 +171,48 @@ extract counterparty.pdf | jq '.clauses[] | {canonical_title, detected_title, ma
|
|
|
171
171
|
|
|
172
172
|
## Composability — piping into the rest of the suite
|
|
173
173
|
|
|
174
|
-
`extract-cli` is built to be the first stage of a Unix pipe.
|
|
175
|
-
|
|
174
|
+
`extract-cli` is built to be the first stage of a Unix pipe. The glue is its
|
|
175
|
+
**stdout JSON + standard tools** (`jq`, `comm`) and the **shared clause
|
|
176
|
+
vocabulary** — `extract`'s `canonical_title` values are the same names
|
|
177
|
+
`template-vault-cli` detects and `nda-review-cli` keys policy on, so a foreign
|
|
178
|
+
document's clauses line up with the suite's with no bespoke adapter. Every
|
|
179
|
+
example below is runnable today (verified against the real sibling CLIs).
|
|
176
180
|
|
|
177
181
|
```bash
|
|
178
|
-
# 1)
|
|
179
|
-
extract
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
#
|
|
183
|
-
extract
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
#
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
#
|
|
192
|
-
|
|
182
|
+
# 1) Inspect any contract's structure (.md/.txt/.html/.docx/.pdf, one tool).
|
|
183
|
+
extract counterparty.docx | jq '{parties: [.parties[].name],
|
|
184
|
+
governing_law: .governing_law.value, clauses: [.clauses[].canonical_title]}'
|
|
185
|
+
|
|
186
|
+
# 2) Clause-coverage gap vs your canonical template in template-vault-cli.
|
|
187
|
+
# extract normalizes the counterparty's *foreign* headings onto the same
|
|
188
|
+
# clause vocabulary template-vault detects, so a plain `comm` diffs them.
|
|
189
|
+
template-vault info nda/mutual-standard --json | jq -r '.clauses[].title' | sort > ours.txt
|
|
190
|
+
extract counterparty_nda.docx | jq -r '.clauses[].canonical_title' | sort -u > theirs.txt
|
|
191
|
+
comm -23 ours.txt theirs.txt # clauses in OUR standard that THEY are missing
|
|
192
|
+
comm -13 ours.txt theirs.txt # clauses THEY added that we don't have
|
|
193
|
+
|
|
194
|
+
# 3) Intake: extract for structure, nda-review-cli for a policy verdict on the
|
|
195
|
+
# same foreign doc; merge both views with jq.
|
|
196
|
+
extract counterparty_nda.docx > extract.json
|
|
197
|
+
nda-review review --file counterparty_nda.docx --playbook output/nda_playbook.json \
|
|
198
|
+
--out-json review.json
|
|
199
|
+
jq -n --slurpfile e extract.json --slurpfile r review.json \
|
|
200
|
+
'{parties: [$e[0].parties[].name], governing_law: $e[0].governing_law.value,
|
|
201
|
+
clauses: ($e[0].clauses | length), decision: $r[0].decision, risk: $r[0].risk_score}'
|
|
202
|
+
|
|
203
|
+
# 4) Triage a folder of inbound contracts: governing law + parties per file.
|
|
204
|
+
for f in inbox/*; do
|
|
193
205
|
extract "$f" --fields parties,governing_law --no-confidence \
|
|
194
|
-
| jq -c '{file:
|
|
206
|
+
| jq -c --arg f "$f" '{file: $f, gov: .governing_law, parties: [.parties[].name]}'
|
|
195
207
|
done
|
|
196
208
|
|
|
197
|
-
# 5) Gate a workflow on extraction confidence.
|
|
209
|
+
# 5) Gate a workflow on extraction confidence (non-zero exit if any clause is shaky).
|
|
198
210
|
extract draft.docx | jq -e '.clauses | all(.confidence > 0.7)' && echo "ok to review"
|
|
199
211
|
```
|
|
200
212
|
|
|
201
|
-
> The
|
|
202
|
-
>
|
|
203
|
-
>
|
|
204
|
-
> versioning commitment on the schema.
|
|
213
|
+
> The integration contract is the **output schema** and the **canonical clause
|
|
214
|
+
> vocabulary**, not per-tool flags. See [`docs/INTEROP.md`](docs/INTEROP.md) for
|
|
215
|
+
> the shared conventions and the schema's versioning commitment.
|
|
205
216
|
|
|
206
217
|
## LLM configuration (opt-in)
|
|
207
218
|
|
|
@@ -133,37 +133,48 @@ extract counterparty.pdf | jq '.clauses[] | {canonical_title, detected_title, ma
|
|
|
133
133
|
|
|
134
134
|
## Composability — piping into the rest of the suite
|
|
135
135
|
|
|
136
|
-
`extract-cli` is built to be the first stage of a Unix pipe.
|
|
137
|
-
|
|
136
|
+
`extract-cli` is built to be the first stage of a Unix pipe. The glue is its
|
|
137
|
+
**stdout JSON + standard tools** (`jq`, `comm`) and the **shared clause
|
|
138
|
+
vocabulary** — `extract`'s `canonical_title` values are the same names
|
|
139
|
+
`template-vault-cli` detects and `nda-review-cli` keys policy on, so a foreign
|
|
140
|
+
document's clauses line up with the suite's with no bespoke adapter. Every
|
|
141
|
+
example below is runnable today (verified against the real sibling CLIs).
|
|
138
142
|
|
|
139
143
|
```bash
|
|
140
|
-
# 1)
|
|
141
|
-
extract
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
#
|
|
145
|
-
extract
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
#
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
#
|
|
154
|
-
|
|
144
|
+
# 1) Inspect any contract's structure (.md/.txt/.html/.docx/.pdf, one tool).
|
|
145
|
+
extract counterparty.docx | jq '{parties: [.parties[].name],
|
|
146
|
+
governing_law: .governing_law.value, clauses: [.clauses[].canonical_title]}'
|
|
147
|
+
|
|
148
|
+
# 2) Clause-coverage gap vs your canonical template in template-vault-cli.
|
|
149
|
+
# extract normalizes the counterparty's *foreign* headings onto the same
|
|
150
|
+
# clause vocabulary template-vault detects, so a plain `comm` diffs them.
|
|
151
|
+
template-vault info nda/mutual-standard --json | jq -r '.clauses[].title' | sort > ours.txt
|
|
152
|
+
extract counterparty_nda.docx | jq -r '.clauses[].canonical_title' | sort -u > theirs.txt
|
|
153
|
+
comm -23 ours.txt theirs.txt # clauses in OUR standard that THEY are missing
|
|
154
|
+
comm -13 ours.txt theirs.txt # clauses THEY added that we don't have
|
|
155
|
+
|
|
156
|
+
# 3) Intake: extract for structure, nda-review-cli for a policy verdict on the
|
|
157
|
+
# same foreign doc; merge both views with jq.
|
|
158
|
+
extract counterparty_nda.docx > extract.json
|
|
159
|
+
nda-review review --file counterparty_nda.docx --playbook output/nda_playbook.json \
|
|
160
|
+
--out-json review.json
|
|
161
|
+
jq -n --slurpfile e extract.json --slurpfile r review.json \
|
|
162
|
+
'{parties: [$e[0].parties[].name], governing_law: $e[0].governing_law.value,
|
|
163
|
+
clauses: ($e[0].clauses | length), decision: $r[0].decision, risk: $r[0].risk_score}'
|
|
164
|
+
|
|
165
|
+
# 4) Triage a folder of inbound contracts: governing law + parties per file.
|
|
166
|
+
for f in inbox/*; do
|
|
155
167
|
extract "$f" --fields parties,governing_law --no-confidence \
|
|
156
|
-
| jq -c '{file:
|
|
168
|
+
| jq -c --arg f "$f" '{file: $f, gov: .governing_law, parties: [.parties[].name]}'
|
|
157
169
|
done
|
|
158
170
|
|
|
159
|
-
# 5) Gate a workflow on extraction confidence.
|
|
171
|
+
# 5) Gate a workflow on extraction confidence (non-zero exit if any clause is shaky).
|
|
160
172
|
extract draft.docx | jq -e '.clauses | all(.confidence > 0.7)' && echo "ok to review"
|
|
161
173
|
```
|
|
162
174
|
|
|
163
|
-
> The
|
|
164
|
-
>
|
|
165
|
-
>
|
|
166
|
-
> versioning commitment on the schema.
|
|
175
|
+
> The integration contract is the **output schema** and the **canonical clause
|
|
176
|
+
> vocabulary**, not per-tool flags. See [`docs/INTEROP.md`](docs/INTEROP.md) for
|
|
177
|
+
> the shared conventions and the schema's versioning commitment.
|
|
167
178
|
|
|
168
179
|
## LLM configuration (opt-in)
|
|
169
180
|
|
|
@@ -43,11 +43,11 @@ import urllib.request
|
|
|
43
43
|
from pathlib import Path
|
|
44
44
|
from typing import Any, Dict, List, Optional, Tuple
|
|
45
45
|
|
|
46
|
-
__version__ = "0.1.5"
|
|
46
|
+
__version__ = "0.1.6"
|
|
47
47
|
|
|
48
48
|
# Bumped independently of the package version when the *extraction logic*
|
|
49
49
|
# changes in a way downstream consumers should notice. Embedded in `_meta`.
|
|
50
|
-
EXTRACTOR_VERSION = "0.1.
|
|
50
|
+
EXTRACTOR_VERSION = "0.1.6"
|
|
51
51
|
|
|
52
52
|
# JSON Schema version of the output contract (docs/spec/extract-output.schema.json).
|
|
53
53
|
SCHEMA_VERSION = 1
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "extract-cli"
|
|
7
|
-
version = "0.1.5"
|
|
7
|
+
version = "0.1.6"
|
|
8
8
|
description = "Open-loop front door of the contract-ops CLI suite: ingest any contract (.md/.txt/.html/.docx/.pdf) and emit structured JSON."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.9"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|