dos-kernel 0.22.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dos/__init__.py +261 -0
- dos/_bin/dos-hook.exe +0 -0
- dos/_filelock.py +255 -0
- dos/_job_policy.py +97 -0
- dos/_tree.py +145 -0
- dos/admission.py +433 -0
- dos/answer_shape.py +299 -0
- dos/arbiter.py +859 -0
- dos/archive_lock.py +266 -0
- dos/arg_provenance.py +814 -0
- dos/attest.py +472 -0
- dos/breaker.py +311 -0
- dos/churn.py +226 -0
- dos/claim_extract.py +229 -0
- dos/claim_ttl.py +150 -0
- dos/cli.py +8721 -0
- dos/commit_audit.py +666 -0
- dos/completion.py +466 -0
- dos/concurrency_class.py +154 -0
- dos/config.py +1380 -0
- dos/config_lint.py +464 -0
- dos/cooldown.py +390 -0
- dos/coverage.py +387 -0
- dos/dangling_intent.py +287 -0
- dos/data_class.py +397 -0
- dos/decisions.py +1274 -0
- dos/decisions_tui.py +251 -0
- dos/dispatch_top.py +740 -0
- dos/dispatch_top_tui.py +116 -0
- dos/drivers/__init__.py +40 -0
- dos/drivers/ci_status.py +630 -0
- dos/drivers/citation_resolve.py +703 -0
- dos/drivers/decision_stop.py +98 -0
- dos/drivers/export_file.py +173 -0
- dos/drivers/export_otlp.py +275 -0
- dos/drivers/export_statsd.py +242 -0
- dos/drivers/hook_dialects.py +391 -0
- dos/drivers/job.py +47 -0
- dos/drivers/llm_judge.py +360 -0
- dos/drivers/memory_recall.py +1231 -0
- dos/drivers/notify_slack.py +373 -0
- dos/drivers/notify_webhook.py +251 -0
- dos/drivers/operator_judge.py +114 -0
- dos/drivers/os_acceptance.py +228 -0
- dos/drivers/paste_log.py +132 -0
- dos/drivers/plan_scope.py +133 -0
- dos/drivers/self_improve.py +375 -0
- dos/drivers/similarity_judge.py +249 -0
- dos/drivers/state_diff.py +274 -0
- dos/drivers/supervisor.py +347 -0
- dos/drivers/watchdog.py +363 -0
- dos/drivers/workshop.py +160 -0
- dos/durable_schema.py +344 -0
- dos/effect_witness.py +393 -0
- dos/efficiency.py +318 -0
- dos/enforce.py +414 -0
- dos/enumerate.py +776 -0
- dos/env_print.py +378 -0
- dos/event_severity.py +258 -0
- dos/evidence.py +692 -0
- dos/exec_capability.py +256 -0
- dos/export_cursor.py +143 -0
- dos/exporter.py +320 -0
- dos/firing_label.py +353 -0
- dos/fleet_roll.py +226 -0
- dos/gate_classify.py +827 -0
- dos/gh4_coverage.py +179 -0
- dos/git_delta.py +122 -0
- dos/guard.py +215 -0
- dos/health.py +552 -0
- dos/help_summary.py +519 -0
- dos/home.py +934 -0
- dos/hook_binary.py +194 -0
- dos/hook_dialect.py +271 -0
- dos/hook_exit.py +191 -0
- dos/hook_install.py +437 -0
- dos/id_alloc.py +304 -0
- dos/improve.py +499 -0
- dos/intent_ledger.py +635 -0
- dos/interpret.py +176 -0
- dos/intervention.py +769 -0
- dos/intervention_eval.py +371 -0
- dos/journal_delta.py +308 -0
- dos/judge_eval.py +328 -0
- dos/judges.py +366 -0
- dos/lane_infer.py +127 -0
- dos/lane_journal.py +1001 -0
- dos/lane_lease.py +952 -0
- dos/lane_overlap.py +228 -0
- dos/lease_health.py +282 -0
- dos/lifecycle.py +211 -0
- dos/liveness.py +352 -0
- dos/lock_modes.py +185 -0
- dos/log_source.py +395 -0
- dos/loop_decide.py +1746 -0
- dos/marker_gate.py +254 -0
- dos/marker_sensor.py +396 -0
- dos/noop_streak.py +280 -0
- dos/notify.py +479 -0
- dos/observe.py +175 -0
- dos/oracle.py +1661 -0
- dos/overlap_eval.py +214 -0
- dos/overlap_policy.py +342 -0
- dos/packet_sidecar.py +267 -0
- dos/phase_shipped.py +1985 -0
- dos/pick_priority.py +225 -0
- dos/pickable.py +369 -0
- dos/picker_oracle.py +1037 -0
- dos/plan_board.py +513 -0
- dos/plan_board_tui.py +113 -0
- dos/plan_source.py +455 -0
- dos/posttool_sensor.py +528 -0
- dos/precursor_gate.py +499 -0
- dos/precursor_gate_eval.py +239 -0
- dos/preflight.py +825 -0
- dos/pretool_sensor.py +490 -0
- dos/proc_delta.py +181 -0
- dos/productivity.py +296 -0
- dos/provider_limit.py +242 -0
- dos/py.typed +4 -0
- dos/reason_morphology.py +299 -0
- dos/reasons.py +449 -0
- dos/reconcile.py +173 -0
- dos/recurring_wedge.py +206 -0
- dos/render.py +393 -0
- dos/result_state.py +468 -0
- dos/resume.py +578 -0
- dos/resume_evidence.py +293 -0
- dos/retention.py +344 -0
- dos/reward.py +372 -0
- dos/rewind.py +587 -0
- dos/rewind_evidence.py +168 -0
- dos/rewind_tokens.py +252 -0
- dos/run_id.py +342 -0
- dos/scope.py +520 -0
- dos/scope_source.py +382 -0
- dos/scout.py +982 -0
- dos/self_modify.py +209 -0
- dos/sibling_scan.py +569 -0
- dos/skills/EXAMPLES.md +584 -0
- dos/skills/dos-class-cycle/SKILL.md +107 -0
- dos/skills/dos-dispatch/SKILL.md +177 -0
- dos/skills/dos-dispatch-loop/SKILL.md +254 -0
- dos/skills/dos-goal-gate/SKILL.md +269 -0
- dos/skills/dos-next-up/SKILL.md +231 -0
- dos/skills/dos-promote/SKILL.md +114 -0
- dos/skills/dos-replan/SKILL.md +159 -0
- dos/skills/dos-replan-loop/SKILL.md +114 -0
- dos/skills/dos-self-improve/SKILL.md +213 -0
- dos/skills/dos-supervise-loop/SKILL.md +180 -0
- dos/skills/dos-unstick/SKILL.md +108 -0
- dos/skills/dos-witness-claim/SKILL.md +251 -0
- dos/stamp.py +1002 -0
- dos/state_health.py +387 -0
- dos/status.py +114 -0
- dos/stop_policy.py +334 -0
- dos/supervise.py +1014 -0
- dos/testwitness.py +392 -0
- dos/timeline.py +1027 -0
- dos/tokens.py +485 -0
- dos/tool_stream.py +393 -0
- dos/tool_stream_eval.py +226 -0
- dos/trace.py +524 -0
- dos/verdict.py +140 -0
- dos/verdict_cli.py +189 -0
- dos/verdict_journal.py +497 -0
- dos/verdict_rollup.py +217 -0
- dos/verdicts.py +181 -0
- dos/wedge_reason.py +282 -0
- dos_kernel-0.22.0.dist-info/METADATA +859 -0
- dos_kernel-0.22.0.dist-info/RECORD +178 -0
- dos_kernel-0.22.0.dist-info/WHEEL +5 -0
- dos_kernel-0.22.0.dist-info/entry_points.txt +39 -0
- dos_kernel-0.22.0.dist-info/licenses/LICENSE +21 -0
- dos_kernel-0.22.0.dist-info/top_level.txt +2 -0
- dos_mcp/__init__.py +52 -0
- dos_mcp/py.typed +2 -0
- dos_mcp/server.py +779 -0
|
@@ -0,0 +1,703 @@
|
|
|
1
|
+
"""dos.drivers.citation_resolve — the legal-citation witness (docs/277 §6 #1, docs/279).
|
|
2
|
+
|
|
3
|
+
The catastrophic, *sanctioned* legal-AI failure is the **fabricated citation** (the
|
|
4
|
+
*Mata v. Avianca* class — fake cases cited to a federal court, $5,000 sanction, May
|
|
5
|
+
2023). Stanford measured 17–33% hallucination on legal-RAG tools, and the field's own
|
|
6
|
+
verdict (Harvey-LAB, 2026) is that citation hallucination "is not captured by any
|
|
7
|
+
benchmark." That failure sits on DOS's *cleanest* rung: a cited case either **resolves
|
|
8
|
+
in a third-party reporter** — bytes the agent authored zero of — or it does not.
|
|
9
|
+
|
|
10
|
+
This is the second occupant of the docs/265 `dos.evidence_sources` seam (the first is
|
|
11
|
+
`ci_status`), and the same **move (B)**: a new artifact oracle for a non-git surface.
|
|
12
|
+
It has the surface the kernel forbids — network I/O against a third party
|
|
13
|
+
(CourtListener / Free Law Project) — so it lives HERE, in a driver, exactly as
|
|
14
|
+
`ci_status` / `llm_judge` do, and for the same structural reason. It imports the kernel;
|
|
15
|
+
the kernel never imports it (`drivers/__init__` rule).
|
|
16
|
+
|
|
17
|
+
What it witnesses (Tier 1 — existence + quote-fidelity), and what it does NOT
|
|
18
|
+
============================================================================
|
|
19
|
+
It answers "does this citation EXIST, and does the quoted holding MATCH the resolved
|
|
20
|
+
opinion?" It does **not** make the legal argument correct (Tier 3 — abstain). A
|
|
21
|
+
caught-count (`J`) here is a flagged fabrication, never a won case. Selling it as "DOS
|
|
22
|
+
verifies legal correctness" is the docs/277 §7 over-claim — and in this domain an
|
|
23
|
+
over-claim is a liability, not a bug.
|
|
24
|
+
|
|
25
|
+
The shape (the `ci_status` template, verbatim)
|
|
26
|
+
==============================================
|
|
27
|
+
* the **boundary reader** `gather()` mirrors `dos.git_delta`/`ci_status.gather`: the
|
|
28
|
+
HTTP call (`urllib` against CourtListener) happens HERE, at the caller boundary, and
|
|
29
|
+
every failure mode (no token, network error, timeout, rate-limit, malformed JSON)
|
|
30
|
+
degrades to an honest `ABSTAIN` evidence object — never a crash, never a fabricated
|
|
31
|
+
RESOLVED. A deployment with no corpus access gets "abstain," the truthful floor.
|
|
32
|
+
* the **pure classifier** `classify(CitationEvidence, CitationPolicy) -> CitationVerdict`
|
|
33
|
+
is in the `classify(Evidence, Policy) -> Verdict` family: a closed-enum verdict, a
|
|
34
|
+
frozen caller-gathered evidence dataclass, a frozen policy, an operator-facing
|
|
35
|
+
`reason`, a `to_dict()`. `classify()` makes NO I/O — the whole verdict is
|
|
36
|
+
replay-testable on frozen fixtures (the family discipline, and what makes the
|
|
37
|
+
benchmark's $0 replay deterministic).
|
|
38
|
+
|
|
39
|
+
The two non-forgeable operands (why resolution ALONE is insufficient — docs/279 §3)
|
|
40
|
+
===================================================================================
|
|
41
|
+
A *Mata* fabrication, `92 F.3d 1074` (Hyatt v. N. Cent. Airlines), *resolves* in the
|
|
42
|
+
reporter — but to *Grilli v. Metropolitan Life*, a DIFFERENT real case: the fabricator
|
|
43
|
+
reused a real reporter slot with a wrong case name. So citation-string resolution alone
|
|
44
|
+
would rubber-stamp it. We therefore check TWO operands, BOTH authored by Free Law
|
|
45
|
+
Project (THIRD_PARTY): (1) a cluster carries the claimed **citation string** AND (2) the
|
|
46
|
+
cluster's **case name** agrees with the claimed party names. A cite that resolves to a
|
|
47
|
+
name that does not match is `UNRESOLVED` — the citation, *as claimed*, does not exist.
|
|
48
|
+
|
|
49
|
+
The resolver fitness (docs/279 §2)
|
|
50
|
+
==================================
|
|
51
|
+
CourtListener has two endpoints. `/citation-lookup/` is the purpose-built
|
|
52
|
+
normalized-citation resolver but needs a TOKEN (rate-limited). `/search/` is
|
|
53
|
+
unauthenticated but is a full-text relevance search, NOT a citation index — its recall
|
|
54
|
+
on real cites is unreliable. So: prefer `/citation-lookup/` when `COURTLISTENER_TOKEN`
|
|
55
|
+
is set; fall back to `/search/` exact-citation-array match otherwise; ABSTAIN on no
|
|
56
|
+
access. The reproducible *measured* benchmark scores against a FROZEN local sample
|
|
57
|
+
(`benchmark/legalcite/`); the live driver is for adoption, not the headline number.
|
|
58
|
+
|
|
59
|
+
The quote-match (docs/156 `derived_witness`)
|
|
60
|
+
============================================
|
|
61
|
+
The quoted holding is matched against the resolved opinion text by a DECLARED op
|
|
62
|
+
(normalized substring containment) — committed up front, never reverse-searched to fit.
|
|
63
|
+
A mis-quote (resolved cite, quote absent) is `RESOLVED_MISMATCH` → REFUTED, distinct
|
|
64
|
+
from "no signal." Quote-matching needs the opinion BODY, which the unauthenticated
|
|
65
|
+
search snippet does not always carry, so the verdict is honest: with no opinion text the
|
|
66
|
+
quote rung ABSTAINs (it does not claim a match it could not check), and the citation
|
|
67
|
+
rung still stands on its own.
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
from __future__ import annotations
|
|
71
|
+
|
|
72
|
+
import argparse
|
|
73
|
+
import enum
|
|
74
|
+
import json
|
|
75
|
+
import os
|
|
76
|
+
import re
|
|
77
|
+
import urllib.error
|
|
78
|
+
import urllib.parse
|
|
79
|
+
import urllib.request
|
|
80
|
+
from dataclasses import dataclass
|
|
81
|
+
from typing import Optional
|
|
82
|
+
|
|
83
|
+
# Imports the kernel — never the other way round (the driver rule). The evidence
|
|
84
|
+
# vocabulary for the `EvidenceSource` face; `config` only for the CLI workspace seam.
|
|
85
|
+
from dos.evidence import Accountability, EvidenceFacts
|
|
86
|
+
|
|
87
|
+
# Root URL of the public Free Law Project (CourtListener) service. A deployment that
# targets a mirror overrides it with --base / base=.
DEFAULT_BASE = "https://www.courtlistener.com"
# Upper bound, in seconds, passed as the HTTP timeout so a hung API cannot stall a
# gather. Follows the `ci_status._GH_TIMEOUT_S` discipline, set slightly longer to
# allow for a cold third-party search.
_HTTP_TIMEOUT_S = 25
# Name of the environment variable that may hold a CourtListener API token. When it
# is present the driver uses the purpose-built /citation-lookup/ endpoint; when it is
# absent, the noisier /search/ rung.
_TOKEN_ENV = "COURTLISTENER_TOKEN"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class Citation(str, enum.Enum):
    """Closed set of outcomes for one citation check — exactly one applies.

    Members are `str`-valued so a verdict can be used directly as a CLI token or as a
    key in an exit-code map, with no lookup table (the `Ci` / `Liveness` idiom).

    Four states are kept on purpose. A two-state valid/invalid result could not
    represent a mis-quote of a real case (RESOLVED_MISMATCH) or a check that had no
    corpus access (ABSTAIN) without asserting something the evidence does not show.
    """

    # The cite resolves and the quoted holding is found in the opinion.
    RESOLVED_MATCH = "RESOLVED_MATCH"
    # The cite resolves, but the quoted holding is absent from the opinion (mis-quote).
    RESOLVED_MISMATCH = "RESOLVED_MISMATCH"
    # No cluster carries this citation — the fabrication case.
    UNRESOLVED = "UNRESOLVED"
    # The corpus could not be consulted, so no verdict is asserted.
    ABSTAIN = "ABSTAIN"

    def __str__(self) -> str:  # pragma: no cover - trivial
        """Render the member as its bare value rather than `Citation.NAME`."""
        return self.value
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dataclass(frozen=True)
class CitationPolicy:
    """Tunable thresholds that decide which verdict a piece of evidence earns.

    Attributes:
        require_name_match: When True (the default), a cite that resolves only to
            clusters whose case name disagrees with the claimed party names is
            reported as UNRESOLVED. This guards against the docs/279 §3 collision,
            where a fabricated name is attached to a real reporter slot. A host that
            only has bare reporter strings (no claimed name) sets this to False.
        name_overlap_min: Smallest acceptable token overlap between the claimed name
            and the cluster name for the two to count as the same case. As computed
            by `_names_agree`, the overlap is the number of shared distinctive tokens
            divided by the size of the SMALLER token set, taken after stop-words
            such as "v.", "in re" and "et al." are removed. The default 0.34 is
            intended to mean roughly "at least one distinctive party token in
            common".
        quote_min_len: Quotes shorter than this many characters are treated as too
            generic to witness, so the quote rung abstains instead of accepting a
            coincidental match.
    """

    require_name_match: bool = True
    name_overlap_min: float = 0.34
    quote_min_len: int = 12


# Shared default instance, used as the `policy` default of `classify()`.
DEFAULT_POLICY = CitationPolicy()
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dataclass(frozen=True)
class ResolvedCluster:
    """A single reporter cluster as returned by CourtListener, in normalized form.

    The point of this record is authorship: its bytes come from Free Law Project's
    reporter database, not from the agent whose claim is being checked. That
    author-is-not-claimant split is the THIRD_PARTY rung the witness rests on.

    Attributes:
        name: The cluster's case name.
        citations: Every citation string the cluster carries (parallel cites).
        opinion_text: Opinion body when one was available — either a search snippet
            or the full opinion from a token read. Consulted only by the quote rung.
        text_is_full: True only when `opinion_text` is the complete opinion. A search
            snippet is just a fragment, so a quote missing from it proves nothing;
            the quote rung may refute a mis-quote only against full text and
            otherwise abstains on the quote, leaving the existence check to stand
            alone.
    """

    name: str
    citations: tuple[str, ...]
    opinion_text: str = ""
    text_is_full: bool = False
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass(frozen=True)
class CitationEvidence:
    """Input record for `classify()`, filled in by the caller ahead of the call.

    The verdict itself performs no network access (the `ci_status.CiEvidence` rule);
    whatever was fetched is carried here.

    Attributes:
        cite: Reporter citation string exactly as the agent claimed it.
        claimed_name: Case name as claimed (for example "Varghese v. China
            Southern"), compared with the resolved cluster's name. An empty string
            switches the name rung off for this cite (a bare-reporter claim).
        quote: Quoted holding as claimed; empty for a citation-only check.
        clusters: Reporter clusters whose citation array contains `cite` by exact,
            normalized match (not a fuzzy search hit). Empty means no reporter
            carries the cite, which leads to UNRESOLVED.
        reachable: False when the corpus call itself failed (network error, timeout,
            rate limit, bad JSON, or no usable resolver). The verdict is then always
            ABSTAIN regardless of `clusters`: nothing was observed, so nothing is
            asserted.
        detail: One-line note from the gather step — the resolver that was used or
            the class of error — repeated in the verdict reason for the operator.
    """

    cite: str
    claimed_name: str = ""
    quote: str = ""
    clusters: tuple[ResolvedCluster, ...] = ()
    reachable: bool = True
    detail: str = ""
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
@dataclass(frozen=True)
class CitationVerdict:
    """Result of `classify()`: the verdict plus the evidence it was derived from.

    Attributes:
        verdict: The `Citation` state that was reached.
        reason: Operator-facing sentence naming the fact that drove the verdict
            (for example which case the cite actually resolved to), not just the
            state name.
        evidence: The `CitationEvidence` the verdict was computed from, echoed back.
        matched_name: Name of the cluster the cite resolved to, or "" when none.
    """

    verdict: Citation
    reason: str
    evidence: CitationEvidence
    matched_name: str = ""

    def to_dict(self) -> dict:
        """Return the JSON-ready form used by `--json`, the benchmark and the queue.

        Clusters are reduced to their name and citation list; opinion text is not
        included in the output.
        """
        source = self.evidence
        cluster_rows = [
            {"name": cluster.name, "citations": list(cluster.citations)}
            for cluster in source.clusters
        ]
        evidence_view = {
            "cite": source.cite,
            "claimed_name": source.claimed_name,
            "quote": source.quote,
            "reachable": source.reachable,
            "detail": source.detail,
            "clusters": cluster_rows,
        }
        return {
            "verdict": self.verdict.value,
            "reason": self.reason,
            "matched_name": self.matched_name,
            "evidence": evidence_view,
        }
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# ---------------------------------------------------------------------------
|
|
236
|
+
# Normalization helpers — pure. The "same citation" / "same case" predicates.
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
|
|
239
|
+
# Matches a run of one or more whitespace characters; used to squeeze such runs down
# to a single space before comparing citations and quotes.
_WS = re.compile(r"\s+")
# Tokens removed from a case name before comparison: connectives and procedural or
# corporate-form words that say nothing about who the parties are.
_NAME_STOP = frozenset(
    "v vs in re the of et al ex rel co inc llc ltd corp company and a an".split()
)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _norm_cite(cite: str) -> str:
    """Return the comparison key for a citation string.

    Leading/trailing whitespace is removed, inner whitespace runs become one space and
    the result is lowercased, so '925   F.3d 1339 ' and '925 f.3d 1339' yield the same
    key. A falsy input (None or "") yields "". Comparison on this key is exact, not
    fuzzy; it is the equality the resolution rung relies on.
    """
    trimmed = (cite or "").strip()
    return _WS.sub(" ", trimmed).lower()
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _name_tokens(name: str) -> frozenset[str]:
    """Return the set of distinctive tokens in a case name.

    The name is lowercased and split into runs of ASCII letters and digits; tokens
    that are a single character or listed in `_NAME_STOP` are discarded. For example
    'Varghese v. China Southern Airlines' gives {varghese, china, southern, airlines}.
    A falsy name gives an empty set.
    """
    words = re.findall(r"[a-z0-9]+", (name or "").lower())
    kept = set()
    for word in words:
        if len(word) <= 1 or word in _NAME_STOP:
            continue
        kept.add(word)
    return frozenset(kept)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _names_agree(claimed: str, resolved: str, min_overlap: float) -> bool:
    """Decide whether two case names plausibly refer to the same case.

    Both names are reduced to distinctive tokens with `_name_tokens`. The score is the
    number of shared tokens divided by the size of the SMALLER token set, so extra
    tokens on one side do not count against agreement: a claimed 'Varghese v. China
    Southern' agrees with a resolved 'Varghese v. China Southern Airlines Co.'.

    Args:
        claimed: Case name as claimed.
        resolved: Case name of the resolved cluster.
        min_overlap: Minimum score, inclusive, required for agreement.

    Returns:
        True when the score reaches `min_overlap`. False when either name has no
        distinctive tokens — there is no identity to confirm; whether an empty
        claimed name should be checked at all is the caller's decision.
    """
    claimed_tokens = _name_tokens(claimed)
    resolved_tokens = _name_tokens(resolved)
    if not (claimed_tokens and resolved_tokens):
        return False
    shared = len(claimed_tokens & resolved_tokens)
    # Both sets are non-empty here, so the divisor is at least 1.
    smaller_side = min(len(claimed_tokens), len(resolved_tokens))
    return shared / smaller_side >= min_overlap
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def _quote_in_text(quote: str, text: str, *, min_len: int) -> "bool | None":
    """Check whether `quote` occurs in `text` by normalized substring containment.

    This is the declared operation of the docs/156 derivation: both sides have their
    whitespace runs collapsed to one space and are lowercased, then the quote is
    looked up as a substring. The opinion to search is fixed by the caller; this
    function never hunts for some opinion that happens to contain the quote.

    Args:
        quote: The quoted holding as claimed.
        text: Opinion text to search.
        min_len: Minimum length of the whitespace-normalized quote for it to be
            considered distinctive enough to check.

    Returns:
        True or False when the check could be performed; None (abstain) when the
        normalized quote is shorter than `min_len` or `text` is empty or blank.
    """
    needle = _WS.sub(" ", (quote or "").strip())
    # The length test comes first, so a too-short quote abstains before `text` is
    # examined at all.
    if len(needle) < min_len or not text or not text.strip():
        return None
    haystack = _WS.sub(" ", text).lower()
    return needle.lower() in haystack
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def classify(ev: CitationEvidence, policy: CitationPolicy = DEFAULT_POLICY) -> CitationVerdict:
    """Turn already-gathered evidence for one (cite, name, quote) claim into a verdict.

    Pure: no I/O is performed here. The rungs are evaluated in this order and the
    first one that applies wins:

      1. ABSTAIN            — `ev.reachable` is False. Checked first so a failed
                              read is never mistaken for a real verdict.
      2. UNRESOLVED         — no cluster carries the citation, or (with
                              `policy.require_name_match` and a non-blank claimed
                              name) clusters carry it but none has an agreeing
                              name: the docs/279 §3 collision.
      3. RESOLVED_MISMATCH  — the cite resolves, a quote was supplied, and it is
                              not in the FULL opinion text of the matched clusters.
      4. RESOLVED_MATCH     — the cite resolves and the quote matched, could not
                              be checked (no full text, or quote too short), or
                              was not supplied.

    Args:
        ev: The evidence gathered by the caller.
        policy: Thresholds for the name and quote rungs.

    Returns:
        A `CitationVerdict` whose `reason` names the deciding fact.
    """

    def _verdict(kind, reason, matched=""):
        # Every outcome echoes the same evidence; only kind/reason/name vary.
        return CitationVerdict(verdict=kind, reason=reason, evidence=ev, matched_name=matched)

    cite = ev.cite

    # Rung 1 — nothing was observed, so nothing is asserted.
    if not ev.reachable:
        why = f" — {ev.detail}" if ev.detail else " — resolver unreachable"
        return _verdict(Citation.ABSTAIN, f"no corpus access for '{cite}'" + why)

    # Keep only clusters whose citation array contains the cite (exact, normalized).
    wanted = _norm_cite(cite)
    candidates = [
        cluster
        for cluster in ev.clusters
        if any(_norm_cite(raw) == wanted for raw in cluster.citations)
    ]

    # Rung 2a — the citation string is not carried by any cluster.
    if not candidates:
        searched = ev.detail or "searched the reporter index"
        return _verdict(
            Citation.UNRESOLVED,
            f"no reporter cluster carries '{cite}' — citation does not resolve ({searched})",
        )

    # Rung 2b — collision guard: the slot is real, but is it the case that was named?
    if policy.require_name_match and ev.claimed_name.strip():
        same_case = [
            cluster
            for cluster in candidates
            if _names_agree(ev.claimed_name, cluster.name, policy.name_overlap_min)
        ]
        if not same_case:
            distinct_names = sorted({cluster.name for cluster in candidates if cluster.name})
            # Show at most three names in the reason.
            resolved_to = "; ".join(distinct_names[:3]) or "(unnamed cluster)"
            return _verdict(
                Citation.UNRESOLVED,
                f"'{cite}' resolves to a DIFFERENT case — claimed "
                f"'{ev.claimed_name}', reporter has '{resolved_to}' "
                "(citation as claimed does not exist; the docs/279 §3 collision)",
                resolved_to,
            )
        # From here on only the name-agreeing clusters are considered.
        candidates = same_case

    # First non-empty cluster name, or "" if every candidate is unnamed.
    matched_name = ""
    for cluster in candidates:
        if cluster.name:
            matched_name = cluster.name
            break

    # Rungs 3 / 4 — the quote is only tested against FULL opinion text. A snippet is
    # a fragment, so absence from it proves nothing (docs/279 §2).
    has_quote = bool(ev.quote.strip())
    if has_quote:
        full_bodies = [
            cluster.opinion_text
            for cluster in candidates
            if cluster.opinion_text and cluster.text_is_full
        ]
        found = _quote_in_text(ev.quote, "\n".join(full_bodies), min_len=policy.quote_min_len)
        if found is None:
            # Not checkable: the citation stands, and no quote match is claimed.
            return _verdict(
                Citation.RESOLVED_MATCH,
                f"'{cite}' resolves to '{matched_name}'; quote-fidelity not checkable "
                "(no opinion text or quote too short) — existence confirmed only",
                matched_name,
            )
        if found is False:
            return _verdict(
                Citation.RESOLVED_MISMATCH,
                f"'{cite}' resolves to '{matched_name}' but the quoted holding is "
                "NOT in the opinion text (a mis-quote — put words in the court's mouth)",
                matched_name,
            )

    # Rung 4 — existence confirmed; quote matched or none was supplied.
    quote_note = " and the quoted holding appears in the opinion" if has_quote else ""
    return _verdict(
        Citation.RESOLVED_MATCH,
        f"'{cite}' resolves to '{matched_name}'" + quote_note,
        matched_name,
    )
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# ---------------------------------------------------------------------------
|
|
400
|
+
# The boundary reader — the ONLY I/O path (mirrors dos.drivers.ci_status.gather).
|
|
401
|
+
# ---------------------------------------------------------------------------
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _http_get_json(url: str, *, token: str = "") -> "tuple[Optional[dict], str]":
    """GET `url` and parse the body as JSON, reporting failures without raising.

    This is the single guarded seam through which CourtListener is reached (the
    `ci_status._run_gh` discipline). Every anticipated failure is converted to
    `(None, <short reason>)` so that `gather()` can degrade to an unreachable
    evidence object and the verdict becomes ABSTAIN.

    Args:
        url: Fully-formed request URL.
        token: Optional API token; when non-empty it is sent as
            `Authorization: Token <token>`.

    Returns:
        `(payload, "")` on success, where `payload` is whatever JSON value the
        body decodes to (an object for /search/; `_clusters_from_lookup` also
        accepts a list). `(None, reason)` on any of:
          * HTTP 429 (rate limit), 401/403 (auth failure), any other HTTP error;
          * network errors and timeouts (`URLError`, `OSError`);
          * a malformed or scheme-less URL (`ValueError` from building the request);
          * protocol-level failures such as a truncated response
            (`http.client.HTTPException`);
          * a body that is not valid JSON.
    """
    # Function-scope import: this is the only place the HTTPException family is named.
    import http.client

    headers = {"User-Agent": "dos-citation-resolve/0.1 (https://github.com/anthony-chaudhary/dos)"}
    if token:
        headers["Authorization"] = f"Token {token}"
    try:
        # Building the Request is inside the guard: it raises ValueError for a URL
        # with no recognizable scheme (for example a mistyped --base).
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=_HTTP_TIMEOUT_S) as r:
            raw = r.read()
    except urllib.error.HTTPError as e:  # 4xx/5xx — must precede URLError (subclass)
        if e.code == 429:
            return None, "rate-limited (HTTP 429) — corpus quota exhausted"
        if e.code in (401, 403):
            return None, f"auth failure (HTTP {e.code})"
        return None, f"HTTP {e.code}"
    except urllib.error.URLError as e:  # network down / DNS / timeout
        return None, f"network error ({getattr(e, 'reason', e)!r})"
    except OSError as e:  # includes TimeoutError and socket-level errors
        return None, f"network error ({e.__class__.__name__})"
    except ValueError as e:  # unusable URL — nothing was sent
        return None, f"invalid resolver URL ({e})"
    except http.client.HTTPException as e:  # IncompleteRead, InvalidURL, BadStatusLine…
        return None, f"network error ({e.__class__.__name__})"
    try:
        return json.loads(raw.decode("utf-8", "replace")), ""
    except (ValueError, TypeError):
        return None, "malformed JSON from resolver"
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _clusters_from_search(data: dict) -> tuple[ResolvedCluster, ...]:
    """Convert a CourtListener /search/ response into `ResolvedCluster` records.

    Parsing is tolerant (the `ci_status` parse-defensive stance): anything with an
    unexpected shape is skipped or left empty rather than raising.

    For each dict in `data["results"]`:
      * `caseName` becomes the cluster name;
      * `citation`, when it is a list, supplies the citation strings (entries are
        stringified and stripped, blanks dropped);
      * the opinion text is the first non-blank string among `snippet`, `text`,
        `plain_text`, followed by every non-blank `snippet` / `text` found on the
        dicts in a nested `opinions` list, joined with newlines.

    `text_is_full` is left at its default (False) on every record produced here.

    Returns:
        A tuple of clusters, empty when `data` is not a dict or has no `results` list.
    """
    if not isinstance(data, dict):
        return ()
    hits = data.get("results")
    if not isinstance(hits, list):
        return ()

    clusters: list[ResolvedCluster] = []
    for hit in hits:
        if not isinstance(hit, dict):
            continue

        raw_cites = hit.get("citation")
        cite_list = raw_cites if isinstance(raw_cites, list) else []
        stripped = (str(item).strip() for item in cite_list)

        # Text fragments in order: at most one top-level field, then nested opinions.
        fragments: list[str] = []
        top_level = (hit.get(key) for key in ("snippet", "text", "plain_text"))
        first_body = next((v for v in top_level if isinstance(v, str) and v.strip()), "")
        if first_body:
            fragments.append(first_body)

        nested = hit.get("opinions")
        for opinion in nested if isinstance(nested, list) else ():
            if not isinstance(opinion, dict):
                continue
            # Both keys are taken when both are present (no early exit here).
            for key in ("snippet", "text"):
                piece = opinion.get(key)
                if isinstance(piece, str) and piece.strip():
                    fragments.append(piece)

        clusters.append(ResolvedCluster(
            name=str(hit.get("caseName") or "").strip(),
            citations=tuple(s for s in stripped if s),
            opinion_text="\n".join(fragments),
        ))
    return tuple(clusters)
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def _clusters_from_lookup(data: "dict | list") -> tuple[ResolvedCluster, ...]:
|
|
472
|
+
"""Normalize a CourtListener /citation-lookup/ response. The lookup returns a list of
|
|
473
|
+
per-citation results; a `status == 200` entry carries `clusters` (each with
|
|
474
|
+
`case_name` + `citations`). A `status` of 404 means the cite did not resolve → no
|
|
475
|
+
clusters. Tolerant of shape drift."""
|
|
476
|
+
entries = data if isinstance(data, list) else (data.get("results") if isinstance(data, dict) else None)
|
|
477
|
+
if not isinstance(entries, list):
|
|
478
|
+
return ()
|
|
479
|
+
out: list[ResolvedCluster] = []
|
|
480
|
+
for e in entries:
|
|
481
|
+
if not isinstance(e, dict):
|
|
482
|
+
continue
|
|
483
|
+
if e.get("status") not in (200, "200", None):
|
|
484
|
+
continue
|
|
485
|
+
for cl in (e.get("clusters") or []) if isinstance(e.get("clusters"), list) else []:
|
|
486
|
+
if not isinstance(cl, dict):
|
|
487
|
+
continue
|
|
488
|
+
cites = cl.get("citations") or []
|
|
489
|
+
norm_cites: list[str] = []
|
|
490
|
+
for c in cites if isinstance(cites, list) else []:
|
|
491
|
+
if isinstance(c, str):
|
|
492
|
+
norm_cites.append(c.strip())
|
|
493
|
+
elif isinstance(c, dict): # {volume, reporter, page}
|
|
494
|
+
vol, rep, pg = c.get("volume"), c.get("reporter"), c.get("page")
|
|
495
|
+
if rep and vol and pg:
|
|
496
|
+
norm_cites.append(f"{vol} {rep} {pg}")
|
|
497
|
+
out.append(ResolvedCluster(
|
|
498
|
+
name=str(cl.get("case_name") or cl.get("caseName") or "").strip(),
|
|
499
|
+
citations=tuple(c for c in norm_cites if c),
|
|
500
|
+
))
|
|
501
|
+
return tuple(out)
|
|
502
|
+
|
|
503
|
+
|
|
504
|
+
def gather(
    cite: str,
    *,
    claimed_name: str = "",
    quote: str = "",
    base: str = DEFAULT_BASE,
    token: str = "",
) -> CitationEvidence:
    """Resolve `cite` against CourtListener. Boundary I/O — the ONLY network path.

    Prefers the purpose-built `/citation-lookup/` endpoint when a token is given (the
    reliable resolver); otherwise, or when that call fails, uses the `/search/`
    exact-phrase match (the docs/279 §2 fitness note: noisier, so the headline
    benchmark number uses the frozen sample, not this).

    Args:
        cite: the reporter citation to resolve; blank yields an unreachable result.
        claimed_name: the case name as claimed; carried through onto the evidence.
        quote: the quoted text to check; carried through onto the evidence.
        base: the CourtListener base URL (a trailing slash is tolerated).
        token: API token; when empty, the environment variable named by `_TOKEN_ENV`
            is consulted.

    Returns:
        A `CitationEvidence`. Every failure reported by the HTTP helpers degrades to
        `reachable=False` with the error in `detail` — which `classify()` maps to
        ABSTAIN, never a fabricated RESOLVED.
    """
    if not (cite or "").strip():
        return CitationEvidence(cite="", claimed_name=claimed_name, quote=quote,
                                reachable=False, detail="no citation string given")

    root = base.rstrip("/")
    token = token or os.environ.get(_TOKEN_ENV, "")
    search_token = token
    detail_prefix = ""
    if token:
        # The purpose-built normalized resolver (POST text → parsed cites + clusters).
        url = f"{root}/api/rest/v4/citation-lookup/"
        data, err = _http_post_form(url, {"text": cite}, token=token)
        if data is not None:
            clusters = _clusters_from_lookup(data)
            return CitationEvidence(cite=cite, claimed_name=claimed_name, quote=quote,
                                    clusters=clusters, reachable=True,
                                    detail="via /citation-lookup/ (token)")
        # Token path failed — fall through to the search rung (it may still answer).
        detail_prefix = f"citation-lookup failed ({err}); fell back to search — "
        if err.startswith("auth failure"):
            # The server rejected this token. Re-sending it would make the fallback
            # fail the same way, so the search rung goes out without credentials.
            search_token = ""

    # /search/ rung: phrase-quote the cite, opinions only.
    q = urllib.parse.urlencode({"q": f'"{cite}"', "type": "o"})
    url = f"{root}/api/rest/v4/search/?{q}"
    data, err = _http_get_json(url, token=search_token)
    if data is None:
        return CitationEvidence(cite=cite, claimed_name=claimed_name, quote=quote,
                                reachable=False, detail=detail_prefix + err)
    clusters = _clusters_from_search(data)
    # Report how the search request was actually sent, not how it was intended.
    auth_label = "token" if search_token else "unauthenticated"
    return CitationEvidence(cite=cite, claimed_name=claimed_name, quote=quote,
                            clusters=clusters, reachable=True,
                            detail=detail_prefix + f"via /search/ ({auth_label})")
|
|
550
|
+
|
|
551
|
+
|
|
552
|
+
def _http_post_form(url: str, fields: dict, *, token: str = "") -> "tuple[Optional[dict], str]":
|
|
553
|
+
"""POST form fields → (parsed-json, "") | (None, err). NEVER raises (the GET twin,
|
|
554
|
+
for the token-only /citation-lookup/ endpoint which takes POSTed text)."""
|
|
555
|
+
headers = {"User-Agent": "dos-citation-resolve/0.1", "Content-Type": "application/x-www-form-urlencoded"}
|
|
556
|
+
if token:
|
|
557
|
+
headers["Authorization"] = f"Token {token}"
|
|
558
|
+
body = urllib.parse.urlencode(fields).encode("utf-8")
|
|
559
|
+
req = urllib.request.Request(url, data=body, headers=headers, method="POST")
|
|
560
|
+
try:
|
|
561
|
+
with urllib.request.urlopen(req, timeout=_HTTP_TIMEOUT_S) as r:
|
|
562
|
+
raw = r.read()
|
|
563
|
+
except urllib.error.HTTPError as e:
|
|
564
|
+
if e.code == 429:
|
|
565
|
+
return None, "rate-limited (HTTP 429)"
|
|
566
|
+
if e.code in (401, 403):
|
|
567
|
+
return None, f"auth failure (HTTP {e.code})"
|
|
568
|
+
return None, f"HTTP {e.code}"
|
|
569
|
+
except urllib.error.URLError as e:
|
|
570
|
+
return None, f"network error ({getattr(e, 'reason', e)!r})"
|
|
571
|
+
except (TimeoutError, OSError) as e:
|
|
572
|
+
return None, f"network error ({e.__class__.__name__})"
|
|
573
|
+
try:
|
|
574
|
+
return json.loads(raw.decode("utf-8", "replace")), ""
|
|
575
|
+
except (ValueError, TypeError):
|
|
576
|
+
return None, "malformed JSON from resolver"
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def resolve(
    cite: str,
    *,
    claimed_name: str = "",
    quote: str = "",
    base: str = DEFAULT_BASE,
    token: str = "",
    policy: CitationPolicy = DEFAULT_POLICY,
) -> CitationVerdict:
    """One-call entry point: fetch the evidence for `cite`, then classify it.

    Deliberately a thin composition of `gather` (the network read) and `classify`
    (the verdict under `policy`), so each half stays testable on its own against
    frozen fixtures.
    """
    evidence = gather(
        cite,
        claimed_name=claimed_name,
        quote=quote,
        base=base,
        token=token,
    )
    return classify(evidence, policy)
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
# ---------------------------------------------------------------------------
|
|
597
|
+
# The EvidenceSource face — the `dos.evidence_sources` entry-point occupant (docs/265).
|
|
598
|
+
# The subject is the citation, optionally "<cite> || <claimed_name> || <quote>" so one
|
|
599
|
+
# string carries all three operands through the generic seam.
|
|
600
|
+
# ---------------------------------------------------------------------------
|
|
601
|
+
|
|
602
|
+
# Delimiter between the operands packed into one subject string:
# "<cite> || <claimed_name> || <quote>".
_SUBJECT_SEP = "||"
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
class CitationResolveSource:
    """An `evidence.EvidenceSource` over the legal-citation resolver. `THIRD_PARTY`-tagged.

    The `subject` IS the citation, optionally packing the claimed name + quote as
    "<cite> || <name> || <quote>" (the seam's subject is one opaque string; this is the
    source's chosen encoding). Only the first two separators delimit operands, so a
    quote that itself contains "||" is kept whole. `gather` runs `resolve(...)` at the
    boundary and maps the typed verdict to `EvidenceFacts`:

    * RESOLVED_MATCH    → **ATTESTED** (a third-party reporter carries the cite + the
                          quote matches — bytes the agent did not author)
    * RESOLVED_MISMATCH → **REFUTED** (resolves, but the quote is fabricated)
    * UNRESOLVED        → **REFUTED** (no reporter carries it — the Mata fabrication;
                          a positive disconfirmation, stronger than "no signal")
    * ABSTAIN           → **NO_SIGNAL** (no corpus access — never a fabricated verdict)

    `accountability` is CLASS-LEVEL and fixed `THIRD_PARTY`: a reporter's citation index
    is infrastructure the agent does not control. So a RESOLVED_MATCH IS eligible to grant
    belief under `believe_under_floor` — and, crucially, an UNRESOLVED is a non-forgeable
    REFUTED that can REDDEN a verify of "I cited this case." Never raises —
    `gather_evidence` wraps it fail-safe and `resolve` degrades every provider failure to
    ABSTAIN on its own. `config` is accepted for Protocol conformance.
    """

    name = "citation_resolve"
    accountability = Accountability.THIRD_PARTY

    def __init__(self, *, base: str = DEFAULT_BASE, token: str = "",
                 policy: CitationPolicy = DEFAULT_POLICY) -> None:
        """Store the resolver base URL, API token, and classification policy."""
        self._base = base
        self._token = token
        self._policy = policy

    def gather(self, subject: str, config: object) -> EvidenceFacts:
        """Resolve the citation packed in `subject` and map the verdict to facts.

        `config` is unused here; it is accepted only to match the source Protocol.
        A subject with no citation part short-circuits to NO_SIGNAL without any
        resolver call.
        """
        cite, claimed_name, quote = self._unpack(subject)
        if not cite:
            return EvidenceFacts.no_signal(
                self.name, self.accountability, subject,
                detail="no citation in subject — nothing to resolve",
            )
        v = resolve(cite, claimed_name=claimed_name, quote=quote,
                    base=self._base, token=self._token, policy=self._policy)
        if v.verdict is Citation.RESOLVED_MATCH:
            return EvidenceFacts.attest(self.name, self.accountability, cite, detail=v.reason)
        if v.verdict in (Citation.UNRESOLVED, Citation.RESOLVED_MISMATCH):
            return EvidenceFacts.refute(self.name, self.accountability, cite, detail=v.reason)
        # ABSTAIN — no corpus access. The honest floor; never a fabricated read.
        return EvidenceFacts.no_signal(self.name, self.accountability, cite, detail=v.reason)

    @staticmethod
    def _unpack(subject: str) -> tuple[str, str, str]:
        """Split a packed subject into `(cite, claimed_name, quote)`, each trimmed.

        Missing trailing operands come back as "". The split stops after the second
        separator so that a quote containing "||" is not silently truncated to its
        first fragment.
        """
        parts = [p.strip() for p in (subject or "").split(_SUBJECT_SEP, 2)]
        cite, name, quote = (parts + ["", ""])[:3]
        return cite, name, quote
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
# ---------------------------------------------------------------------------
|
|
663
|
+
# CLI — `python -m dos.drivers.citation_resolve "<cite>" [--name N] [--quote Q]`.
|
|
664
|
+
# ---------------------------------------------------------------------------
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: resolve one citation and print the verdict.

    Args:
        argv: argument list to parse; `None` lets argparse read `sys.argv[1:]`.

    Returns:
        The process exit code: 0 for RESOLVED_MATCH, 1 for RESOLVED_MISMATCH or
        UNRESOLVED, 3 for ABSTAIN.
    """
    # `__doc__` is None under `python -OO` and may be empty; neither should crash the
    # CLI before it has parsed a single argument.
    doc_lines = (__doc__ or "").strip().splitlines()
    ap = argparse.ArgumentParser(
        prog="dos.drivers.citation_resolve",
        description=doc_lines[0] if doc_lines else None,
    )
    ap.add_argument("cite", help='the reporter citation, e.g. "925 F.3d 1339"')
    ap.add_argument("--name", default="", help="the case name as claimed (collision guard)")
    ap.add_argument("--quote", default="", help="the quoted holding to check against the opinion")
    ap.add_argument("--base", default=DEFAULT_BASE, help=f"CourtListener base (default: {DEFAULT_BASE})")
    ap.add_argument("--token", default="", help=f"API token (or set ${_TOKEN_ENV})")
    ap.add_argument("--json", action="store_true", help="machine-readable verdict")
    args = ap.parse_args(argv)

    verdict = resolve(args.cite, claimed_name=args.name, quote=args.quote,
                      base=args.base, token=args.token)
    if args.json:
        print(json.dumps(verdict.to_dict(), indent=2, default=str))
    else:
        print(f"CITE     {verdict.evidence.cite}")
        if verdict.evidence.claimed_name:
            print(f"CLAIMED  {verdict.evidence.claimed_name}")
        print(f"VERDICT  {verdict.verdict.value}")
        print(f"WHY      {verdict.reason}")

    # Exit map: a clean resolve-and-match is 0; everything that is not is non-zero so a
    # gate can `&&` on it. MISMATCH/UNRESOLVED = 1 (a caught fabrication/mis-quote),
    # ABSTAIN = 3 (could not tell — a human's call), mirroring `dos verify` / `ci_status`.
    return {
        Citation.RESOLVED_MATCH: 0,
        Citation.RESOLVED_MISMATCH: 1,
        Citation.UNRESOLVED: 1,
        Citation.ABSTAIN: 3,
    }[verdict.verdict]
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
# Script entry: run the CLI and exit the process with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
|