eyecite-ts 0.29.1 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/annotate/index.d.cts +1 -1
- package/dist/annotate/index.d.mts +1 -1
- package/dist/{citation-CsWsvkxB.d.cts → citation-Dx71V5wD.d.mts} +33 -4
- package/dist/citation-Dx71V5wD.d.mts.map +1 -0
- package/dist/{citation-Dy_8OOzV.d.mts → citation-t5DPIzyE.d.cts} +33 -4
- package/dist/citation-t5DPIzyE.d.cts.map +1 -0
- package/dist/index.cjs +7 -7
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +114 -11
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +114 -11
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +7 -7
- package/dist/index.mjs.map +1 -1
- package/dist/{types-BS607EPW.d.mts → types-BKg6EJIz.d.mts} +2 -2
- package/dist/{types-BS607EPW.d.mts.map → types-BKg6EJIz.d.mts.map} +1 -1
- package/dist/{types-DXnfQIbk.d.cts → types-D961u5ik.d.cts} +2 -2
- package/dist/{types-DXnfQIbk.d.cts.map → types-D961u5ik.d.cts.map} +1 -1
- package/dist/utils/index.cjs +1 -1
- package/dist/utils/index.cjs.map +1 -1
- package/dist/utils/index.d.cts +64 -3
- package/dist/utils/index.d.cts.map +1 -1
- package/dist/utils/index.d.mts +64 -3
- package/dist/utils/index.d.mts.map +1 -1
- package/dist/utils/index.mjs +1 -1
- package/dist/utils/index.mjs.map +1 -1
- package/package.json +6 -5
- package/dist/citation-CsWsvkxB.d.cts.map +0 -1
- package/dist/citation-Dy_8OOzV.d.mts.map +0 -1
package/dist/index.d.cts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { A as
|
|
2
|
-
import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-DXnfQIbk.cjs";
|
|
1
|
+
import { A as StatutesAtLargeCitation, B as JournalComponentSpans, C as PublicLawCitation, D as ShortFormCitation, E as ShortFormCaseCitation, F as AnnotationComponentSpans, G as StatutesAtLargeComponentSpans, H as PublicLawComponentSpans, I as CaseComponentSpans, J as TransformationMap, K as TreatiseComponentSpans, L as ConstitutionalComponentSpans, M as SupraCitation, N as TreatiseCitation, O as ShortFormCitationType, P as Warning, R as FederalRegisterComponentSpans, S as ParentheticalType, T as RestatementCitation, U as RestatementComponentSpans, V as NeutralComponentSpans, W as StatuteComponentSpans, X as PinciteInfo, Y as spanFromGroupIndex, Z as parsePincite, _ as HistorySignal, a as CitationOfType, b as NeutralCitation, c as ConstitutionalCitation, d as ExtractorMap, f as FederalRegisterCitation, g as FullCitationType, h as FullCitation, i as CitationId, j as SubsequentHistoryEntry, k as StatuteCitation, l as CourtInference, m as FullCaseCitation, n as Citation, o as CitationSignal, p as FederalRuleCitation, q as Span, r as CitationBase, s as CitationType, t as AnnotationCitation, u as DocketCitation, v as IdCitation, w as RegulationCitation, x as Parenthetical, y as JournalCitation, z as FederalRuleComponentSpans } from "./citation-t5DPIzyE.cjs";
|
|
2
|
+
import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-D961u5ik.cjs";
|
|
3
3
|
|
|
4
4
|
//#region src/types/guards.d.ts
|
|
5
5
|
/**
|
|
6
|
-
* Type guard: narrows Citation to a full citation
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Type guard: narrows Citation to a full citation — any member of
|
|
7
|
+
* `FullCitationType`. Membership derives from `FULL_CITATION_TYPES` (the single
|
|
8
|
+
* source), so it can never silently omit a full type the way it once dropped
|
|
9
|
+
* `regulation` / `stateRule` (#843).
|
|
9
10
|
*/
|
|
10
11
|
declare function isFullCitation(citation: Citation): citation is FullCitation;
|
|
11
12
|
/**
|
|
@@ -68,6 +69,21 @@ interface ExtractOptions {
|
|
|
68
69
|
*/
|
|
69
70
|
cleaners?: Array<(text: string) => string>;
|
|
70
71
|
/**
|
|
72
|
+
* Extra cleaners appended AFTER the effective base chain (the defaults, or a
|
|
73
|
+
* custom `cleaners` array). Unlike `cleaners` — which REPLACES the defaults —
|
|
74
|
+
* `additionalCleaners` keeps them, so you can add e.g. `stripMarkdownEmphasis`
|
|
75
|
+
* without silently dropping HTML stripping, whitespace/Unicode normalization,
|
|
76
|
+
* smart-quote fixing, reporter spacing, etc. (#835)
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* ```typescript
|
|
80
|
+
* import { extractCitations, stripMarkdownEmphasis } from "eyecite-ts"
|
|
81
|
+
* // markdown legal text: keep all defaults, also strip *emphasis*
|
|
82
|
+
* extractCitations(markdownText, { additionalCleaners: [stripMarkdownEmphasis] })
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
additionalCleaners?: Array<(text: string) => string>;
|
|
86
|
+
/**
|
|
71
87
|
* Custom regex patterns (overrides defaults).
|
|
72
88
|
*
|
|
73
89
|
* If provided, these patterns replace the default pattern set:
|
|
@@ -226,6 +242,15 @@ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
|
|
|
226
242
|
}): Promise<ResolvedCitation[]>;
|
|
227
243
|
declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
|
|
228
244
|
//#endregion
|
|
245
|
+
//#region src/extract/assignCitationIds.d.ts
|
|
246
|
+
/**
|
|
247
|
+
* Build a lookup from {@link CitationId} to its citation. Citations without an
|
|
248
|
+
* id (e.g. produced by the granular per-type extractors rather than by
|
|
249
|
+
* `extractCitations()`) are skipped. The map keys by stable id, so it keeps
|
|
250
|
+
* working after the caller has `filter`/`sort`/`map`-ed the array.
|
|
251
|
+
*/
|
|
252
|
+
declare function byId(citations: Citation[]): Map<CitationId, Citation>;
|
|
253
|
+
//#endregion
|
|
229
254
|
//#region src/extract/filterFalsePositives.d.ts
|
|
230
255
|
/**
|
|
231
256
|
* Apply false positive filters to extracted citations.
|
|
@@ -248,6 +273,31 @@ declare function extractCitationsAsync(text: string, options?: ExtractOptions):
|
|
|
248
273
|
*/
|
|
249
274
|
declare function applyFalsePositiveFilters(citations: Citation[], remove: boolean, originalText?: string): Citation[];
|
|
250
275
|
//#endregion
|
|
276
|
+
//#region src/clean/cleaners.d.ts
|
|
277
|
+
/**
|
|
278
|
+
* Strip markdown emphasis markers (`*italic*`, `**bold**`, `***both***`) while
|
|
279
|
+
* keeping the emphasized text. For markdown legal text (e.g. LLM-drafted briefs
|
|
280
|
+
* where case names are emphasized: `*Leon v. Martinez*`), whose asterisks
|
|
281
|
+
* otherwise break case-name capture / `fullSpan`.
|
|
282
|
+
*
|
|
283
|
+
* Deliberately conservative so it never corrupts citations:
|
|
284
|
+
* - Star-pagination pincites are preserved (`at *3` — an asterisk followed by a
|
|
285
|
+
* digit, or with no closing `*`, never matches).
|
|
286
|
+
* - Underscores are never touched (`_x_`, and blank locators like `[____]`).
|
|
287
|
+
* - Lone or space-flanked asterisks (`a * b`) and backslash-escaped pairs
|
|
288
|
+
* (`\*x\*`) are left alone.
|
|
289
|
+
*
|
|
290
|
+
* NOT in the default pipeline — pass it via `additionalCleaners` (recommended;
|
|
291
|
+
* keeps the defaults) or `cleaners`. It changes length; position tracking is
|
|
292
|
+
* handled by `cleanText`'s TransformationMap. Each pattern uses a single bounded
|
|
293
|
+
* quantifier (ReDoS-safe). (#835)
|
|
294
|
+
*
|
|
295
|
+
* @example
|
|
296
|
+
* stripMarkdownEmphasis("see *Leon v. Martinez*, 84 N.Y.2d 83")
|
|
297
|
+
* // => "see Leon v. Martinez, 84 N.Y.2d 83"
|
|
298
|
+
*/
|
|
299
|
+
declare function stripMarkdownEmphasis(text: string): string;
|
|
300
|
+
//#endregion
|
|
251
301
|
//#region src/clean/cleanText.d.ts
|
|
252
302
|
/**
|
|
253
303
|
* Result of text cleaning operation.
|
|
@@ -268,7 +318,8 @@ interface CleanTextResult {
|
|
|
268
318
|
* cleaned text while reporting positions in the original text.
|
|
269
319
|
*
|
|
270
320
|
* @param original - Original input text
|
|
271
|
-
* @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing)
|
|
321
|
+
* @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing). Passing a custom array REPLACES the defaults.
|
|
322
|
+
* @param additionalCleaners - Cleaners appended AFTER the effective base chain (the defaults, or a custom `cleaners` array). Use this to add a cleaner — e.g. `stripMarkdownEmphasis` — without dropping the defaults (#835).
|
|
272
323
|
* @returns Cleaned text with position mappings and warnings
|
|
273
324
|
*
|
|
274
325
|
* @example
|
|
@@ -276,7 +327,7 @@ interface CleanTextResult {
|
|
|
276
327
|
* // result.cleaned: "Smith v. Doe, 500 F.2d 123"
|
|
277
328
|
* // result.transformationMap tracks position shifts from HTML removal
|
|
278
329
|
*/
|
|
279
|
-
declare function cleanText(original: string, cleaners?: Array<(text: string) => string>): CleanTextResult;
|
|
330
|
+
declare function cleanText(original: string, cleaners?: Array<(text: string) => string>, additionalCleaners?: Array<(text: string) => string>): CleanTextResult;
|
|
280
331
|
//#endregion
|
|
281
332
|
//#region src/tokenize/tokenizer.d.ts
|
|
282
333
|
/**
|
|
@@ -624,13 +675,26 @@ declare function normalizeCourt(court: string | undefined): string | undefined;
|
|
|
624
675
|
*/
|
|
625
676
|
declare class DocumentResolver {
|
|
626
677
|
private readonly citations;
|
|
678
|
+
/** Original document text — used for original-coordinate reads (quote zones,
|
|
679
|
+
* paragraph boundaries, party-name lookback) that locate citations via
|
|
680
|
+
* `span.originalStart`. */
|
|
627
681
|
private readonly text;
|
|
682
|
+
/** Cleaned document text — used for clean-coordinate reads (bracket scopes,
|
|
683
|
+
* trigger-anchored asides, family/name windows) that index by
|
|
684
|
+
* `span.cleanStart`/`cleanEnd`. Equals `text` when no length-changing cleaner
|
|
685
|
+
* ran; diverges from it otherwise, which is the #830 desync this fixes. */
|
|
686
|
+
private readonly cleanedText;
|
|
687
|
+
/** Maps clean→original offsets so derived output spans carry correct original
|
|
688
|
+
* coordinates even when a cleaner transformed the text (#830). */
|
|
689
|
+
private readonly transformationMap?;
|
|
628
690
|
private readonly options;
|
|
629
691
|
private readonly context;
|
|
630
692
|
private readonly partyNameTree;
|
|
631
693
|
private readonly quoteZones;
|
|
632
694
|
/** Parenthesis depth at each citation's start (filled lazily by resolve()). */
|
|
633
695
|
private parenDepths;
|
|
696
|
+
/** Per-citation bracket-balance trust (#809/#820); false = structure unreliable. */
|
|
697
|
+
private balanceOks;
|
|
634
698
|
/** Resolution results accumulated during the in-flight resolve() pass. */
|
|
635
699
|
private resolutions;
|
|
636
700
|
/** Resolved citations accumulated during the in-flight resolve() pass; used
|
|
@@ -640,10 +704,17 @@ declare class DocumentResolver {
|
|
|
640
704
|
* Creates a new DocumentResolver.
|
|
641
705
|
*
|
|
642
706
|
* @param citations - All citations in document (in order of appearance)
|
|
643
|
-
* @param text - Original document text
|
|
707
|
+
* @param text - Original document text (original-coordinate reads index into it)
|
|
644
708
|
* @param options - Resolution options
|
|
709
|
+
* @param cleanContext - Cleaned text + transformation map for clean-coordinate
|
|
710
|
+
* reads (#830). When omitted, `text` is treated as the cleaned text too,
|
|
711
|
+
* preserving the historical clean==original behavior for callers that pass
|
|
712
|
+
* only one text (e.g. text untouched by a length-changing cleaner).
|
|
645
713
|
*/
|
|
646
|
-
constructor(citations: Citation[], text: string, options?: ResolutionOptions);
|
|
714
|
+
constructor(citations: Citation[], text: string, options?: ResolutionOptions, cleanContext?: {
|
|
715
|
+
cleanedText: string;
|
|
716
|
+
transformationMap: TransformationMap;
|
|
717
|
+
});
|
|
647
718
|
/**
|
|
648
719
|
* Resolves all citations in the document.
|
|
649
720
|
*
|
|
@@ -753,6 +824,22 @@ declare class DocumentResolver {
|
|
|
753
824
|
*/
|
|
754
825
|
private isParentheticalChild;
|
|
755
826
|
/**
|
|
827
|
+
* Whether the citation's clean span is wholly inside a previously-resolved
|
|
828
|
+
* citation's `fullSpan` (the #214 strategy-2 signal). Independent of the
|
|
829
|
+
* fragile bracket-depth count, so it stays a *hard* exclusion even when
|
|
830
|
+
* balance fails (#820).
|
|
831
|
+
*/
|
|
832
|
+
private isFullSpanContained;
|
|
833
|
+
/**
|
|
834
|
+
* #820: the citation's paren-child status rests ONLY on the (possibly
|
|
835
|
+
* desynced) bracket depth, in a clause whose brackets did not balance — so the
|
|
836
|
+
* "nested" verdict is untrustworthy. Trigger-anchored asides and
|
|
837
|
+
* fullSpan-contained cites are reliable (independent of the depth count) and
|
|
838
|
+
* stay hard exclusions; only the depth-only case under balance failure is
|
|
839
|
+
* soft, which `resolveId` keeps as a degraded-confidence candidate.
|
|
840
|
+
*/
|
|
841
|
+
private isUntrustworthyDepthAside;
|
|
842
|
+
/**
|
|
756
843
|
* Scans the prose between the previous citation and `Id.` for a case-name
|
|
757
844
|
* mention. If a name is found and doesn't match the picked antecedent's
|
|
758
845
|
* caseName/plaintiff/defendant, returns a downgraded confidence and an
|
|
@@ -772,6 +859,15 @@ declare class DocumentResolver {
|
|
|
772
859
|
* threshold-derived `maxDistance` to recover it.
|
|
773
860
|
*/
|
|
774
861
|
private resolveSupra;
|
|
862
|
+
/**
|
|
863
|
+
* Apply the #818 hybrid policy to the in-scope authorities a `supra` name key
|
|
864
|
+
* resolved to. Exactly one → commit at the matched similarity. More than one is
|
|
865
|
+
* a non-unique key: a *true tie* (same name + same year, indistinguishable by
|
|
866
|
+
* the key alone) abstains; otherwise pick the most-recent-within-name but cap
|
|
867
|
+
* confidence and warn, with `idConfidenceFloor` able to fail it closed. Mirrors
|
|
868
|
+
* the `Id.` path's downgrade-warn-then-floor-abstain shape (#800/#820).
|
|
869
|
+
*/
|
|
870
|
+
private selectSupraAntecedent;
|
|
775
871
|
private createSupraSuccess;
|
|
776
872
|
/**
|
|
777
873
|
* Match a full-caption supra (`Plaintiff v. Defendant, supra`) against a
|
|
@@ -876,9 +972,16 @@ declare class DocumentResolver {
|
|
|
876
972
|
* @param citations - Extracted citations in order of appearance
|
|
877
973
|
* @param text - Original document text
|
|
878
974
|
* @param options - Resolution options
|
|
975
|
+
* @param cleanContext - Optional cleaned text + transformation map. Pass when a
|
|
976
|
+
* length-changing cleaner was applied so clean-coordinate reads index the
|
|
977
|
+
* cleaned text and derived spans map back to original coordinates (#830). When
|
|
978
|
+
* omitted, `text` is treated as the cleaned text too (clean == original).
|
|
879
979
|
* @returns Citations with resolution metadata
|
|
880
980
|
*/
|
|
881
|
-
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
|
|
981
|
+
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions, cleanContext?: {
|
|
982
|
+
cleanedText: string;
|
|
983
|
+
transformationMap: TransformationMap;
|
|
984
|
+
}): ResolvedCitation[];
|
|
882
985
|
//#endregion
|
|
883
986
|
//#region src/footnotes/detectFootnotes.d.ts
|
|
884
987
|
/**
|
|
@@ -1041,5 +1144,5 @@ declare function analyzeDocument(text: string, citations: Citation[], opts?: {
|
|
|
1041
1144
|
transformationMap?: TransformationMap;
|
|
1042
1145
|
}): Document;
|
|
1043
1146
|
//#endregion
|
|
1044
|
-
export { type AnalyzedFootnoteZone, type AnnotationCitation, type AnnotationComponentSpans, type AttributionKind, type CaseComponentSpans, type Citation, type CitationBase, type CitationGraph, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type ConstitutionalComponentSpans, type CourtInference, type DocketCitation, type Document, DocumentResolver, type Edge, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FederalRegisterComponentSpans, type FederalRuleCitation, type FederalRuleComponentSpans, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type JournalComponentSpans, type NeutralCitation, type NeutralComponentSpans, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type PublicLawComponentSpans, type QuoteAttribution, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type RestatementCitation, type RestatementComponentSpans, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatuteComponentSpans, type StatutesAtLargeCitation, type StatutesAtLargeComponentSpans, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type TreatiseCitation, type TreatiseComponentSpans, type Warning, analyzeDocument, applyFalsePositiveFilters, assertUnreachable, cleanText, detectFootnotes, extractAnnotation, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractFederalRule, extractJournal, extractNeutral, extractPublicLaw, extractRestatement, extractStatute, extractStatutesAtLarge, extractTreatise, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, spanFromGroupIndex, tokenize };
|
|
1147
|
+
export { type AnalyzedFootnoteZone, type AnnotationCitation, type AnnotationComponentSpans, type AttributionKind, type CaseComponentSpans, type Citation, type CitationBase, type CitationGraph, type CitationId, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type ConstitutionalComponentSpans, type CourtInference, type DocketCitation, type Document, DocumentResolver, type Edge, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FederalRegisterComponentSpans, type FederalRuleCitation, type FederalRuleComponentSpans, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type JournalComponentSpans, type NeutralCitation, type NeutralComponentSpans, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type PublicLawComponentSpans, type QuoteAttribution, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type RestatementCitation, type RestatementComponentSpans, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatuteComponentSpans, type StatutesAtLargeCitation, type StatutesAtLargeComponentSpans, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type TreatiseCitation, type TreatiseComponentSpans, type Warning, analyzeDocument, applyFalsePositiveFilters, assertUnreachable, byId, cleanText, detectFootnotes, extractAnnotation, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractFederalRule, extractJournal, extractNeutral, extractPublicLaw, extractRestatement, extractStatute, extractStatutesAtLarge, extractTreatise, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, spanFromGroupIndex, 
stripMarkdownEmphasis, tokenize };
|
|
1045
1148
|
//# sourceMappingURL=index.d.cts.map
|
package/dist/index.d.cts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/extract/extractCitations.ts","../src/extract/filterFalsePositives.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractAnnotation.ts","../src/extract/extractCase.ts","../src/extract/extractConstitutional.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractFederalRule.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractRestatement.ts","../src/extract/extractStatute.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/extractTreatise.ts","../src/extract/courtNormalization.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/footnotes/detectFootnotes.ts","../src/document/types.ts","../src/document/analyzer.ts"],"mappings":";;;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/extract/extractCitations.ts","../src/extract/assignCitationIds.ts","../src/extract/filterFalsePositives.ts","../src/clean/cleaners.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractAnnotation.ts","../src/extract/extractCase.ts","../src/extract/extractConstitutional.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractFederalRule.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractRestatement.ts","../src/extract/extractStatute.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/extractTreatise.ts","../src/extract/courtNormalization.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/footnotes/detectFootnotes.ts","../src/document/types.ts","../src/document/analyzer.ts"],"mappings":";;;;;;;AAyBA;;;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAOhD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;AAArE;iBAOgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;;;;;iBAQhD,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;AAX9B;;;;;;;;;;AAQA;;;iBAuBgB,iBAAA,CAAkB,CAAA;;;UCtDjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;ADKR;;UEuDiB,cAAA;EFvD+C;;;;;;;AAOhE;;;;;;;EE+DE,QAAA,GAAW,KAAA,EAAO,IAAA;;;AFxDpB;;;;;;;;;;AAQA;;EEgEE,kBAAA,GAAqB,KAAA,EAAO,IAAA;;;;;;;;;;;;;;;EAgB5B,QAAA,GAAW,OAAA;;;;AFzDb;;;;;;;;ACtDA;;EC8HE,OAAA;ED1HM;;;;;;;;;;;;AC4DR;;EA8EE,iBAAA,GAAoB,iBAAA;;;;;;;;;;;;;;;;;EAkBpB,oBAAA;;EAGA,eAAA;AAAA;;AA6DF;;;;;;;;;;;AAIA;;;;;;;;;;AAmdA;;;;;;;;;;;;;;;;AAIA;;;;;;;;;;;;;;;;;;AClrBA;;iBDuNgB,gBAAA,CACd,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAC3B,gBAAA;AAAA,iBACa,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;;;;;;;;;AEsW1E;;;;;;;;;;;;;;AC3EA;iBHwLsB,qBAAA,CACpB,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,
OAAA;AAAA,IAC3B,OAAA,CAAQ,gBAAA;AAAA,iBACW,qBAAA,CACpB,IAAA,UACA,OAAA,GAAU,cAAA,GACT,OAAA,CAAQ,QAAA;;;;AF9qBX;;;;;iBGPgB,IAAA,CAAK,SAAA,EAAW,QAAA,KAAa,GAAA,CAAI,UAAA,EAAY,QAAA;;;;;;;;;;;AHc7D;;;;;;;;;;AAQA;iBI2iBgB,yBAAA,CACd,SAAA,EAAW,QAAA,IACX,MAAA,WACA,YAAA,YACC,QAAA;;;;;;;;;;;;;;;;AG3jBH;;;;;;;;;iBF4egB,qBAAA,CAAsB,IAAA;;;;;ALtftC;UMLiB,eAAA;;EAEf,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;ANIZ;;;;;;;;;;AAOA;;;;;;iBMSgB,SAAA,CACd,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,sBAajB,kBAAA,GAAoB,KAAA,EAAO,IAAA,uBAC1B,eAAA;;;;;;;;;;UC7Bc,KAAA;EPID;EOFd,IAAA;EPE8D;EOC9D,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;APCF;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACtDA;;;;;;;AD+BA,iBOsCgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAST,KAAA;;;APhEH;;;;;;;AAAA,iBQPgB,iBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,kBAAA;;;;;ARWH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACtDA;;;;;;;;;;iBQiFgB,WAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,WACA,YAAA,WAOA,QAAA,GAAW,aAAA;EAAgB,UAAA;EAAoB,QAAA;AAAA,KAC9C,gBAAA;;;;ATpFH;;;;;;iBUsPgB,qBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,sBAAA;;;;;;AVlPH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;iBWCgB,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,uBAAA;;;;AXpBH;;;;;;;;;;AAOA;;;iBYgDgB,kBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,mBAAA;;;;;;AZ1DH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;iBaYgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,eAAA;;;;;;Ab/BH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;iBckDgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,eAAA;;;;;;AdrEH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;iBeEgB,gBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,iBAAA;;;iBCXa,kBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,mBAAA;;;;;;;iBCuFa,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,eAAA,GAAkB,kBAAA;;;iBClGL,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,uBAAA;;;iBCyCa,eAAA,CACd,KAAA,EAAO,KAAA,EACP,i
BAAA,EAAmB,iBAAA,GAClB,gBAAA;;;;;;;AnB3DH;;;;;;;;iBoBbgB,cAAA,CAAe,KAAA;;;ApBoB/B;;;;AAAA,cqBuDa,gBAAA;EAAA,iBACM,SAAA;;;;mBAIA,IAAA;ErBrDnB;;;;EAAA,iBqB0DmB,WAAA;;;mBAGA,iBAAA;EAAA,iBACA,OAAA;EAAA,iBAMA,OAAA;EAAA,iBACA,aAAA;EAAA,iBACA,UAAA;;UAET,WAAA;;UAEA,UAAA;;UAEA,WAAA;;;UAGA,aAAA;;;;;;;;;;;ArBhDV;EqB6DE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA,EACT,YAAA;IAAiB,WAAA;IAAqB,iBAAA,EAAmB,iBAAA;EAAA;;;;ApBvH7D;;EoB6KE,OAAA,CAAA,GAAW,gBAAA;EpBzKL;;;;;;;;EAAA,QoBsQE,uBAAA;;;;AnB1MV;;;;;;;;UmByNU,eAAA;;;;;;;;;;;;UA4DA,wBAAA;;;;AnBrHV;;;;;;;;;;;AAIA;;;;;UmBsJU,SAAA;;;;;AnB6TV;;;;;;;;UmBnIU,yBAAA;;;;;;UAoBA,gBAAA;EnBkHC;AACX;;;;;;EADW,QmB9FD,oBAAA;EnBkGP;;;;;;;;;EAAA,QmBhFO,oBAAA;;AlBrmBV;;;;;;;;;;UkBunBU,oBAAA;;;;;;;UAUA,mBAAA;;AjBhEV;;;;;;;UiBwFU,yBAAA;;;;;;;AhBnKV;;UgBmLU,wBAAA;EhBnLM;;;;;AC3fhB;;;;;ED2fgB,QgBoSN,YAAA;;;;;;AfnwBV;;;Ues0BU,qBAAA;EAAA,QAmDA,kBAAA;;;;;;;UA4BA,qBAAA;;;;;;;UAmDA,kBAAA;EAAA,QA4CA,mBAAA;EAAA,QAmBA,qBAAA;EdphCV;;;;;;;;;;;;;;;EAAA,QcqjCU,oBAAA;Ed1iCR;AAuCF;;;;;;;;;;;;;AC5DA;;;;;;;;;;;;EDqBE,QcsqCQ,uBAAA;;;;;;UAwGA,iBAAA;EZ3tCM;;;;EAAA,QY2vCN,gBAAA;;;;;;UAgCA,gBAAA;;;;UAWA,kBAAA;;;;UAuBA,iBAAA;;;;UAUA,aAAA;;AXzpCV;;UW0qCU,mBAAA;AAAA;;;;;;ArBl5CV;;;;;;;;;;AAQA;iBsBTgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,EACV,YAAA;EAAiB,WAAA;EAAqB,iBAAA,EAAmB,iBAAA;AAAA,IACxD,gBAAA;;;;;;AtBlBH;;;;;;;;iBuBRgB,eAAA,CAAgB,IAAA,WAAe,WAAA;;;;;AvBQ/C;;;;;;;;KwBZY,eAAA;;AxBmBZ;;;;UwBZiB,gBAAA;;EAEf,SAAA,EAAW,IAAA;;EAEX,SAAA;ExBQmE;EwBNnE,aAAA;ExBac;EwBXd,eAAA,GAAkB,eAAA;ExBW4C;;;;;;;AAQhE;EwBVE,UAAA;AAAA;;;;;;;;KAUU,IAAA;EACN,IAAA;EAAqB,IAAA;EAAc,EAAA;EAAY,UAAA;EAAoB,QAAA;AAAA;EACnE,IAAA;EAAoB,IAAA;EAAc,EAAA;AAAA;EAClC,IAAA;EAAkB,IAAA;EAAc,EAAA;EAAY,OAAA;AAAA;EAC5C,IAAA;EAAoB,IAAA;EAAc,EAAA;EAAY,MAAA,EAAQ,aAAA;AAAA;EACtD,IAAA;EAAyB,IAAA;EAAc,EAAA;AAAA;EACvC,IAAA;EAAqB,IAAA;EAAc,EAAA;EAAY,OAAA;EAAiB,QAAA;AAAA;EAChE,IAAA;EAA6B,IAAA;EAAc,EAAA;AAAA;;;;;;;;;;;;UAahC,aAAA;EACf,KAAA;EACA,KAAA,EAAO,IAAA;AAAA;;;;;;UAQQ,oBAAA;EACf,KAAA;EACA,GAAA;
EACA,cAAA;EtBgKc;EsB9Jd,eAAA;AAAA;;;;;;;UASe,QAAA;EtByJjB;EsBvJE,SAAA,EAAW,QAAA;;;;EAKX,UAAA,EAAY,IAAA;;EAGZ,cAAA,EAAgB,GAAA,SAAY,IAAA;;EAG5B,cAAA,EAAgB,GAAA,SAAY,IAAA;EtB+lB9B;;EsB3lBE,iBAAA,EAAmB,gBAAA;;EAGnB,aAAA,EAAe,aAAA;;;EAIf,aAAA,GAAgB,oBAAA;AAAA;;;;AxB1FlB;;;;;;;;;iByBLgB,eAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,IAAA;EAAS,iBAAA,GAAoB,iBAAA;AAAA,IAC5B,QAAA"}
|
package/dist/index.d.mts
CHANGED
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
import { A as
|
|
2
|
-
import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-BS607EPW.mjs";
|
|
1
|
+
import { A as StatutesAtLargeCitation, B as JournalComponentSpans, C as PublicLawCitation, D as ShortFormCitation, E as ShortFormCaseCitation, F as AnnotationComponentSpans, G as StatutesAtLargeComponentSpans, H as PublicLawComponentSpans, I as CaseComponentSpans, J as TransformationMap, K as TreatiseComponentSpans, L as ConstitutionalComponentSpans, M as SupraCitation, N as TreatiseCitation, O as ShortFormCitationType, P as Warning, R as FederalRegisterComponentSpans, S as ParentheticalType, T as RestatementCitation, U as RestatementComponentSpans, V as NeutralComponentSpans, W as StatuteComponentSpans, X as PinciteInfo, Y as spanFromGroupIndex, Z as parsePincite, _ as HistorySignal, a as CitationOfType, b as NeutralCitation, c as ConstitutionalCitation, d as ExtractorMap, f as FederalRegisterCitation, g as FullCitationType, h as FullCitation, i as CitationId, j as SubsequentHistoryEntry, k as StatuteCitation, l as CourtInference, m as FullCaseCitation, n as Citation, o as CitationSignal, p as FederalRuleCitation, q as Span, r as CitationBase, s as CitationType, t as AnnotationCitation, u as DocketCitation, v as IdCitation, w as RegulationCitation, x as Parenthetical, y as JournalCitation, z as FederalRuleComponentSpans } from "./citation-Dx71V5wD.mjs";
|
|
2
|
+
import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-BKg6EJIz.mjs";
|
|
3
3
|
|
|
4
4
|
//#region src/types/guards.d.ts
|
|
5
5
|
/**
|
|
6
|
-
* Type guard: narrows Citation to a full citation
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Type guard: narrows Citation to a full citation — any member of
|
|
7
|
+
* `FullCitationType`. Membership derives from `FULL_CITATION_TYPES` (the single
|
|
8
|
+
* source), so it can never silently omit a full type the way it once dropped
|
|
9
|
+
* `regulation` / `stateRule` (#843).
|
|
9
10
|
*/
|
|
10
11
|
declare function isFullCitation(citation: Citation): citation is FullCitation;
|
|
11
12
|
/**
|
|
@@ -68,6 +69,21 @@ interface ExtractOptions {
|
|
|
68
69
|
*/
|
|
69
70
|
cleaners?: Array<(text: string) => string>;
|
|
70
71
|
/**
|
|
72
|
+
* Extra cleaners appended AFTER the effective base chain (the defaults, or a
|
|
73
|
+
* custom `cleaners` array). Unlike `cleaners` — which REPLACES the defaults —
|
|
74
|
+
* `additionalCleaners` keeps them, so you can add e.g. `stripMarkdownEmphasis`
|
|
75
|
+
* without silently dropping HTML stripping, whitespace/Unicode normalization,
|
|
76
|
+
* smart-quote fixing, reporter spacing, etc. (#835)
|
|
77
|
+
*
|
|
78
|
+
* @example
|
|
79
|
+
* ```typescript
|
|
80
|
+
* import { extractCitations, stripMarkdownEmphasis } from "eyecite-ts"
|
|
81
|
+
* // markdown legal text: keep all defaults, also strip *emphasis*
|
|
82
|
+
* extractCitations(markdownText, { additionalCleaners: [stripMarkdownEmphasis] })
|
|
83
|
+
* ```
|
|
84
|
+
*/
|
|
85
|
+
additionalCleaners?: Array<(text: string) => string>;
|
|
86
|
+
/**
|
|
71
87
|
* Custom regex patterns (overrides defaults).
|
|
72
88
|
*
|
|
73
89
|
* If provided, these patterns replace the default pattern set:
|
|
@@ -226,6 +242,15 @@ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
|
|
|
226
242
|
}): Promise<ResolvedCitation[]>;
|
|
227
243
|
declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
|
|
228
244
|
//#endregion
|
|
245
|
+
//#region src/extract/assignCitationIds.d.ts
|
|
246
|
+
/**
|
|
247
|
+
* Build a lookup from {@link CitationId} to its citation. Citations without an
|
|
248
|
+
* id (e.g. produced by the granular per-type extractors rather than by
|
|
249
|
+
* `extractCitations()`) are skipped. The map keys by stable id, so it keeps
|
|
250
|
+
* working after the caller has `filter`/`sort`/`map`-ed the array.
|
|
251
|
+
*/
|
|
252
|
+
declare function byId(citations: Citation[]): Map<CitationId, Citation>;
|
|
253
|
+
//#endregion
|
|
229
254
|
//#region src/extract/filterFalsePositives.d.ts
|
|
230
255
|
/**
|
|
231
256
|
* Apply false positive filters to extracted citations.
|
|
@@ -248,6 +273,31 @@ declare function extractCitationsAsync(text: string, options?: ExtractOptions):
|
|
|
248
273
|
*/
|
|
249
274
|
declare function applyFalsePositiveFilters(citations: Citation[], remove: boolean, originalText?: string): Citation[];
|
|
250
275
|
//#endregion
|
|
276
|
+
//#region src/clean/cleaners.d.ts
|
|
277
|
+
/**
|
|
278
|
+
* Strip markdown emphasis markers (`*italic*`, `**bold**`, `***both***`) while
|
|
279
|
+
* keeping the emphasized text. For markdown legal text (e.g. LLM-drafted briefs
|
|
280
|
+
* where case names are emphasized: `*Leon v. Martinez*`), whose asterisks
|
|
281
|
+
* otherwise break case-name capture / `fullSpan`.
|
|
282
|
+
*
|
|
283
|
+
* Deliberately conservative so it never corrupts citations:
|
|
284
|
+
* - Star-pagination pincites are preserved (`at *3` — an asterisk followed by a
|
|
285
|
+
* digit, or with no closing `*`, never matches).
|
|
286
|
+
* - Underscores are never touched (`_x_`, and blank locators like `[____]`).
|
|
287
|
+
* - Lone or space-flanked asterisks (`a * b`) and backslash-escaped pairs
|
|
288
|
+
* (`\*x\*`) are left alone.
|
|
289
|
+
*
|
|
290
|
+
* NOT in the default pipeline — pass it via `additionalCleaners` (recommended;
|
|
291
|
+
* keeps the defaults) or `cleaners`. It changes length; position tracking is
|
|
292
|
+
* handled by `cleanText`'s TransformationMap. Each pattern uses a single bounded
|
|
293
|
+
* quantifier (ReDoS-safe). (#835)
|
|
294
|
+
*
|
|
295
|
+
* @example
|
|
296
|
+
* stripMarkdownEmphasis("see *Leon v. Martinez*, 84 N.Y.2d 83")
|
|
297
|
+
* // => "see Leon v. Martinez, 84 N.Y.2d 83"
|
|
298
|
+
*/
|
|
299
|
+
declare function stripMarkdownEmphasis(text: string): string;
|
|
300
|
+
//#endregion
|
|
251
301
|
//#region src/clean/cleanText.d.ts
|
|
252
302
|
/**
|
|
253
303
|
* Result of text cleaning operation.
|
|
@@ -268,7 +318,8 @@ interface CleanTextResult {
|
|
|
268
318
|
* cleaned text while reporting positions in the original text.
|
|
269
319
|
*
|
|
270
320
|
* @param original - Original input text
|
|
271
|
-
* @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing)
|
|
321
|
+
* @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing). Passing a custom array REPLACES the defaults.
|
|
322
|
+
* @param additionalCleaners - Cleaners appended AFTER the effective base chain (the defaults, or a custom `cleaners` array). Use this to add a cleaner — e.g. `stripMarkdownEmphasis` — without dropping the defaults (#835).
|
|
272
323
|
* @returns Cleaned text with position mappings and warnings
|
|
273
324
|
*
|
|
274
325
|
* @example
|
|
@@ -276,7 +327,7 @@ interface CleanTextResult {
|
|
|
276
327
|
* // result.cleaned: "Smith v. Doe, 500 F.2d 123"
|
|
277
328
|
* // result.transformationMap tracks position shifts from HTML removal
|
|
278
329
|
*/
|
|
279
|
-
declare function cleanText(original: string, cleaners?: Array<(text: string) => string>): CleanTextResult;
|
|
330
|
+
declare function cleanText(original: string, cleaners?: Array<(text: string) => string>, additionalCleaners?: Array<(text: string) => string>): CleanTextResult;
|
|
280
331
|
//#endregion
|
|
281
332
|
//#region src/tokenize/tokenizer.d.ts
|
|
282
333
|
/**
|
|
@@ -624,13 +675,26 @@ declare function normalizeCourt(court: string | undefined): string | undefined;
|
|
|
624
675
|
*/
|
|
625
676
|
declare class DocumentResolver {
|
|
626
677
|
private readonly citations;
|
|
678
|
+
/** Original document text — used for original-coordinate reads (quote zones,
|
|
679
|
+
* paragraph boundaries, party-name lookback) that locate citations via
|
|
680
|
+
* `span.originalStart`. */
|
|
627
681
|
private readonly text;
|
|
682
|
+
/** Cleaned document text — used for clean-coordinate reads (bracket scopes,
|
|
683
|
+
* trigger-anchored asides, family/name windows) that index by
|
|
684
|
+
* `span.cleanStart`/`cleanEnd`. Equals `text` when no length-changing cleaner
|
|
685
|
+
* ran; diverges from it otherwise, which is the #830 desync this fixes. */
|
|
686
|
+
private readonly cleanedText;
|
|
687
|
+
/** Maps clean→original offsets so derived output spans carry correct original
|
|
688
|
+
* coordinates even when a cleaner transformed the text (#830). */
|
|
689
|
+
private readonly transformationMap?;
|
|
628
690
|
private readonly options;
|
|
629
691
|
private readonly context;
|
|
630
692
|
private readonly partyNameTree;
|
|
631
693
|
private readonly quoteZones;
|
|
632
694
|
/** Parenthesis depth at each citation's start (filled lazily by resolve()). */
|
|
633
695
|
private parenDepths;
|
|
696
|
+
/** Per-citation bracket-balance trust (#809/#820); false = structure unreliable. */
|
|
697
|
+
private balanceOks;
|
|
634
698
|
/** Resolution results accumulated during the in-flight resolve() pass. */
|
|
635
699
|
private resolutions;
|
|
636
700
|
/** Resolved citations accumulated during the in-flight resolve() pass; used
|
|
@@ -640,10 +704,17 @@ declare class DocumentResolver {
|
|
|
640
704
|
* Creates a new DocumentResolver.
|
|
641
705
|
*
|
|
642
706
|
* @param citations - All citations in document (in order of appearance)
|
|
643
|
-
* @param text - Original document text
|
|
707
|
+
* @param text - Original document text (original-coordinate reads index into it)
|
|
644
708
|
* @param options - Resolution options
|
|
709
|
+
* @param cleanContext - Cleaned text + transformation map for clean-coordinate
|
|
710
|
+
* reads (#830). When omitted, `text` is treated as the cleaned text too,
|
|
711
|
+
* preserving the historical clean==original behavior for callers that pass
|
|
712
|
+
* only one text (e.g. text untouched by a length-changing cleaner).
|
|
645
713
|
*/
|
|
646
|
-
constructor(citations: Citation[], text: string, options?: ResolutionOptions
|
|
714
|
+
constructor(citations: Citation[], text: string, options?: ResolutionOptions, cleanContext?: {
|
|
715
|
+
cleanedText: string;
|
|
716
|
+
transformationMap: TransformationMap;
|
|
717
|
+
});
|
|
647
718
|
/**
|
|
648
719
|
* Resolves all citations in the document.
|
|
649
720
|
*
|
|
@@ -753,6 +824,22 @@ declare class DocumentResolver {
|
|
|
753
824
|
*/
|
|
754
825
|
private isParentheticalChild;
|
|
755
826
|
/**
|
|
827
|
+
* Whether the citation's clean span is wholly inside a previously-resolved
|
|
828
|
+
* citation's `fullSpan` (the #214 strategy-2 signal). Independent of the
|
|
829
|
+
* fragile bracket-depth count, so it stays a *hard* exclusion even when
|
|
830
|
+
* balance fails (#820).
|
|
831
|
+
*/
|
|
832
|
+
private isFullSpanContained;
|
|
833
|
+
/**
|
|
834
|
+
* #820: the citation's paren-child status rests ONLY on the (possibly
|
|
835
|
+
* desynced) bracket depth, in a clause whose brackets did not balance — so the
|
|
836
|
+
* "nested" verdict is untrustworthy. Trigger-anchored asides and
|
|
837
|
+
* fullSpan-contained cites are reliable (independent of the depth count) and
|
|
838
|
+
* stay hard exclusions; only the depth-only case under balance failure is
|
|
839
|
+
* soft, which `resolveId` keeps as a degraded-confidence candidate.
|
|
840
|
+
*/
|
|
841
|
+
private isUntrustworthyDepthAside;
|
|
842
|
+
/**
|
|
756
843
|
* Scans the prose between the previous citation and `Id.` for a case-name
|
|
757
844
|
* mention. If a name is found and doesn't match the picked antecedent's
|
|
758
845
|
* caseName/plaintiff/defendant, returns a downgraded confidence and an
|
|
@@ -772,6 +859,15 @@ declare class DocumentResolver {
|
|
|
772
859
|
* threshold-derived `maxDistance` to recover it.
|
|
773
860
|
*/
|
|
774
861
|
private resolveSupra;
|
|
862
|
+
/**
|
|
863
|
+
* Apply the #818 hybrid policy to the in-scope authorities a `supra` name key
|
|
864
|
+
* resolved to. Exactly one → commit at the matched similarity. More than one is
|
|
865
|
+
* a non-unique key: a *true tie* (same name + same year, indistinguishable by
|
|
866
|
+
* the key alone) abstains; otherwise pick the most-recent-within-name but cap
|
|
867
|
+
* confidence and warn, with `idConfidenceFloor` able to fail it closed. Mirrors
|
|
868
|
+
* the `Id.` path's downgrade-warn-then-floor-abstain shape (#800/#820).
|
|
869
|
+
*/
|
|
870
|
+
private selectSupraAntecedent;
|
|
775
871
|
private createSupraSuccess;
|
|
776
872
|
/**
|
|
777
873
|
* Match a full-caption supra (`Plaintiff v. Defendant, supra`) against a
|
|
@@ -876,9 +972,16 @@ declare class DocumentResolver {
|
|
|
876
972
|
* @param citations - Extracted citations in order of appearance
|
|
877
973
|
* @param text - Original document text
|
|
878
974
|
* @param options - Resolution options
|
|
975
|
+
* @param cleanContext - Optional cleaned text + transformation map. Pass when a
|
|
976
|
+
* length-changing cleaner was applied so clean-coordinate reads index the
|
|
977
|
+
* cleaned text and derived spans map back to original coordinates (#830). When
|
|
978
|
+
* omitted, `text` is treated as the cleaned text too (clean == original).
|
|
879
979
|
* @returns Citations with resolution metadata
|
|
880
980
|
*/
|
|
881
|
-
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions
|
|
981
|
+
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions, cleanContext?: {
|
|
982
|
+
cleanedText: string;
|
|
983
|
+
transformationMap: TransformationMap;
|
|
984
|
+
}): ResolvedCitation[];
|
|
882
985
|
//#endregion
|
|
883
986
|
//#region src/footnotes/detectFootnotes.d.ts
|
|
884
987
|
/**
|
|
@@ -1041,5 +1144,5 @@ declare function analyzeDocument(text: string, citations: Citation[], opts?: {
|
|
|
1041
1144
|
transformationMap?: TransformationMap;
|
|
1042
1145
|
}): Document;
|
|
1043
1146
|
//#endregion
|
|
1044
|
-
export { type AnalyzedFootnoteZone, type AnnotationCitation, type AnnotationComponentSpans, type AttributionKind, type CaseComponentSpans, type Citation, type CitationBase, type CitationGraph, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type ConstitutionalComponentSpans, type CourtInference, type DocketCitation, type Document, DocumentResolver, type Edge, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FederalRegisterComponentSpans, type FederalRuleCitation, type FederalRuleComponentSpans, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type JournalComponentSpans, type NeutralCitation, type NeutralComponentSpans, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type PublicLawComponentSpans, type QuoteAttribution, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type RestatementCitation, type RestatementComponentSpans, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatuteComponentSpans, type StatutesAtLargeCitation, type StatutesAtLargeComponentSpans, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type TreatiseCitation, type TreatiseComponentSpans, type Warning, analyzeDocument, applyFalsePositiveFilters, assertUnreachable, cleanText, detectFootnotes, extractAnnotation, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractFederalRule, extractJournal, extractNeutral, extractPublicLaw, extractRestatement, extractStatute, extractStatutesAtLarge, extractTreatise, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, spanFromGroupIndex, tokenize };
|
|
1147
|
+
export { type AnalyzedFootnoteZone, type AnnotationCitation, type AnnotationComponentSpans, type AttributionKind, type CaseComponentSpans, type Citation, type CitationBase, type CitationGraph, type CitationId, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type ConstitutionalComponentSpans, type CourtInference, type DocketCitation, type Document, DocumentResolver, type Edge, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FederalRegisterComponentSpans, type FederalRuleCitation, type FederalRuleComponentSpans, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type JournalComponentSpans, type NeutralCitation, type NeutralComponentSpans, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type PublicLawComponentSpans, type QuoteAttribution, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type RestatementCitation, type RestatementComponentSpans, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatuteComponentSpans, type StatutesAtLargeCitation, type StatutesAtLargeComponentSpans, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type TreatiseCitation, type TreatiseComponentSpans, type Warning, analyzeDocument, applyFalsePositiveFilters, assertUnreachable, byId, cleanText, detectFootnotes, extractAnnotation, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractFederalRule, extractJournal, extractNeutral, extractPublicLaw, extractRestatement, extractStatute, extractStatutesAtLarge, extractTreatise, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, spanFromGroupIndex, 
stripMarkdownEmphasis, tokenize };
|
|
1045
1148
|
//# sourceMappingURL=index.d.mts.map
|
package/dist/index.d.mts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/extract/extractCitations.ts","../src/extract/filterFalsePositives.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractAnnotation.ts","../src/extract/extractCase.ts","../src/extract/extractConstitutional.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractFederalRule.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractRestatement.ts","../src/extract/extractStatute.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/extractTreatise.ts","../src/extract/courtNormalization.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/footnotes/detectFootnotes.ts","../src/document/types.ts","../src/document/analyzer.ts"],"mappings":";;;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/extract/extractCitations.ts","../src/extract/assignCitationIds.ts","../src/extract/filterFalsePositives.ts","../src/clean/cleaners.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractAnnotation.ts","../src/extract/extractCase.ts","../src/extract/extractConstitutional.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractFederalRule.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractRestatement.ts","../src/extract/extractStatute.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/extractTreatise.ts","../src/extract/courtNormalization.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/footnotes/detectFootnotes.ts","../src/document/types.ts","../src/document/analyzer.ts"],"mappings":";;;;;;;AAyBA;;;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAOhD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;AAArE;iBAOgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;;;;;iBAQhD,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;AAX9B;;;;;;;;;;AAQA;;;iBAuBgB,iBAAA,CAAkB,CAAA;;;UCtDjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;ADKR;;UEuDiB,cAAA;EFvD+C;;;;;;;AAOhE;;;;;;;EE+DE,QAAA,GAAW,KAAA,EAAO,IAAA;;;AFxDpB;;;;;;;;;;AAQA;;EEgEE,kBAAA,GAAqB,KAAA,EAAO,IAAA;;;;;;;;;;;;;;;EAgB5B,QAAA,GAAW,OAAA;;;;AFzDb;;;;;;;;ACtDA;;EC8HE,OAAA;ED1HM;;;;;;;;;;;;AC4DR;;EA8EE,iBAAA,GAAoB,iBAAA;;;;;;;;;;;;;;;;;EAkBpB,oBAAA;;EAGA,eAAA;AAAA;;AA6DF;;;;;;;;;;;AAIA;;;;;;;;;;AAmdA;;;;;;;;;;;;;;;;AAIA;;;;;;;;;;;;;;;;;;AClrBA;;iBDuNgB,gBAAA,CACd,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAC3B,gBAAA;AAAA,iBACa,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;;;;;;;;;AEsW1E;;;;;;;;;;;;;;AC3EA;iBHwLsB,qBAAA,CACpB,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,
OAAA;AAAA,IAC3B,OAAA,CAAQ,gBAAA;AAAA,iBACW,qBAAA,CACpB,IAAA,UACA,OAAA,GAAU,cAAA,GACT,OAAA,CAAQ,QAAA;;;;AF9qBX;;;;;iBGPgB,IAAA,CAAK,SAAA,EAAW,QAAA,KAAa,GAAA,CAAI,UAAA,EAAY,QAAA;;;;;;;;;;;AHc7D;;;;;;;;;;AAQA;iBI2iBgB,yBAAA,CACd,SAAA,EAAW,QAAA,IACX,MAAA,WACA,YAAA,YACC,QAAA;;;;;;;;;;;;;;;;AG3jBH;;;;;;;;;iBF4egB,qBAAA,CAAsB,IAAA;;;;;ALtftC;UMLiB,eAAA;;EAEf,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;ANIZ;;;;;;;;;;AAOA;;;;;;iBMSgB,SAAA,CACd,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,sBAajB,kBAAA,GAAoB,KAAA,EAAO,IAAA,uBAC1B,eAAA;;;;;;;;;;UC7Bc,KAAA;EPID;EOFd,IAAA;EPE8D;EOC9D,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;APCF;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACtDA;;;;;;;AD+BA,iBOsCgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAST,KAAA;;;APhEH;;;;;;;AAAA,iBQPgB,iBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,kBAAA;;;;;ARWH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACtDA;;;;;;;;;;iBQiFgB,WAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,WACA,YAAA,WAOA,QAAA,GAAW,aAAA;EAAgB,UAAA;EAAoB,QAAA;AAAA,KAC9C,gBAAA;;;;ATpFH;;;;;;iBUsPgB,qBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,sBAAA;;;;;;AVlPH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;iBWCgB,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,uBAAA;;;;AXpBH;;;;;;;;;;AAOA;;;iBYgDgB,kBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,mBAAA;;;;;;AZ1DH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;iBaYgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,eAAA;;;;;;Ab/BH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;iBckDgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,eAAA;;;;;;AdrEH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;iBeEgB,gBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,iBAAA;;;iBCXa,kBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,mBAAA;;;;;;;iBCuFa,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,eAAA,GAAkB,kBAAA;;;iBClGL,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,uBAAA;;;iBCyCa,eAAA,CACd,KAAA,EAAO,KAAA,EACP,i
BAAA,EAAmB,iBAAA,GAClB,gBAAA;;;;;;;AnB3DH;;;;;;;;iBoBbgB,cAAA,CAAe,KAAA;;;ApBoB/B;;;;AAAA,cqBuDa,gBAAA;EAAA,iBACM,SAAA;;;;mBAIA,IAAA;ErBrDnB;;;;EAAA,iBqB0DmB,WAAA;;;mBAGA,iBAAA;EAAA,iBACA,OAAA;EAAA,iBAMA,OAAA;EAAA,iBACA,aAAA;EAAA,iBACA,UAAA;;UAET,WAAA;;UAEA,UAAA;;UAEA,WAAA;;;UAGA,aAAA;;;;;;;;;;;ArBhDV;EqB6DE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA,EACT,YAAA;IAAiB,WAAA;IAAqB,iBAAA,EAAmB,iBAAA;EAAA;;;;ApBvH7D;;EoB6KE,OAAA,CAAA,GAAW,gBAAA;EpBzKL;;;;;;;;EAAA,QoBsQE,uBAAA;;;;AnB1MV;;;;;;;;UmByNU,eAAA;;;;;;;;;;;;UA4DA,wBAAA;;;;AnBrHV;;;;;;;;;;;AAIA;;;;;UmBsJU,SAAA;;;;;AnB6TV;;;;;;;;UmBnIU,yBAAA;;;;;;UAoBA,gBAAA;EnBkHC;AACX;;;;;;EADW,QmB9FD,oBAAA;EnBkGP;;;;;;;;;EAAA,QmBhFO,oBAAA;;AlBrmBV;;;;;;;;;;UkBunBU,oBAAA;;;;;;;UAUA,mBAAA;;AjBhEV;;;;;;;UiBwFU,yBAAA;;;;;;;AhBnKV;;UgBmLU,wBAAA;EhBnLM;;;;;AC3fhB;;;;;ED2fgB,QgBoSN,YAAA;;;;;;AfnwBV;;;Ues0BU,qBAAA;EAAA,QAmDA,kBAAA;;;;;;;UA4BA,qBAAA;;;;;;;UAmDA,kBAAA;EAAA,QA4CA,mBAAA;EAAA,QAmBA,qBAAA;EdphCV;;;;;;;;;;;;;;;EAAA,QcqjCU,oBAAA;Ed1iCR;AAuCF;;;;;;;;;;;;;AC5DA;;;;;;;;;;;;EDqBE,QcsqCQ,uBAAA;;;;;;UAwGA,iBAAA;EZ3tCM;;;;EAAA,QY2vCN,gBAAA;;;;;;UAgCA,gBAAA;;;;UAWA,kBAAA;;;;UAuBA,iBAAA;;;;UAUA,aAAA;;AXzpCV;;UW0qCU,mBAAA;AAAA;;;;;;ArBl5CV;;;;;;;;;;AAQA;iBsBTgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,EACV,YAAA;EAAiB,WAAA;EAAqB,iBAAA,EAAmB,iBAAA;AAAA,IACxD,gBAAA;;;;;;AtBlBH;;;;;;;;iBuBRgB,eAAA,CAAgB,IAAA,WAAe,WAAA;;;;;AvBQ/C;;;;;;;;KwBZY,eAAA;;AxBmBZ;;;;UwBZiB,gBAAA;;EAEf,SAAA,EAAW,IAAA;;EAEX,SAAA;ExBQmE;EwBNnE,aAAA;ExBac;EwBXd,eAAA,GAAkB,eAAA;ExBW4C;;;;;;;AAQhE;EwBVE,UAAA;AAAA;;;;;;;;KAUU,IAAA;EACN,IAAA;EAAqB,IAAA;EAAc,EAAA;EAAY,UAAA;EAAoB,QAAA;AAAA;EACnE,IAAA;EAAoB,IAAA;EAAc,EAAA;AAAA;EAClC,IAAA;EAAkB,IAAA;EAAc,EAAA;EAAY,OAAA;AAAA;EAC5C,IAAA;EAAoB,IAAA;EAAc,EAAA;EAAY,MAAA,EAAQ,aAAA;AAAA;EACtD,IAAA;EAAyB,IAAA;EAAc,EAAA;AAAA;EACvC,IAAA;EAAqB,IAAA;EAAc,EAAA;EAAY,OAAA;EAAiB,QAAA;AAAA;EAChE,IAAA;EAA6B,IAAA;EAAc,EAAA;AAAA;;;;;;;;;;;;UAahC,aAAA;EACf,KAAA;EACA,KAAA,EAAO,IAAA;AAAA;;;;;;UAQQ,oBAAA;EACf,KAAA;EACA,GAAA;
EACA,cAAA;EtBgKc;EsB9Jd,eAAA;AAAA;;;;;;;UASe,QAAA;EtByJjB;EsBvJE,SAAA,EAAW,QAAA;;;;EAKX,UAAA,EAAY,IAAA;;EAGZ,cAAA,EAAgB,GAAA,SAAY,IAAA;;EAG5B,cAAA,EAAgB,GAAA,SAAY,IAAA;EtB+lB9B;;EsB3lBE,iBAAA,EAAmB,gBAAA;;EAGnB,aAAA,EAAe,aAAA;;;EAIf,aAAA,GAAgB,oBAAA;AAAA;;;;AxB1FlB;;;;;;;;;iByBLgB,eAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,IAAA;EAAS,iBAAA,GAAoB,iBAAA;AAAA,IAC5B,QAAA"}
|