eyecite-ts 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +138 -125
- package/dist/annotate/index.cjs +1 -1
- package/dist/annotate/index.cjs.map +1 -1
- package/dist/annotate/index.d.cts +4 -4
- package/dist/annotate/index.d.cts.map +1 -1
- package/dist/annotate/index.d.mts +4 -4
- package/dist/annotate/index.d.mts.map +1 -1
- package/dist/annotate/index.mjs +1 -1
- package/dist/annotate/index.mjs.map +1 -1
- package/dist/{citation-8_GvfEuj.d.mts → citation-DAyM8kNA.d.mts} +51 -11
- package/dist/{citation-8_GvfEuj.d.mts.map → citation-DAyM8kNA.d.mts.map} +1 -1
- package/dist/{citation-BcY5zzWb.d.cts → citation-qKSc_Myj.d.cts} +51 -11
- package/dist/{citation-BcY5zzWb.d.cts.map → citation-qKSc_Myj.d.cts.map} +1 -1
- package/dist/data/index.cjs +1 -1
- package/dist/data/index.cjs.map +1 -1
- package/dist/data/index.mjs +1 -1
- package/dist/data/index.mjs.map +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +63 -39
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +63 -39
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +16 -13
package/dist/index.d.cts
CHANGED
|
@@ -1,24 +1,47 @@
|
|
|
1
|
-
import { a as
|
|
1
|
+
import { S as TransformationMap, _ as StatuteCitation, a as ExtractorMap, b as Warning, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as StatutesAtLargeCitation, x as Span, y as SupraCitation } from "./citation-qKSc_Myj.cjs";
|
|
2
2
|
|
|
3
|
-
//#region src/
|
|
3
|
+
//#region src/types/guards.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
|
|
6
|
+
*/
|
|
7
|
+
declare function isFullCitation(citation: Citation): citation is FullCitation;
|
|
8
|
+
/**
|
|
9
|
+
* Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).
|
|
10
|
+
*/
|
|
11
|
+
declare function isShortFormCitation(citation: Citation): citation is ShortFormCitation;
|
|
12
|
+
/**
|
|
13
|
+
* Type guard: narrows Citation to a full case citation.
|
|
14
|
+
*/
|
|
15
|
+
declare function isCaseCitation(citation: Citation): citation is FullCaseCitation;
|
|
4
16
|
/**
|
|
5
|
-
*
|
|
17
|
+
* Generic type guard that narrows a Citation to a specific type.
|
|
18
|
+
* Useful when the target type is dynamic or generic.
|
|
19
|
+
*/
|
|
20
|
+
declare function isCitationType<T extends CitationType>(citation: Citation, type: T): citation is CitationOfType<T>;
|
|
21
|
+
/**
|
|
22
|
+
* Exhaustiveness helper for switch statements on discriminated unions.
|
|
6
23
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).
|
|
24
|
+
* Place in the `default` branch to get a compile-time error if a new
|
|
25
|
+
* variant is added but not handled.
|
|
10
26
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```typescript
|
|
29
|
+
* switch (citation.type) {
|
|
30
|
+
* case 'case': ...
|
|
31
|
+
* case 'statute': ...
|
|
32
|
+
* // If you forget a variant, TypeScript errors here:
|
|
33
|
+
* default: assertUnreachable(citation.type)
|
|
34
|
+
* }
|
|
35
|
+
* ```
|
|
16
36
|
*/
|
|
37
|
+
declare function assertUnreachable(x: never): never;
|
|
38
|
+
//#endregion
|
|
39
|
+
//#region src/patterns/casePatterns.d.ts
|
|
17
40
|
interface Pattern {
|
|
18
41
|
id: string;
|
|
19
42
|
regex: RegExp;
|
|
20
43
|
description: string;
|
|
21
|
-
type:
|
|
44
|
+
type: FullCitationType;
|
|
22
45
|
}
|
|
23
46
|
//#endregion
|
|
24
47
|
//#region src/resolve/types.d.ts
|
|
@@ -96,15 +119,16 @@ interface ResolutionResult {
|
|
|
96
119
|
confidence: number;
|
|
97
120
|
}
|
|
98
121
|
/**
|
|
99
|
-
* Citation with
|
|
100
|
-
*
|
|
122
|
+
* Citation with resolution metadata.
|
|
123
|
+
*
|
|
124
|
+
* Uses a distributive conditional type so that `resolution` is only
|
|
125
|
+
* meaningfully present on short-form citations (Id., supra, short-form case).
|
|
126
|
+
* On full citations, `resolution` is typed as `undefined`.
|
|
101
127
|
*/
|
|
102
|
-
type ResolvedCitation = Citation & {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
*/
|
|
107
|
-
resolution?: ResolutionResult;
|
|
128
|
+
type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
|
|
129
|
+
resolution: ResolutionResult | undefined;
|
|
130
|
+
} : C & {
|
|
131
|
+
resolution?: undefined;
|
|
108
132
|
};
|
|
109
133
|
//#endregion
|
|
110
134
|
//#region src/extract/extractCitations.d.ts
|
|
@@ -230,7 +254,10 @@ interface ExtractOptions {
|
|
|
230
254
|
* // citations[2].type === "journal"
|
|
231
255
|
* ```
|
|
232
256
|
*/
|
|
233
|
-
declare function extractCitations(text: string, options
|
|
257
|
+
declare function extractCitations(text: string, options: ExtractOptions & {
|
|
258
|
+
resolve: true;
|
|
259
|
+
}): ResolvedCitation[];
|
|
260
|
+
declare function extractCitations(text: string, options?: ExtractOptions): Citation[];
|
|
234
261
|
/**
|
|
235
262
|
* Asynchronous version of extractCitations().
|
|
236
263
|
*
|
|
@@ -253,7 +280,10 @@ declare function extractCitations(text: string, options?: ExtractOptions): Citat
|
|
|
253
280
|
* // Returns ResolvedCitation[] with resolution metadata
|
|
254
281
|
* ```
|
|
255
282
|
*/
|
|
256
|
-
declare function extractCitationsAsync(text: string, options
|
|
283
|
+
declare function extractCitationsAsync(text: string, options: ExtractOptions & {
|
|
284
|
+
resolve: true;
|
|
285
|
+
}): Promise<ResolvedCitation[]>;
|
|
286
|
+
declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
|
|
257
287
|
//#endregion
|
|
258
288
|
//#region src/clean/cleanText.d.ts
|
|
259
289
|
/**
|
|
@@ -268,17 +298,6 @@ interface CleanTextResult {
|
|
|
268
298
|
warnings: Warning[];
|
|
269
299
|
}
|
|
270
300
|
/**
|
|
271
|
-
* Warning generated during text cleaning.
|
|
272
|
-
*/
|
|
273
|
-
interface Warning {
|
|
274
|
-
level: "error" | "warning" | "info";
|
|
275
|
-
message: string;
|
|
276
|
-
position: {
|
|
277
|
-
start: number;
|
|
278
|
-
end: number;
|
|
279
|
-
};
|
|
280
|
-
}
|
|
281
|
-
/**
|
|
282
301
|
* Clean text using a pipeline of transformation functions.
|
|
283
302
|
*
|
|
284
303
|
* Applies cleaners sequentially while maintaining accurate position mappings
|
|
@@ -404,7 +423,7 @@ declare function tokenize(cleanedText: string, patterns?: Pattern[]): Token[];
|
|
|
404
423
|
* // }
|
|
405
424
|
* ```
|
|
406
425
|
*/
|
|
407
|
-
declare function extractCase(token: Token, transformationMap: TransformationMap): FullCaseCitation;
|
|
426
|
+
declare function extractCase(token: Token, transformationMap: TransformationMap, cleanedText?: string): FullCaseCitation;
|
|
408
427
|
//#endregion
|
|
409
428
|
//#region src/extract/extractStatute.d.ts
|
|
410
429
|
/**
|
|
@@ -597,6 +616,9 @@ declare function extractPublicLaw(token: Token, transformationMap: Transformatio
|
|
|
597
616
|
*/
|
|
598
617
|
declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
|
|
599
618
|
//#endregion
|
|
619
|
+
//#region src/extract/extractStatutesAtLarge.d.ts
|
|
620
|
+
declare function extractStatutesAtLarge(token: Token, transformationMap: TransformationMap): StatutesAtLargeCitation;
|
|
621
|
+
//#endregion
|
|
600
622
|
//#region src/resolve/DocumentResolver.d.ts
|
|
601
623
|
/**
|
|
602
624
|
* Document-scoped resolver that processes citations sequentially
|
|
@@ -634,10 +656,6 @@ declare class DocumentResolver {
|
|
|
634
656
|
*/
|
|
635
657
|
private resolveShortFormCase;
|
|
636
658
|
/**
|
|
637
|
-
* Checks if a citation is a full citation (not short-form).
|
|
638
|
-
*/
|
|
639
|
-
private isFullCitation;
|
|
640
|
-
/**
|
|
641
659
|
* Tracks a full citation in the resolution history.
|
|
642
660
|
* Extracts party name for supra resolution.
|
|
643
661
|
*/
|
|
@@ -648,6 +666,12 @@ declare class DocumentResolver {
|
|
|
648
666
|
*/
|
|
649
667
|
private extractPartyName;
|
|
650
668
|
/**
|
|
669
|
+
* Strips citation signal words that may precede party names.
|
|
670
|
+
* E.g., "In Smith" → "Smith", "See Also Jones" → "Jones"
|
|
671
|
+
* Preserves "In re" which is a case name format, not a signal word.
|
|
672
|
+
*/
|
|
673
|
+
private stripSignalWords;
|
|
674
|
+
/**
|
|
651
675
|
* Normalizes party name for matching.
|
|
652
676
|
*/
|
|
653
677
|
private normalizePartyName;
|
|
@@ -678,5 +702,5 @@ declare class DocumentResolver {
|
|
|
678
702
|
*/
|
|
679
703
|
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
|
|
680
704
|
//#endregion
|
|
681
|
-
export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
|
|
705
|
+
export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
|
|
682
706
|
//# sourceMappingURL=index.d.cts.map
|
package/dist/index.d.cts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractStatutesAtLarge.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;AAKA;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAahD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBASrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;AAThE;;;;AAAA,iBAiBgB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;AAX9B;;;;;;;;;;AAQA;iBAuBgB,iBAAA,CAAkB,CAAA;;;UC1CjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;;;;KCPI,aAAA;;AFKZ;;UEAiB,iBAAA;EFAoD;;;;;;;EEQnE,aAAA,GAAgB,aAAA;EFCF;;;;EEKd,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;EFHb;;;;EESd,kBAAA;;;;;;EAOA,mBAAA;;;;;;EAOA,qBAAA;;;;AFAF;EEME,gBAAA;AAAA;;;;UAMe,gBAAA;;ADtDjB;;;EC2DE,UAAA;;;;EAKA,aAAA;;;;EAKA,QAAA;;;;AAxEF;EA8EE,UAAA;AAAA;;;AAzEF;;;;;KAmFY,gBAAA,WAA2B,QAAA,GAAW,QAAA,IAChD,CAAA,SAAU,iBAAA,GACN,CAAA;EAAM,UAAA,EAAY,gBAAA;AAAA,IAClB,CAAA;EAAM,UAAA;AAAA;;;AFnGZ;;;AAAA,UGmCiB,cAAA;;;;;;;AHtBjB;;;;;;;;EGqCC,QAAA,GAAW,KAAA,EAAO,IAAA;EHrCkD;AASrE;;;;;;;;;;AAQA;;;EGoCC,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;AH5BD;;;;;;;;AC1CA;;;EEsFC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;ADzFrB;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA+BA;;;;;;;;;;;;;;;;;;iBC8DgB,gBAAA,CAAiB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,gBAAA;AAAA,iBAC7E,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;AA5H1E;;;;;;;;;;;;;;;;;iBAoPsB,qBAAA,CAAsB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,OAAA,CAAQ,gBAAA;AAAA,iBAC1F,qBAAA,CAAsB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,OAAA,CAAQ,QAAA;;;;AHxR7F;;UIOiB,eAAA;EJP+C;EIS/D,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;AJFX;;;;;;;;;;AASA;;;;;;AATA,iBIqBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;;;;;;;AJpBH;UKMiB,KAAA;;EAEf,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;;;;;;;;;;;;;;;;;;ALcF;;;;;;;;AC1CA;;;;;;;;iBImEgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;ALjEH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;AC1CA;;;;;;;iBKkFgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACE,gBAAA;;;;;ANpFH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;iBOcgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;APlCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;iBQgBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ARpCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;iBSYgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;AThCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;iBUagB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;AVjCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;iBWWgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;iBCjCa,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;AZDH;;;;AAAA,caea,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EbnBkD;AASrE;;;;;;EamBE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;Abdb;;EasDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;UAqCA,iBAAA;EbpNoB;AAoB9B;;;EApB8B,QamOpB,gBAAA;Eb/MwB;;;;AC1ClC;ED0CkC,Qa6OxB,gBAAA;;;;UASA,kBAAA;;;;UAUA,iBAAA;;;;UAUA,aAAA;;AXvTV;;UWmUU,mBAAA;AAAA;;;;;;AbrTV;;;;;;;iBcMgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
|
package/dist/index.d.mts
CHANGED
|
@@ -1,24 +1,47 @@
|
|
|
1
|
-
import { a as
|
|
1
|
+
import { S as TransformationMap, _ as StatuteCitation, a as ExtractorMap, b as Warning, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as StatutesAtLargeCitation, x as Span, y as SupraCitation } from "./citation-DAyM8kNA.mjs";
|
|
2
2
|
|
|
3
|
-
//#region src/
|
|
3
|
+
//#region src/types/guards.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
|
|
6
|
+
*/
|
|
7
|
+
declare function isFullCitation(citation: Citation): citation is FullCitation;
|
|
8
|
+
/**
|
|
9
|
+
* Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).
|
|
10
|
+
*/
|
|
11
|
+
declare function isShortFormCitation(citation: Citation): citation is ShortFormCitation;
|
|
12
|
+
/**
|
|
13
|
+
* Type guard: narrows Citation to a full case citation.
|
|
14
|
+
*/
|
|
15
|
+
declare function isCaseCitation(citation: Citation): citation is FullCaseCitation;
|
|
4
16
|
/**
|
|
5
|
-
*
|
|
17
|
+
* Generic type guard that narrows a Citation to a specific type.
|
|
18
|
+
* Useful when the target type is dynamic or generic.
|
|
19
|
+
*/
|
|
20
|
+
declare function isCitationType<T extends CitationType>(citation: Citation, type: T): citation is CitationOfType<T>;
|
|
21
|
+
/**
|
|
22
|
+
* Exhaustiveness helper for switch statements on discriminated unions.
|
|
6
23
|
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
* Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).
|
|
24
|
+
* Place in the `default` branch to get a compile-time error if a new
|
|
25
|
+
* variant is added but not handled.
|
|
10
26
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```typescript
|
|
29
|
+
* switch (citation.type) {
|
|
30
|
+
* case 'case': ...
|
|
31
|
+
* case 'statute': ...
|
|
32
|
+
* // If you forget a variant, TypeScript errors here:
|
|
33
|
+
* default: assertUnreachable(citation.type)
|
|
34
|
+
* }
|
|
35
|
+
* ```
|
|
16
36
|
*/
|
|
37
|
+
declare function assertUnreachable(x: never): never;
|
|
38
|
+
//#endregion
|
|
39
|
+
//#region src/patterns/casePatterns.d.ts
|
|
17
40
|
interface Pattern {
|
|
18
41
|
id: string;
|
|
19
42
|
regex: RegExp;
|
|
20
43
|
description: string;
|
|
21
|
-
type:
|
|
44
|
+
type: FullCitationType;
|
|
22
45
|
}
|
|
23
46
|
//#endregion
|
|
24
47
|
//#region src/resolve/types.d.ts
|
|
@@ -96,15 +119,16 @@ interface ResolutionResult {
|
|
|
96
119
|
confidence: number;
|
|
97
120
|
}
|
|
98
121
|
/**
|
|
99
|
-
* Citation with
|
|
100
|
-
*
|
|
122
|
+
* Citation with resolution metadata.
|
|
123
|
+
*
|
|
124
|
+
* Uses a distributive conditional type so that `resolution` is only
|
|
125
|
+
* meaningfully present on short-form citations (Id., supra, short-form case).
|
|
126
|
+
* On full citations, `resolution` is typed as `undefined`.
|
|
101
127
|
*/
|
|
102
|
-
type ResolvedCitation = Citation & {
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
*/
|
|
107
|
-
resolution?: ResolutionResult;
|
|
128
|
+
type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
|
|
129
|
+
resolution: ResolutionResult | undefined;
|
|
130
|
+
} : C & {
|
|
131
|
+
resolution?: undefined;
|
|
108
132
|
};
|
|
109
133
|
//#endregion
|
|
110
134
|
//#region src/extract/extractCitations.d.ts
|
|
@@ -230,7 +254,10 @@ interface ExtractOptions {
|
|
|
230
254
|
* // citations[2].type === "journal"
|
|
231
255
|
* ```
|
|
232
256
|
*/
|
|
233
|
-
declare function extractCitations(text: string, options
|
|
257
|
+
declare function extractCitations(text: string, options: ExtractOptions & {
|
|
258
|
+
resolve: true;
|
|
259
|
+
}): ResolvedCitation[];
|
|
260
|
+
declare function extractCitations(text: string, options?: ExtractOptions): Citation[];
|
|
234
261
|
/**
|
|
235
262
|
* Asynchronous version of extractCitations().
|
|
236
263
|
*
|
|
@@ -253,7 +280,10 @@ declare function extractCitations(text: string, options?: ExtractOptions): Citat
|
|
|
253
280
|
* // Returns ResolvedCitation[] with resolution metadata
|
|
254
281
|
* ```
|
|
255
282
|
*/
|
|
256
|
-
declare function extractCitationsAsync(text: string, options
|
|
283
|
+
declare function extractCitationsAsync(text: string, options: ExtractOptions & {
|
|
284
|
+
resolve: true;
|
|
285
|
+
}): Promise<ResolvedCitation[]>;
|
|
286
|
+
declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
|
|
257
287
|
//#endregion
|
|
258
288
|
//#region src/clean/cleanText.d.ts
|
|
259
289
|
/**
|
|
@@ -268,17 +298,6 @@ interface CleanTextResult {
|
|
|
268
298
|
warnings: Warning[];
|
|
269
299
|
}
|
|
270
300
|
/**
|
|
271
|
-
* Warning generated during text cleaning.
|
|
272
|
-
*/
|
|
273
|
-
interface Warning {
|
|
274
|
-
level: "error" | "warning" | "info";
|
|
275
|
-
message: string;
|
|
276
|
-
position: {
|
|
277
|
-
start: number;
|
|
278
|
-
end: number;
|
|
279
|
-
};
|
|
280
|
-
}
|
|
281
|
-
/**
|
|
282
301
|
* Clean text using a pipeline of transformation functions.
|
|
283
302
|
*
|
|
284
303
|
* Applies cleaners sequentially while maintaining accurate position mappings
|
|
@@ -404,7 +423,7 @@ declare function tokenize(cleanedText: string, patterns?: Pattern[]): Token[];
|
|
|
404
423
|
* // }
|
|
405
424
|
* ```
|
|
406
425
|
*/
|
|
407
|
-
declare function extractCase(token: Token, transformationMap: TransformationMap): FullCaseCitation;
|
|
426
|
+
declare function extractCase(token: Token, transformationMap: TransformationMap, cleanedText?: string): FullCaseCitation;
|
|
408
427
|
//#endregion
|
|
409
428
|
//#region src/extract/extractStatute.d.ts
|
|
410
429
|
/**
|
|
@@ -597,6 +616,9 @@ declare function extractPublicLaw(token: Token, transformationMap: Transformatio
|
|
|
597
616
|
*/
|
|
598
617
|
declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
|
|
599
618
|
//#endregion
|
|
619
|
+
//#region src/extract/extractStatutesAtLarge.d.ts
|
|
620
|
+
declare function extractStatutesAtLarge(token: Token, transformationMap: TransformationMap): StatutesAtLargeCitation;
|
|
621
|
+
//#endregion
|
|
600
622
|
//#region src/resolve/DocumentResolver.d.ts
|
|
601
623
|
/**
|
|
602
624
|
* Document-scoped resolver that processes citations sequentially
|
|
@@ -634,10 +656,6 @@ declare class DocumentResolver {
|
|
|
634
656
|
*/
|
|
635
657
|
private resolveShortFormCase;
|
|
636
658
|
/**
|
|
637
|
-
* Checks if a citation is a full citation (not short-form).
|
|
638
|
-
*/
|
|
639
|
-
private isFullCitation;
|
|
640
|
-
/**
|
|
641
659
|
* Tracks a full citation in the resolution history.
|
|
642
660
|
* Extracts party name for supra resolution.
|
|
643
661
|
*/
|
|
@@ -648,6 +666,12 @@ declare class DocumentResolver {
|
|
|
648
666
|
*/
|
|
649
667
|
private extractPartyName;
|
|
650
668
|
/**
|
|
669
|
+
* Strips citation signal words that may precede party names.
|
|
670
|
+
* E.g., "In Smith" → "Smith", "See Also Jones" → "Jones"
|
|
671
|
+
* Preserves "In re" which is a case name format, not a signal word.
|
|
672
|
+
*/
|
|
673
|
+
private stripSignalWords;
|
|
674
|
+
/**
|
|
651
675
|
* Normalizes party name for matching.
|
|
652
676
|
*/
|
|
653
677
|
private normalizePartyName;
|
|
@@ -678,5 +702,5 @@ declare class DocumentResolver {
|
|
|
678
702
|
*/
|
|
679
703
|
declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
|
|
680
704
|
//#endregion
|
|
681
|
-
export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
|
|
705
|
+
export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
|
|
682
706
|
//# sourceMappingURL=index.d.mts.map
|
package/dist/index.d.mts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractStatutesAtLarge.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;AAKA;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAahD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBASrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;AAThE;;;;AAAA,iBAiBgB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;AAX9B;;;;;;;;;;AAQA;iBAuBgB,iBAAA,CAAkB,CAAA;;;UC1CjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;;;;KCPI,aAAA;;AFKZ;;UEAiB,iBAAA;EFAoD;;;;;;;EEQnE,aAAA,GAAgB,aAAA;EFCF;;;;EEKd,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;EFHb;;;;EESd,kBAAA;;;;;;EAOA,mBAAA;;;;;;EAOA,qBAAA;;;;AFAF;EEME,gBAAA;AAAA;;;;UAMe,gBAAA;;ADtDjB;;;EC2DE,UAAA;;;;EAKA,aAAA;;;;EAKA,QAAA;;;;AAxEF;EA8EE,UAAA;AAAA;;;AAzEF;;;;;KAmFY,gBAAA,WAA2B,QAAA,GAAW,QAAA,IAChD,CAAA,SAAU,iBAAA,GACN,CAAA;EAAM,UAAA,EAAY,gBAAA;AAAA,IAClB,CAAA;EAAM,UAAA;AAAA;;;AFnGZ;;;AAAA,UGmCiB,cAAA;;;;;;;AHtBjB;;;;;;;;EGqCC,QAAA,GAAW,KAAA,EAAO,IAAA;EHrCkD;AASrE;;;;;;;;;;AAQA;;;EGoCC,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;AH5BD;;;;;;;;AC1CA;;;EEsFC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;ADzFrB;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA+BA;;;;;;;;;;;;;;;;;;iBC8DgB,gBAAA,CAAiB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,gBAAA;AAAA,iBAC7E,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;AA5H1E;;;;;;;;;;;;;;;;;iBAoPsB,qBAAA,CAAsB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,OAAA,CAAQ,gBAAA;AAAA,iBAC1F,qBAAA,CAAsB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,OAAA,CAAQ,QAAA;;;;AHxR7F;;UIOiB,eAAA;EJP+C;EIS/D,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;AJFX;;;;;;;;;;AASA;;;;;;AATA,iBIqBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;;;;;;;AJpBH;UKMiB,KAAA;;EAEf,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;;;;;;;;;;;;;;;;;;ALcF;;;;;;;;AC1CA;;;;;;;;iBImEgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;ALjEH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;AC1CA;;;;;;;iBKkFgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACE,gBAAA;;;;;ANpFH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;iBOcgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;APlCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;iBQgBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ARpCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;iBSYgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;AThCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;iBUagB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;AVjCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;iBWWgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;iBCjCa,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;AZDH;;;;AAAA,caea,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EbnBkD;AASrE;;;;;;EamBE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;Abdb;;EasDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;UAqCA,iBAAA;EbpNoB;AAoB9B;;;EApB8B,QamOpB,gBAAA;Eb/MwB;;;;AC1ClC;ED0CkC,Qa6OxB,gBAAA;;;;UASA,kBAAA;;;;UAUA,iBAAA;;;;UAUA,aAAA;;AXvTV;;UWmUU,mBAAA;AAAA;;;;;;AbrTV;;;;;;;iBcMgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
|
package/dist/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
function e(e){return e.replace(/<[^>]+>/g,``)}function t(e){return e.replace(/[\t\n\r]+/g,` `).replace(/ {2,}/g,` `)}function n(e){return e.normalize(`NFKC`)}function r(e){return e.replace(/[\u201C\u201D]/g,`"`).replace(/[\u2018\u2019]/g,`'`)}function i(i,o=[e,t,n,r]){let s=i,c=new Map,l=new Map;for(let e=0;e<=i.length;e++)c.set(e,e),l.set(e,e);for(let e of o){let t=s,n=e(s);if(t!==n){let{newCleanToOriginal:e,newOriginalToClean:r}=a(t,n,c,l);c=e,l=r,s=n}}return{cleaned:s,transformationMap:{cleanToOriginal:c,originalToClean:l},warnings:[]}}function a(e,t,n,r){let i=new Map,a=new Map,o=0,s=0;for(;o<=e.length||s<=t.length;){if(o>=e.length&&s>=t.length){let e=n.get(o)??o;i.set(s,e),a.set(e,s);break}if(o>=e.length){let e=n.get(o)??o;i.set(s,e),s++;continue}if(s>=t.length){let e=n.get(o)??o;a.set(e,s),o++;continue}if(e[o]===t[s]){let e=n.get(o)??o;i.set(s,e),a.set(e,s),o++,s++}else{let r=!1;for(let i=1;i<=20&&!(o+i>=e.length);i++)if(e[o+i]===t[s]){for(let e=0;e<i;e++){let t=n.get(o+e)??o+e;a.set(t,s)}o+=i,r=!0;break}if(r)continue;for(let a=1;a<=20&&!(s+a>=t.length);a++)if(e[o]===t[s+a]){let e=n.get(o)??o;for(let t=0;t<a;t++)i.set(s+t,e);s+=a,r=!0;break}if(r)continue;let c=n.get(o)??o;i.set(s,c),a.set(c,s),o++,s++}}return{newCleanToOriginal:i,newOriginalToClean:a}}const o=[{id:`federal-reporter`,regex:/\b(\d+)\s+(F\.|F\.2d|F\.3d|F\.\s?Supp\.|F\.\s?Supp\.\s?2d|F\.\s?Supp\.\s?3d)\s+(\d+)\b/g,description:`Federal Reporter (F., F.2d, F.3d, F.Supp., etc.)`,type:`case`},{id:`supreme-court`,regex:/\b(\d+)\s+(U\.S\.|S\.\s?Ct\.|L\.\s?Ed\.(?:\s?2d)?)\s+(\d+)\b/g,description:`U.S. Supreme Court reporters`,type:`case`},{id:`state-reporter`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.]+(?:\s?2d|\s?3d)?)\s+(\d+)\b/g,description:`State reporters (broad pattern, validated against reporters-db in Phase 3)`,type:`case`}],s=[{id:`usc`,regex:/\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+)\b/g,description:`U.S. Code citations (e.g., "42 U.S.C. § 1983")`,type:`statute`},{id:`state-code`,regex:/\b([A-Z][a-z]+\.?\s+[A-Za-z\.]+\s+Code)\s+§\s*(\d+)\b/g,description:`State code citations (broad pattern, e.g., "Cal. Penal Code § 187")`,type:`statute`}],c=[{id:`law-review`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.\s]+)\s+(\d+)\b/g,description:`Law review citations (e.g., "120 Harv. L. Rev. 500"), validated against journals-db in Phase 3`,type:`journal`}],l=[{id:`westlaw`,regex:/\b(\d{4})\s+WL\s+(\d+)\b/g,description:`WestLaw citations (e.g., "2021 WL 123456")`,type:`neutral`},{id:`lexis`,regex:/\b(\d{4})\s+U\.S\.\s+LEXIS\s+(\d+)\b/g,description:`LexisNexis citations (e.g., "2021 U.S. LEXIS 5000")`,type:`neutral`},{id:`public-law`,regex:/\bPub\.\s?L\.\s?No\.\s?(\d+-\d+)\b/g,description:`Public Law citations (e.g., "Pub. L. No. 117-58")`,type:`publicLaw`},{id:`federal-register`,regex:/\b(\d+)\s+Fed\.\s?Reg\.\s+(\d+)\b/g,description:`Federal Register citations (e.g., "86 Fed. Reg. 12345")`,type:`federalRegister`}],u=[{id:`id`,regex:/\b[Ii]d\.(?:\s+at\s+(\d+))?/g,description:`Id. citations (e.g., "Id." or "Id. at 253")`,type:`case`},{id:`ibid`,regex:/\b[Ii]bid\.(?:\s+at\s+(\d+))?/g,description:`Ibid. citations (e.g., "Ibid." or "Ibid. at 125")`,type:`case`},{id:`supra`,regex:/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/g,description:`Supra citations (e.g., "Smith, supra" or "Smith, supra, at 460")`,type:`case`},{id:`shortFormCase`,regex:/\b(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)\b/g,description:`Short-form case citations (e.g., "500 F.2d at 125")`,type:`case`}];function d(e,t=[...o,...s,...c,...l,...u]){let n=[];for(let r of t)try{let t=e.matchAll(r.regex);for(let e of t)n.push({text:e[0],span:{cleanStart:e.index,cleanEnd:e.index+e[0].length},type:r.type,patternId:r.id})}catch(e){console.warn(`Pattern ${r.id} threw error, skipping:`,e instanceof Error?e.message:String(e));continue}return n.sort((e,t)=>e.span.cleanStart-t.span.cleanStart),n}function f(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z0-9.\s]+)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=/\((?:[^)]*\s)?(\d{4})\)/.exec(n),d=u?Number.parseInt(u[1],10):void 0,f=/\(([^)]*[A-Za-z][^)]*)\)/.exec(n),p=f?f[1].trim():void 0,m=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,h=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,g=.5;return`F.,F.2d,F.3d,F.4th,U.S.,S. Ct.,L. Ed.,P.,P.2d,P.3d,A.,A.2d,A.3d,N.E.,N.E.2d,N.E.3d,N.W.,N.W.2d,S.E.,S.E.2d,S.W.,S.W.2d,S.W.3d,So.,So. 2d,So. 3d`.split(`,`).some(e=>o.includes(e))&&(g+=.3),d!==void 0&&d<=new Date().getFullYear()&&(g+=.2),g=Math.min(g,1),{type:`case`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:m,originalEnd:h},confidence:g,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,page:s,pincite:l,court:p,year:d}}function p(e,t){let{text:n,span:r}=e,i=/^(?:(\d+)\s+)?([A-Za-z.\s]+?)\s*§\s*(\d+[A-Za-z0-9\-]*)/.exec(n);if(!i)throw Error(`Failed to parse statute citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=i[2].trim(),s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,u=.5;return[`U.S.C.`,`C.F.R.`,`Cal. Civ. Code`,`Cal. Penal Code`,`N.Y. Civ. Prac. L. & R.`,`Tex. Civ. Prac. & Rem. Code`].some(e=>o.includes(e))&&(u+=.3),u=Math.min(u,1),{type:`statute`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:u,matchedText:n,processTimeMs:0,patternsChecked:1,title:a,code:o,section:s}}function m(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z.\s]+?)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse journal citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,d=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`journal`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:u,originalEnd:d},confidence:.6,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,journal:o,abbreviation:o,page:s,pincite:l}}function h(e,t){let{text:n,span:r}=e,i=/^(\d{4})\s+(WL|LEXIS|U\.S\.\s+LEXIS)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse neutral citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2],s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`neutral`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,year:a,court:o,documentNumber:s}}function g(e,t){let{text:n,span:r}=e,i=/Pub\.\s?L\.(?:\s?No\.)?\s?(\d+)-(\d+)/.exec(n);if(!i)throw Error(`Failed to parse public law citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`publicLaw`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,congress:a,lawNumber:o}}function _(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+Fed\.\s?Reg\.\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse Federal Register citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=/\((?:.*?\s)?(\d{4})\)/.exec(n),c=s?Number.parseInt(s[1],10):void 0,l=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,u=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`federalRegister`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:l,originalEnd:u},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,page:o,year:c}}function v(e,t){let{text:n,span:r}=e,i=/[Ii](?:d|bid)\.(?:\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse Id. citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,s=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`id`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:o,originalEnd:s},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,pincite:a}}function y(e,t){let{text:n,span:r}=e,i=/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse supra citation: ${n}`);let a=i[1],o=i[2]?Number.parseInt(i[2],10):void 0,s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`supra`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,partyName:a,pincite:o}}function b(e,t){let{text:n,span:r}=e,i=/(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse short-form case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`shortFormCase`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:.7,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,pincite:s}}function x(e,t,n=/\n\n+/g){let r=new Map,i=[0],a;for(;(a=n.exec(e))!==null;)i.push(a.index+a[0].length);i.push(e.length);for(let e=0;e<t.length;e++){let n=t[e].span.originalStart,a=0;for(let e=0;e<i.length-1;e++)if(n>=i[e]&&n<i[e+1]){a=e;break}r.set(e,a)}return r}function S(e,t,n,r){if(r===`none`)return!0;let i=n.get(e),a=n.get(t);return i===void 0||a===void 0?!0:i===a}function C(e,t){if(e.length===0)return t.length;if(t.length===0)return e.length;let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let t=0;t<=e.length;t++)n[t][0]=t;for(let e=0;e<=t.length;e++)n[0][e]=e;for(let r=1;r<=e.length;r++)for(let i=1;i<=t.length;i++)e[r-1]===t[i-1]?n[r][i]=n[r-1][i-1]:n[r][i]=1+Math.min(n[r-1][i],n[r][i-1],n[r-1][i-1]);return n[e.length][t.length]}function w(e,t){let n=e.toLowerCase(),r=t.toLowerCase(),i=C(n,r),a=Math.max(n.length,r.length);return a===0?1:1-i/a}var T=class{constructor(e,t,n={}){this.citations=e,this.text=t,this.options={scopeStrategy:n.scopeStrategy??`paragraph`,autoDetectParagraphs:n.autoDetectParagraphs??!0,paragraphBoundaryPattern:n.paragraphBoundaryPattern??/\n\n+/g,fuzzyPartyMatching:n.fuzzyPartyMatching??!0,partyMatchThreshold:n.partyMatchThreshold??.8,allowNestedResolution:n.allowNestedResolution??!1,reportUnresolved:n.reportUnresolved??!0},this.context={citationIndex:0,allCitations:e,lastFullCitation:void 0,fullCitationHistory:new Map,paragraphMap:new Map},this.options.autoDetectParagraphs&&(this.context.paragraphMap=x(t,e,this.options.paragraphBoundaryPattern))}resolve(){let e=[];for(let t=0;t<this.citations.length;t++){this.context.citationIndex=t;let n=this.citations[t],r;switch(n.type){case`id`:r=this.resolveId(n);break;case`supra`:r=this.resolveSupra(n);break;case`shortFormCase`:r=this.resolveShortFormCase(n);break;default:this.isFullCitation(n)&&(this.context.lastFullCitation=t,this.trackFullCitation(n,t));break}e.push({...n,resolution:r})}return e}resolveId(e){let t=this.context.citationIndex,n;for(let e=t-1;e>=0;e--)if(this.citations[e].type===`case`){n=e;break}return n===void 0?this.createFailureResult(`No preceding full case citation found`):this.isWithinScope(n,t)?{resolvedTo:n,confidence:1}:this.createFailureResult(`Antecedent citation outside scope boundary`)}resolveSupra(e){let t=this.context.citationIndex,n=this.normalizePartyName(e.partyName),r;for(let[e,i]of this.context.fullCitationHistory){if(!this.isWithinScope(i,t))continue;let a=w(n,e);(!r||a>r.similarity)&&(r={index:i,similarity:a})}if(!r)return this.createFailureResult(`No full citation found in scope`);if(r.similarity<this.options.partyMatchThreshold)return this.createFailureResult(`Party name similarity ${r.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`);let i=[];return r.similarity<1&&i.push(`Fuzzy match: similarity ${r.similarity.toFixed(2)}`),{resolvedTo:r.index,confidence:r.similarity,warnings:i.length>0?i:void 0}}resolveShortFormCase(e){let t=this.context.citationIndex;for(let n=t-1;n>=0;n--){let r=this.citations[n];if(r.type!==`case`)continue;let i=r;if(i.volume===e.volume&&this.normalizeReporter(i.reporter)===this.normalizeReporter(e.reporter))return this.isWithinScope(n,t)?{resolvedTo:n,confidence:.95}:this.createFailureResult(`Matching citation outside scope boundary`)}return this.createFailureResult(`No matching full case citation found`)}isFullCitation(e){return e.type===`case`||e.type===`statute`||e.type===`journal`||e.type===`neutral`||e.type===`publicLaw`||e.type===`federalRegister`}trackFullCitation(e,t){if(e.type===`case`){let n=e,r=this.extractPartyName(n);if(r){let e=this.normalizePartyName(r);this.context.fullCitationHistory.set(e,t)}}}extractPartyName(e){let t=e.span.originalStart,n=Math.max(0,t-100),r=this.text.substring(n,t),i=r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\s+v\.?\s+[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*,\s*$/);return i?i[1].trim():r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*),\s*$/)?.[1].trim()}normalizePartyName(e){return e.toLowerCase().replace(/\s+/g,` `).trim()}normalizeReporter(e){return e.toLowerCase().replace(/\s+/g,``).replace(/\./g,``)}isWithinScope(e,t){return S(e,t,this.context.paragraphMap,this.options.scopeStrategy)}createFailureResult(e){if(this.options.reportUnresolved)return{resolvedTo:void 0,failureReason:e,confidence:0}}};function E(e,t,n){return new T(e,t,n).resolve()}function D(e,t){let n=performance.now(),{cleaned:r,transformationMap:a,warnings:x}=i(e,t?.cleaners),S=d(r,t?.patterns||[...l,...u,...o,...s,...c]),C=[],w=new Set;for(let e of S){let t=`${e.span.cleanStart}-${e.span.cleanEnd}`;w.has(t)||(w.add(t),C.push(e))}let T=[];for(let e of C){let t;switch(e.type){case`case`:t=e.patternId===`id`||e.patternId===`ibid`?v(e,a):e.patternId===`supra`?y(e,a):e.patternId===`shortFormCase`?b(e,a):f(e,a);break;case`statute`:t=p(e,a);break;case`journal`:t=m(e,a);break;case`neutral`:t=h(e,a);break;case`publicLaw`:t=g(e,a);break;case`federalRegister`:t=_(e,a);break;default:continue}x.length>0&&(t.warnings=[...t.warnings||[],...x]),t.processTimeMs=performance.now()-n,T.push(t)}return t?.resolve?E(T,e,t.resolutionOptions):T}async function O(e,t){return D(e,t)}export{T as DocumentResolver,i as cleanText,f as extractCase,D as extractCitations,O as extractCitationsAsync,_ as extractFederalRegister,m as extractJournal,h as extractNeutral,g as extractPublicLaw,p as extractStatute,E as resolveCitations,d as tokenize};
|
|
1
|
+
function e(e){return e.type===`case`||e.type===`statute`||e.type===`journal`||e.type===`neutral`||e.type===`publicLaw`||e.type===`federalRegister`||e.type===`statutesAtLarge`}function t(e){return e.type===`id`||e.type===`supra`||e.type===`shortFormCase`}function n(e){return e.type===`case`}function r(e,t){return e.type===t}function i(e){throw Error(`Unexpected value: ${e}`)}function a(e){return e.replace(/<[^>]+>/g,``)}function o(e){return e.replace(/[\t\n\r]+/g,` `).replace(/ {2,}/g,` `)}function s(e){return e.normalize(`NFKC`)}function c(e){return e.replace(/[\u201C\u201D]/g,`"`).replace(/[\u2018\u2019]/g,`'`)}function l(e,t=[a,o,s,c]){let n=e,r=new Map,i=new Map;for(let t=0;t<=e.length;t++)r.set(t,t),i.set(t,t);for(let e of t){let t=n,a=e(n);if(t!==a){let{newCleanToOriginal:e,newOriginalToClean:o}=u(t,a,r,i);r=e,i=o,n=a}}return{cleaned:n,transformationMap:{cleanToOriginal:r,originalToClean:i},warnings:[]}}function u(e,t,n,r){let i=new Map,a=new Map,o=0,s=0;for(;o<=e.length||s<=t.length;){if(o>=e.length&&s>=t.length){let e=n.get(o)??o;i.set(s,e),a.set(e,s);break}if(o>=e.length){let e=n.get(o)??o;i.set(s,e),s++;continue}if(s>=t.length){let e=n.get(o)??o;a.set(e,s),o++;continue}if(e[o]===t[s]){let e=n.get(o)??o;i.set(s,e),a.set(e,s),o++,s++}else{let r=!1;for(let i=1;i<=20&&!(o+i>=e.length);i++)if(e[o+i]===t[s]){for(let e=0;e<i;e++){let t=n.get(o+e)??o+e;a.set(t,s)}o+=i,r=!0;break}if(r)continue;for(let a=1;a<=20&&!(s+a>=t.length);a++)if(e[o]===t[s+a]){let e=n.get(o)??o;for(let t=0;t<a;t++)i.set(s+t,e);s+=a,r=!0;break}if(r)continue;let c=n.get(o)??o;i.set(s,c),a.set(c,s),o++,s++}}return{newCleanToOriginal:i,newOriginalToClean:a}}const d=[{id:`federal-reporter`,regex:/\b(\d+(?:-\d+)?)\s+(F\.|F\.2d|F\.3d|F\.4th|F\.\s?Supp\.|F\.\s?Supp\.\s?2d|F\.\s?Supp\.\s?3d|F\.\s?Supp\.\s?4th)\s+(\d+)\b/g,description:`Federal Reporter (F., F.2d, F.3d, F.4th, F.Supp., etc.)`,type:`case`},{id:`supreme-court`,regex:/\b(\d+(?:-\d+)?)\s+(U\.\s?S\.|S\.\s?Ct\.|L\.\s?Ed\.(?:\s?2d)?)\s+(\d+)\b/g,description:`U.S. Supreme Court reporters`,type:`case`},{id:`state-reporter`,regex:/\b(\d+(?:-\d+)?)\s+([A-Z][A-Za-z.]+(?:\s?2d|\s?3d|\s?4th|\s?5th)?)\s+(\d+)\b/g,description:`State reporters (broad pattern, validated against reporters-db in Phase 3)`,type:`case`}],f=[{id:`usc`,regex:/\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+[A-Za-z]*)\b/g,description:`U.S. Code citations (e.g., "42 U.S.C. § 1983")`,type:`statute`},{id:`state-code`,regex:/\b([A-Z][a-z]+\.?\s+[A-Za-z.]+\s+Code)\s+§\s*(\d+[A-Za-z]*)\b/g,description:`State code citations (broad pattern, e.g., "Cal. Penal Code § 187")`,type:`statute`}],p=[{id:`law-review`,regex:/\b(\d+(?:-\d+)?)\s+([A-Z][A-Za-z.\s]+)\s+(\d+)\b/g,description:`Law review citations (e.g., "120 Harv. L. Rev. 500"), validated against journals-db in Phase 3`,type:`journal`}],m=[{id:`westlaw`,regex:/\b(\d{4})\s+WL\s+(\d+)\b/g,description:`WestLaw citations (e.g., "2021 WL 123456")`,type:`neutral`},{id:`lexis`,regex:/\b(\d{4})\s+U\.S\.\s+LEXIS\s+(\d+)\b/g,description:`LexisNexis citations (e.g., "2021 U.S. LEXIS 5000")`,type:`neutral`},{id:`public-law`,regex:/\bPub\.\s?L\.\s?No\.\s?(\d+-\d+)\b/g,description:`Public Law citations (e.g., "Pub. L. No. 117-58")`,type:`publicLaw`},{id:`federal-register`,regex:/\b(\d+(?:-\d+)?)\s+Fed\.\s?Reg\.\s+(\d+)\b/g,description:`Federal Register citations (e.g., "86 Fed. Reg. 12345")`,type:`federalRegister`},{id:`statutes-at-large`,regex:/\b(\d+(?:-\d+)?)\s+Stat\.\s+(\d+)\b/g,description:`Statutes at Large citations (e.g., "124 Stat. 119")`,type:`statutesAtLarge`},{id:`compact-law-review`,regex:/\b(\d+(?:-\d+)?)\s+([A-Z][A-Za-z.]+L\.(?:Rev|J|Q)\.)\s+(\d+)\b/g,description:`Compact law review citations without spaces (e.g., "93 Harv.L.Rev. 752")`,type:`journal`}],h=[{id:`id`,regex:/\b[Ii]d\.(?:\s+at\s+(\d+))?/g,description:`Id. citations (e.g., "Id." or "Id. at 253")`,type:`case`},{id:`ibid`,regex:/\b[Ii]bid\.(?:\s+at\s+(\d+))?/g,description:`Ibid. citations (e.g., "Ibid." or "Ibid. at 125")`,type:`case`},{id:`supra`,regex:/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*)\s*,?\s+supra(?:,?\s+at\s+(\d+))?/g,description:`Supra citations (e.g., "Smith, supra" or "Smith, supra, at 460")`,type:`case`},{id:`shortFormCase`,regex:/\b(\d+(?:-\d+)?)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)\b/g,description:`Short-form case citations (e.g., "500 F.2d at 125")`,type:`case`}];function g(e,t=[...d,...f,...p,...m,...h]){let n=[];for(let r of t)try{let t=e.matchAll(r.regex);for(let e of t)n.push({text:e[0],span:{cleanStart:e.index,cleanEnd:e.index+e[0].length},type:r.type,patternId:r.id})}catch(e){console.warn(`Pattern ${r.id} threw error, skipping:`,e instanceof Error?e.message:String(e))}return n.sort((e,t)=>e.span.cleanStart-t.span.cleanStart),n}function _(e){let t=Number.parseInt(e,10);return String(t)===e?t:e}const v=/(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\.?/;function y(e){let t=e.replace(/\s*\d{4}\s*$/,``).trim();return t=t.replace(RegExp(`\\s*,?\\s*\\d{1,2}\\s*,?\\s*$`),``).trim(),t=t.replace(RegExp(`\\s*${v.source}\\s*$`),``).trim(),t=t.replace(/,\s*$/,``).trim(),t&&/[A-Za-z]/.test(t)?t:void 0}function b(e,t,n){let{text:r,span:i}=e,a=/^(\d+(?:-\d+)?)\s+([A-Za-z0-9.\s]+)\s+(\d+)/.exec(r);if(!a)throw Error(`Failed to parse case citation: ${r}`);let o=_(a[1]),s=a[2].trim(),c=Number.parseInt(a[3],10),l=/,\s*(\d+)/.exec(r),u=l?Number.parseInt(l[1],10):void 0,d=/\((?:[^)]*\s)?(\d{4})\)/.exec(r),f=d?Number.parseInt(d[1],10):void 0,p=/\(([^)]*[A-Za-z][^)]*)\)/.exec(r),m=p?y(p[1]):void 0;if(n&&f===void 0){let e=n.substring(i.cleanEnd),t=/^(?:,\s*\d+)*\s*\(([^)]+)\)/.exec(e);if(t){let n=t[1],r=/(\d{4})/.exec(n);r&&(f=Number.parseInt(r[1],10));let i=y(n);if(i&&(m=i),u===void 0){let t=/^,\s*(\d+)/.exec(e);t&&(u=Number.parseInt(t[1],10))}}}!m&&/^(?:U\.?\s?S\.|S\.?\s?Ct\.|L\.?\s?Ed\.)/.test(s)&&(m=`scotus`);let h=t.cleanToOriginal.get(i.cleanStart)??i.cleanStart,g=t.cleanToOriginal.get(i.cleanEnd)??i.cleanEnd,v=.5;if(`F.,F.2d,F.3d,F.4th,U.S.,S. Ct.,L. Ed.,P.,P.2d,P.3d,A.,A.2d,A.3d,N.E.,N.E.2d,N.E.3d,N.W.,N.W.2d,S.E.,S.E.2d,S.W.,S.W.2d,S.W.3d,So.,So. 2d,So. 3d`.split(`,`).some(e=>s.includes(e))&&(v+=.3),f!==void 0){let e=new Date().getFullYear();f<=e&&(v+=.2)}return v=Math.min(v,1),{type:`case`,text:r,span:{cleanStart:i.cleanStart,cleanEnd:i.cleanEnd,originalStart:h,originalEnd:g},confidence:v,matchedText:r,processTimeMs:0,patternsChecked:1,volume:o,reporter:s,page:c,pincite:u,court:m,year:f}}function x(e,t){let{text:n,span:r}=e,i=/^(?:(\d+)\s+)?([A-Za-z.\s]+?)\s*§\s*(\d+[A-Za-z0-9-]*)/.exec(n);if(!i)throw Error(`Failed to parse statute citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=i[2].trim(),s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,u=.5;return[`U.S.C.`,`C.F.R.`,`Cal. Civ. Code`,`Cal. Penal Code`,`N.Y. Civ. Prac. L. & R.`,`Tex. Civ. Prac. & Rem. Code`].some(e=>o.includes(e))&&(u+=.3),u=Math.min(u,1),{type:`statute`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:u,matchedText:n,processTimeMs:0,patternsChecked:1,title:a,code:o,section:s}}function S(e,t){let{text:n,span:r}=e,i=/^(\d+(?:-\d+)?)\s+([A-Za-z.\s]+?)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse journal citation: ${n}`);let a=i[1],o=/^\d+$/.test(a)?Number.parseInt(a,10):a,s=i[2].trim(),c=Number.parseInt(i[3],10),l=/,\s*(\d+)/.exec(n),u=l?Number.parseInt(l[1],10):void 0,d=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,f=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`journal`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:d,originalEnd:f},confidence:.6,matchedText:n,processTimeMs:0,patternsChecked:1,volume:o,journal:s,abbreviation:s,page:c,pincite:u}}function C(e,t){let{text:n,span:r}=e,i=/^(\d{4})\s+(WL|LEXIS|U\.S\.\s+LEXIS)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse neutral citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2],s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`neutral`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,year:a,court:o,documentNumber:s}}function w(e,t){let{text:n,span:r}=e,i=/Pub\.\s?L\.(?:\s?No\.)?\s?(\d+)-(\d+)/.exec(n);if(!i)throw Error(`Failed to parse public law citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`publicLaw`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,congress:a,lawNumber:o}}function T(e,t){let{text:n,span:r}=e,i=/^(\d+(?:-\d+)?)\s+Fed\.\s?Reg\.\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse Federal Register citation: ${n}`);let a=i[1],o=/^\d+$/.test(a)?Number.parseInt(a,10):a,s=Number.parseInt(i[2],10),c=/\((?:.*?\s)?(\d{4})\)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,d=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`federalRegister`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:u,originalEnd:d},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,volume:o,page:s,year:l}}function E(e,t){let{text:n,span:r}=e,i=/^(\d+(?:-\d+)?)\s+Stat\.\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse Statutes at Large citation: ${n}`);let a=i[1],o=/^\d+$/.test(a)?Number.parseInt(a,10):a,s=Number.parseInt(i[2],10),c=/\((?:.*?\s)?(\d{4})\)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,d=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`statutesAtLarge`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:u,originalEnd:d},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,volume:o,page:s,year:l}}function D(e,t){let{text:n,span:r}=e,i=/[Ii](?:d|bid)\.(?:\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse Id. citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,s=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`id`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:o,originalEnd:s},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,pincite:a}}function O(e,t){let{text:n,span:r}=e,i=/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*)\s*,?\s+supra(?:,?\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse supra citation: ${n}`);let a=i[1],o=i[2]?Number.parseInt(i[2],10):void 0,s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`supra`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,partyName:a,pincite:o}}function k(e,t){let{text:n,span:r}=e,i=/(\d+(?:-\d+)?)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse short-form case citation: ${n}`);let a=i[1],o=/^\d+$/.test(a)?Number.parseInt(a,10):a,s=i[2].trim(),c=Number.parseInt(i[3],10),l=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,u=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`shortFormCase`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:l,originalEnd:u},confidence:.7,matchedText:n,processTimeMs:0,patternsChecked:1,volume:o,reporter:s,pincite:c}}function A(e,t,n=/\n\n+/g){let r=new Map,i=[0],a;for(;(a=n.exec(e))!==null;)i.push(a.index+a[0].length);i.push(e.length);for(let e=0;e<t.length;e++){let n=t[e].span.originalStart,a=0;for(let e=0;e<i.length-1;e++)if(n>=i[e]&&n<i[e+1]){a=e;break}r.set(e,a)}return r}function j(e,t,n,r){if(r===`none`)return!0;let i=n.get(e),a=n.get(t);return i===void 0||a===void 0?!0:i===a}function M(e,t){if(e.length===0)return t.length;if(t.length===0)return e.length;let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let t=0;t<=e.length;t++)n[t][0]=t;for(let e=0;e<=t.length;e++)n[0][e]=e;for(let r=1;r<=e.length;r++)for(let i=1;i<=t.length;i++)e[r-1]===t[i-1]?n[r][i]=n[r-1][i-1]:n[r][i]=1+Math.min(n[r-1][i],n[r][i-1],n[r-1][i-1]);return n[e.length][t.length]}function N(e,t){let n=e.toLowerCase(),r=t.toLowerCase(),i=M(n,r),a=Math.max(n.length,r.length);return a===0?1:1-i/a}var P=class{constructor(e,t,n={}){this.citations=e,this.text=t,this.options={scopeStrategy:n.scopeStrategy??`paragraph`,autoDetectParagraphs:n.autoDetectParagraphs??!0,paragraphBoundaryPattern:n.paragraphBoundaryPattern??/\n\n+/g,fuzzyPartyMatching:n.fuzzyPartyMatching??!0,partyMatchThreshold:n.partyMatchThreshold??.8,allowNestedResolution:n.allowNestedResolution??!1,reportUnresolved:n.reportUnresolved??!0},this.context={citationIndex:0,allCitations:e,lastFullCitation:void 0,fullCitationHistory:new Map,paragraphMap:new Map},this.options.autoDetectParagraphs&&(this.context.paragraphMap=A(t,e,this.options.paragraphBoundaryPattern))}resolve(){let t=[];for(let n=0;n<this.citations.length;n++){this.context.citationIndex=n;let r=this.citations[n],i;switch(r.type){case`id`:i=this.resolveId(r);break;case`supra`:i=this.resolveSupra(r);break;case`shortFormCase`:i=this.resolveShortFormCase(r);break;default:e(r)&&(this.context.lastFullCitation=n,this.trackFullCitation(r,n));break}t.push({...r,resolution:i})}return t}resolveId(e){let t=this.context.citationIndex,n;for(let e=t-1;e>=0;e--)if(this.citations[e].type===`case`){n=e;break}return n===void 0?this.createFailureResult(`No preceding full case citation found`):this.isWithinScope(n,t)?{resolvedTo:n,confidence:1}:this.createFailureResult(`Antecedent citation outside scope boundary`)}resolveSupra(e){let t=this.context.citationIndex,n=this.normalizePartyName(e.partyName),r;for(let[e,i]of this.context.fullCitationHistory){if(!this.isWithinScope(i,t))continue;let a=N(n,e);(!r||a>r.similarity)&&(r={index:i,similarity:a})}if(!r)return this.createFailureResult(`No full citation found in scope`);if(r.similarity<this.options.partyMatchThreshold)return this.createFailureResult(`Party name similarity ${r.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`);let i=[];return r.similarity<1&&i.push(`Fuzzy match: similarity ${r.similarity.toFixed(2)}`),{resolvedTo:r.index,confidence:r.similarity,warnings:i.length>0?i:void 0}}resolveShortFormCase(e){let t=this.context.citationIndex;for(let n=t-1;n>=0;n--){let r=this.citations[n];if(r.type===`case`&&r.volume===e.volume&&this.normalizeReporter(r.reporter)===this.normalizeReporter(e.reporter))return this.isWithinScope(n,t)?{resolvedTo:n,confidence:.95}:this.createFailureResult(`Matching citation outside scope boundary`)}return this.createFailureResult(`No matching full case citation found`)}trackFullCitation(e,t){if(e.type===`case`){let n=this.extractPartyName(e);if(n){let e=this.normalizePartyName(n);this.context.fullCitationHistory.set(e,t)}}}extractPartyName(e){let t=e.span.originalStart,n=Math.max(0,t-100),r=this.text.substring(n,t),i=r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\s+v\.?\s+[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*,\s*$/);if(i)return this.stripSignalWords(i[1].trim());let a=r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*),\s*$/);if(a)return this.stripSignalWords(a[1].trim())}stripSignalWords(e){let t=e.replace(/^(?:In(?!\s+re\b)|See(?:\s+[Aa]lso)?|Compare|But(?:\s+[Ss]ee)?|Cf\.?|Also)\s+/i,``).trim();return t.length>0?t:e}normalizePartyName(e){return e.toLowerCase().replace(/\s+/g,` `).trim()}normalizeReporter(e){return e.toLowerCase().replace(/\s+/g,``).replace(/\./g,``)}isWithinScope(e,t){return j(e,t,this.context.paragraphMap,this.options.scopeStrategy)}createFailureResult(e){if(this.options.reportUnresolved)return{resolvedTo:void 0,failureReason:e,confidence:0}}};function F(e,t,n){return new P(e,t,n).resolve()}function I(e,t){let n=performance.now(),{cleaned:r,transformationMap:i,warnings:a}=l(e,t?.cleaners),o=g(r,t?.patterns||[...m,...h,...d,...f,...p]),s=[],c=new Set;for(let e of o){let t=`${e.span.cleanStart}-${e.span.cleanEnd}`;c.has(t)||(c.add(t),s.push(e))}let u=[];for(let e of s){let t;switch(e.type){case`case`:t=e.patternId===`id`||e.patternId===`ibid`?D(e,i):e.patternId===`supra`?O(e,i):e.patternId===`shortFormCase`?k(e,i):b(e,i,r);break;case`statute`:t=x(e,i);break;case`journal`:t=S(e,i);break;case`neutral`:t=C(e,i);break;case`publicLaw`:t=w(e,i);break;case`federalRegister`:t=T(e,i);break;case`statutesAtLarge`:t=E(e,i);break;default:continue}a.length>0&&(t.warnings=[...t.warnings||[],...a]),t.processTimeMs=performance.now()-n,u.push(t)}return t?.resolve?F(u,e,t.resolutionOptions):u}async function L(e,t){return I(e,t)}export{P as DocumentResolver,i as assertUnreachable,l as cleanText,b as extractCase,I as extractCitations,L as extractCitationsAsync,T as extractFederalRegister,S as extractJournal,C as extractNeutral,w as extractPublicLaw,x as extractStatute,E as extractStatutesAtLarge,n as isCaseCitation,r as isCitationType,e as isFullCitation,t as isShortFormCitation,F as resolveCitations,g as tokenize};
|
|
2
2
|
//# sourceMappingURL=index.mjs.map
|