eyecite-ts 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -1,24 +1,47 @@
1
- import { a as FullCaseCitation, c as NeutralCitation, d as StatuteCitation, f as SupraCitation, i as FederalRegisterCitation, l as PublicLawCitation, m as TransformationMap, n as CitationBase, o as IdCitation, p as Span, r as CitationType, s as JournalCitation, t as Citation, u as ShortFormCaseCitation } from "./citation-BcY5zzWb.cjs";
1
+ import { _ as StatuteCitation, a as ExtractorMap, b as Span, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as SupraCitation, x as TransformationMap, y as Warning } from "./citation-BhJJj_AZ.cjs";
2
2
 
3
- //#region src/patterns/casePatterns.d.ts
3
+ //#region src/types/guards.d.ts
4
+ /**
5
+ * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
6
+ */
7
+ declare function isFullCitation(citation: Citation): citation is FullCitation;
8
+ /**
9
+ * Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).
10
+ */
11
+ declare function isShortFormCitation(citation: Citation): citation is ShortFormCitation;
12
+ /**
13
+ * Type guard: narrows Citation to a full case citation.
14
+ */
15
+ declare function isCaseCitation(citation: Citation): citation is FullCaseCitation;
16
+ /**
17
+ * Generic type guard that narrows a Citation to a specific type.
18
+ * Useful when the target type is dynamic or generic.
19
+ */
20
+ declare function isCitationType<T extends CitationType>(citation: Citation, type: T): citation is CitationOfType<T>;
4
21
  /**
5
- * Case Citation Regex Patterns
22
+ * Exhaustiveness helper for switch statements on discriminated unions.
6
23
  *
7
- * These patterns are designed for tokenization (broad matching) not extraction.
8
- * They identify potential case citations in text for the tokenizer (Plan 3).
9
- * Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).
24
+ * Place in the `default` branch to get a compile-time error if a new
25
+ * variant is added but not handled.
10
26
  *
11
- * Pattern Design Principles (from RESEARCH.md):
12
- * - Use \b word boundaries to avoid matching "F." in "F.B.I."
13
- * - Avoid nested quantifiers: (a+)+ causes ReDoS
14
- * - Keep patterns simple: tokenization only needs to find candidates
15
- * - Use global flag /g for matchAll()
27
+ * @example
28
+ * ```typescript
29
+ * switch (citation.type) {
30
+ * case 'case': ...
31
+ * case 'statute': ...
32
+ * // If you forget a variant, TypeScript errors here:
33
+ * default: assertUnreachable(citation.type)
34
+ * }
35
+ * ```
16
36
  */
37
+ declare function assertUnreachable(x: never): never;
38
+ //#endregion
39
+ //#region src/patterns/casePatterns.d.ts
17
40
  interface Pattern {
18
41
  id: string;
19
42
  regex: RegExp;
20
43
  description: string;
21
- type: "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister";
44
+ type: FullCitationType;
22
45
  }
23
46
  //#endregion
24
47
  //#region src/resolve/types.d.ts
@@ -96,15 +119,16 @@ interface ResolutionResult {
96
119
  confidence: number;
97
120
  }
98
121
  /**
99
- * Citation with optional resolution metadata.
100
- * Uses intersection type to add resolution field to any Citation type.
122
+ * Citation with resolution metadata.
123
+ *
124
+ * Uses a distributive conditional type so that `resolution` is only
125
+ * meaningfully present on short-form citations (Id., supra, short-form case).
126
+ * On full citations, `resolution` is typed as `undefined`.
101
127
  */
102
- type ResolvedCitation = Citation & {
103
- /**
104
- * Resolution result for short-form citations.
105
- * Only present for Id/supra/shortFormCase types
106
- */
107
- resolution?: ResolutionResult;
128
+ type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
129
+ resolution: ResolutionResult | undefined;
130
+ } : C & {
131
+ resolution?: undefined;
108
132
  };
109
133
  //#endregion
110
134
  //#region src/extract/extractCitations.d.ts
@@ -230,7 +254,10 @@ interface ExtractOptions {
230
254
  * // citations[2].type === "journal"
231
255
  * ```
232
256
  */
233
- declare function extractCitations(text: string, options?: ExtractOptions): Citation[] | ResolvedCitation[];
257
+ declare function extractCitations(text: string, options: ExtractOptions & {
258
+ resolve: true;
259
+ }): ResolvedCitation[];
260
+ declare function extractCitations(text: string, options?: ExtractOptions): Citation[];
234
261
  /**
235
262
  * Asynchronous version of extractCitations().
236
263
  *
@@ -253,7 +280,10 @@ declare function extractCitations(text: string, options?: ExtractOptions): Citat
253
280
  * // Returns ResolvedCitation[] with resolution metadata
254
281
  * ```
255
282
  */
256
- declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[] | ResolvedCitation[]>;
283
+ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
284
+ resolve: true;
285
+ }): Promise<ResolvedCitation[]>;
286
+ declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
257
287
  //#endregion
258
288
  //#region src/clean/cleanText.d.ts
259
289
  /**
@@ -268,17 +298,6 @@ interface CleanTextResult {
268
298
  warnings: Warning[];
269
299
  }
270
300
  /**
271
- * Warning generated during text cleaning.
272
- */
273
- interface Warning {
274
- level: "error" | "warning" | "info";
275
- message: string;
276
- position: {
277
- start: number;
278
- end: number;
279
- };
280
- }
281
- /**
282
301
  * Clean text using a pipeline of transformation functions.
283
302
  *
284
303
  * Applies cleaners sequentially while maintaining accurate position mappings
@@ -634,10 +653,6 @@ declare class DocumentResolver {
634
653
  */
635
654
  private resolveShortFormCase;
636
655
  /**
637
- * Checks if a citation is a full citation (not short-form).
638
- */
639
- private isFullCitation;
640
- /**
641
656
  * Tracks a full citation in the resolution history.
642
657
  * Extracts party name for supra resolution.
643
658
  */
@@ -678,5 +693,5 @@ declare class DocumentResolver {
678
693
  */
679
694
  declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
680
695
  //#endregion
681
- export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
696
+ export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
682
697
  //# sourceMappingURL=index.d.cts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.cts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;AAcA;;;;;;;;;;UAAiB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA;AAAA;;;;;;;KCLU,aAAA;;;;UAKK,iBAAA;;AALjB;;;;;AAKA;EAQE,aAAA,GAAgB,aAAA;;;;;EAMhB,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;;;;AAgC7B;EA1BE,kBAAA;;;;;;EAOA,mBAAA;;;AA+CF;;;EAxCE,qBAAA;;;;;EAMA,gBAAA;AAAA;;;ACzBF;UD+BiB,gBAAA;;;;;EAKf,UAAA;EC0BmB;;;EDrBnB,aAAA;;;;EAKA,QAAA;;;;AC6EF;EDvEE,UAAA;AAAA;;;;;KAOU,gBAAA,GAAmB,QAAA;;;;;EAK7B,UAAA,GAAa,gBAAA;AAAA;;;ADzFf;;;AAAA,UEyBiB,cAAA;;;;;;;;;;;AD1BjB;;;;ECyCC,QAAA,GAAW,KAAA,EAAO,IAAA;EDpCnB;;;;;;;;;;;;;;ECoDC,QAAA,GAAW,OAAA;EDAZ;;;;;;;;;;AA4BA;;;ECbC,OAAA;;;;;;;;;AA9CD;;;;;;EA8DC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;AA6DrB;;;;;;;;;;;;;;;AAoHA;;;;;;;;;;;;;;;;;;;;AC3QA;;;;;;;;;;;AAcA;;iBDyIgB,gBAAA,CACf,IAAA,UACA,OAAA,GAAU,cAAA,GACR,QAAA,KAAa,gBAAA;;;;;;;;;ACtHhB;;;;;;;;;;;;;;iBDuOsB,qBAAA,CACrB,IAAA,UACA,OAAA,GAAU,cAAA,GACR,OAAA,CAAQ,QAAA,KAAa,gBAAA;;;;;AF3QxB;UGHiB,eAAA;;EAEhB,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;;;UAMM,OAAA;EAChB,KAAA;EACA,OAAA;EACA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;AAAA;;;;;;;;;;;;;;;AF0C5B;;iBEvBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;AF1CH;;;;;AAKA;;AALA,UGoBiB,KAAA;EHKY;EGH3B,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;AH0BF;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;;iBE4CgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;AH1EH;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;iBGgCgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;;;;;AJ7DH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;iBKrBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ALvCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;iBM/CgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ANzCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;iBOvBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;APrCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;iBQtBgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;;;;;ARtCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;iBSxBgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;;;;;cCjBU,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EVlBnB;;;;;;;EU2BE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;;;EAwCX,OAAA,CAAA,GAAW,gBAAA;EVxBX;AAMF;;EANE,QUkEQ,SAAA;EV5DO;;;EAAA,QU4FP,YAAA;;;;UAiDA,oBAAA;EVjHE;;;EAAA,QUuJF,cAAA;;;;;UAeA,iBAAA;;;ATjOV;;USiPU,gBAAA;;;;UAyBA,kBAAA;ET5MW;;;EAAA,QSsNX,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;AV/TV;;;;;;;;iBWegB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
1
+ {"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;AAKA;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAYhD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBASrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;AAThE;;;;AAAA,iBAiBgB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;AAX9B;;;;;;;;;;AAQA;iBAuBgB,iBAAA,CAAkB,CAAA;;;UCzCjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;;;;KCPI,aAAA;;AFIZ;;UECiB,iBAAA;EFDoD;;;;;;;EESnE,aAAA,GAAgB,aAAA;EFAF;;;;EEMd,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;EFJb;;;;EEUd,kBAAA;;;;;;EAOA,mBAAA;;;;;;EAOA,qBAAA;;;;AFDF;EEOE,gBAAA;AAAA;;;;UAMe,gBAAA;;ADtDjB;;;EC2DE,UAAA;;;;EAKA,aAAA;;;;EAKA,QAAA;;;;AAxEF;EA8EE,UAAA;AAAA;;;AAzEF;;;;;KAmFY,gBAAA,WAA2B,QAAA,GAAW,QAAA,IAChD,CAAA,SAAU,iBAAA,GACN,CAAA;EAAM,UAAA,EAAY,gBAAA;AAAA,IAClB,CAAA;EAAM,UAAA;AAAA;;;AFnGZ;;;AAAA,UGkCiB,cAAA;;;;;;;AHtBjB;;;;;;;;EGqCC,QAAA,GAAW,KAAA,EAAO,IAAA;EHrCkD;AASrE;;;;;;;;;;AAQA;;;EGoCC,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;AH5BD;;;;;;;;ACzCA;;;EEqFC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;ADxFrB;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA+BA;;;;;;;;;;;;;;;;;;iBC6DgB,gBAAA,CAAiB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,gBAAA;AAAA,iBAC7E,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;AA5H1E;;;;;;;;;;;;;;;;;iBAiPsB,qBAAA,CAAsB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,OAAA,CAAQ,gBAAA;AAAA,iBAC1F,qBAAA,CAAsB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,OAAA,CAAQ,QAAA;;;;AHpR7F;;UIOiB,eAAA;EJP+C;EIS/D,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;AJHX;;;;;;;;;;AASA;;;;;;AATA,iBIsBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;;;;;;;AJrBH;UKOiB,KAAA;;EAEf,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;;;;;;;;;;;;;;;;;;ALaF;;;;;;;;ACzCA;;;;;;;;iBImEgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;ALlEH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACzCA;;;;;;;iBKuDgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;ANzDH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;iBOegB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;APnCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;iBQiBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ARrCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;iBSagB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ATjCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;iBUcgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;AVlCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;iBWYgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;AXhCH;;;;AAAA,cYgBa,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EZpBkD;AASrE;;;;;;EYoBE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;AZfb;;EYuDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;UAqCA,iBAAA;EZrNoB;AAoB9B;;;EApB8B,QYoOpB,gBAAA;EZhNwB;;;EAAA,QYyOxB,kBAAA;EXlRV;;;EAAA,QW4RU,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;;AZxSV;;;;;;;iBaOgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
package/dist/index.d.mts CHANGED
@@ -1,24 +1,47 @@
1
- import { a as FullCaseCitation, c as NeutralCitation, d as StatuteCitation, f as SupraCitation, i as FederalRegisterCitation, l as PublicLawCitation, m as TransformationMap, n as CitationBase, o as IdCitation, p as Span, r as CitationType, s as JournalCitation, t as Citation, u as ShortFormCaseCitation } from "./citation-8_GvfEuj.mjs";
1
+ import { _ as StatuteCitation, a as ExtractorMap, b as Span, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as SupraCitation, x as TransformationMap, y as Warning } from "./citation-FJ10UFM7.mjs";
2
2
 
3
- //#region src/patterns/casePatterns.d.ts
3
+ //#region src/types/guards.d.ts
4
+ /**
5
+ * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
6
+ */
7
+ declare function isFullCitation(citation: Citation): citation is FullCitation;
8
+ /**
9
+ * Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).
10
+ */
11
+ declare function isShortFormCitation(citation: Citation): citation is ShortFormCitation;
12
+ /**
13
+ * Type guard: narrows Citation to a full case citation.
14
+ */
15
+ declare function isCaseCitation(citation: Citation): citation is FullCaseCitation;
16
+ /**
17
+ * Generic type guard that narrows a Citation to a specific type.
18
+ * Useful when the target type is dynamic or generic.
19
+ */
20
+ declare function isCitationType<T extends CitationType>(citation: Citation, type: T): citation is CitationOfType<T>;
4
21
  /**
5
- * Case Citation Regex Patterns
22
+ * Exhaustiveness helper for switch statements on discriminated unions.
6
23
  *
7
- * These patterns are designed for tokenization (broad matching) not extraction.
8
- * They identify potential case citations in text for the tokenizer (Plan 3).
9
- * Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).
24
+ * Place in the `default` branch to get a compile-time error if a new
25
+ * variant is added but not handled.
10
26
  *
11
- * Pattern Design Principles (from RESEARCH.md):
12
- * - Use \b word boundaries to avoid matching "F." in "F.B.I."
13
- * - Avoid nested quantifiers: (a+)+ causes ReDoS
14
- * - Keep patterns simple: tokenization only needs to find candidates
15
- * - Use global flag /g for matchAll()
27
+ * @example
28
+ * ```typescript
29
+ * switch (citation.type) {
30
+ * case 'case': ...
31
+ * case 'statute': ...
32
+ * // If you forget a variant, TypeScript errors here:
33
+ * default: assertUnreachable(citation.type)
34
+ * }
35
+ * ```
16
36
  */
37
+ declare function assertUnreachable(x: never): never;
38
+ //#endregion
39
+ //#region src/patterns/casePatterns.d.ts
17
40
  interface Pattern {
18
41
  id: string;
19
42
  regex: RegExp;
20
43
  description: string;
21
- type: "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister";
44
+ type: FullCitationType;
22
45
  }
23
46
  //#endregion
24
47
  //#region src/resolve/types.d.ts
@@ -96,15 +119,16 @@ interface ResolutionResult {
96
119
  confidence: number;
97
120
  }
98
121
  /**
99
- * Citation with optional resolution metadata.
100
- * Uses intersection type to add resolution field to any Citation type.
122
+ * Citation with resolution metadata.
123
+ *
124
+ * Uses a distributive conditional type so that `resolution` is only
125
+ * meaningfully present on short-form citations (Id., supra, short-form case).
126
+ * On full citations, `resolution` is typed as `undefined`.
101
127
  */
102
- type ResolvedCitation = Citation & {
103
- /**
104
- * Resolution result for short-form citations.
105
- * Only present for Id/supra/shortFormCase types
106
- */
107
- resolution?: ResolutionResult;
128
+ type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
129
+ resolution: ResolutionResult | undefined;
130
+ } : C & {
131
+ resolution?: undefined;
108
132
  };
109
133
  //#endregion
110
134
  //#region src/extract/extractCitations.d.ts
@@ -230,7 +254,10 @@ interface ExtractOptions {
230
254
  * // citations[2].type === "journal"
231
255
  * ```
232
256
  */
233
- declare function extractCitations(text: string, options?: ExtractOptions): Citation[] | ResolvedCitation[];
257
+ declare function extractCitations(text: string, options: ExtractOptions & {
258
+ resolve: true;
259
+ }): ResolvedCitation[];
260
+ declare function extractCitations(text: string, options?: ExtractOptions): Citation[];
234
261
  /**
235
262
  * Asynchronous version of extractCitations().
236
263
  *
@@ -253,7 +280,10 @@ declare function extractCitations(text: string, options?: ExtractOptions): Citat
253
280
  * // Returns ResolvedCitation[] with resolution metadata
254
281
  * ```
255
282
  */
256
- declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[] | ResolvedCitation[]>;
283
+ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
284
+ resolve: true;
285
+ }): Promise<ResolvedCitation[]>;
286
+ declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
257
287
  //#endregion
258
288
  //#region src/clean/cleanText.d.ts
259
289
  /**
@@ -268,17 +298,6 @@ interface CleanTextResult {
268
298
  warnings: Warning[];
269
299
  }
270
300
  /**
271
- * Warning generated during text cleaning.
272
- */
273
- interface Warning {
274
- level: "error" | "warning" | "info";
275
- message: string;
276
- position: {
277
- start: number;
278
- end: number;
279
- };
280
- }
281
- /**
282
301
  * Clean text using a pipeline of transformation functions.
283
302
  *
284
303
  * Applies cleaners sequentially while maintaining accurate position mappings
@@ -634,10 +653,6 @@ declare class DocumentResolver {
634
653
  */
635
654
  private resolveShortFormCase;
636
655
  /**
637
- * Checks if a citation is a full citation (not short-form).
638
- */
639
- private isFullCitation;
640
- /**
641
656
  * Tracks a full citation in the resolution history.
642
657
  * Extracts party name for supra resolution.
643
658
  */
@@ -678,5 +693,5 @@ declare class DocumentResolver {
678
693
  */
679
694
  declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
680
695
  //#endregion
681
- export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
696
+ export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
682
697
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;AAcA;;;;;;;;;;UAAiB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA;AAAA;;;;;;;KCLU,aAAA;;;;UAKK,iBAAA;;AALjB;;;;;AAKA;EAQE,aAAA,GAAgB,aAAA;;;;;EAMhB,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;;;;AAgC7B;EA1BE,kBAAA;;;;;;EAOA,mBAAA;;;AA+CF;;;EAxCE,qBAAA;;;;;EAMA,gBAAA;AAAA;;;ACzBF;UD+BiB,gBAAA;;;;;EAKf,UAAA;EC0BmB;;;EDrBnB,aAAA;;;;EAKA,QAAA;;;;AC6EF;EDvEE,UAAA;AAAA;;;;;KAOU,gBAAA,GAAmB,QAAA;;;;;EAK7B,UAAA,GAAa,gBAAA;AAAA;;;ADzFf;;;AAAA,UEyBiB,cAAA;;;;;;;;;;;AD1BjB;;;;ECyCC,QAAA,GAAW,KAAA,EAAO,IAAA;EDpCnB;;;;;;;;;;;;;;ECoDC,QAAA,GAAW,OAAA;EDAZ;;;;;;;;;;AA4BA;;;ECbC,OAAA;;;;;;;;;AA9CD;;;;;;EA8DC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;AA6DrB;;;;;;;;;;;;;;;AAoHA;;;;;;;;;;;;;;;;;;;;AC3QA;;;;;;;;;;;AAcA;;iBDyIgB,gBAAA,CACf,IAAA,UACA,OAAA,GAAU,cAAA,GACR,QAAA,KAAa,gBAAA;;;;;;;;;ACtHhB;;;;;;;;;;;;;;iBDuOsB,qBAAA,CACrB,IAAA,UACA,OAAA,GAAU,cAAA,GACR,OAAA,CAAQ,QAAA,KAAa,gBAAA;;;;;AF3QxB;UGHiB,eAAA;;EAEhB,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;;;UAMM,OAAA;EAChB,KAAA;EACA,OAAA;EACA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;AAAA;;;;;;;;;;;;;;;AF0C5B;;iBEvBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;AF1CH;;;;;AAKA;;AALA,UGoBiB,KAAA;EHKY;EGH3B,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;AH0BF;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;;iBE4CgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;AH1EH;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;iBGgCgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;;;;;AJ7DH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;iBKrBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ALvCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;iBM/CgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ANzCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;iBOvBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;APrCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;iBQtBgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;;;;;ARtCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;iBSxBgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;;;;;cCjBU,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EVlBnB;;;;;;;EU2BE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;;;EAwCX,OAAA,CAAA,GAAW,gBAAA;EVxBX;AAMF;;EANE,QUkEQ,SAAA;EV5DO;;;EAAA,QU4FP,YAAA;;;;UAiDA,oBAAA;EVjHE;;;EAAA,QUuJF,cAAA;;;;;UAeA,iBAAA;;;ATjOV;;USiPU,gBAAA;;;;UAyBA,kBAAA;ET5MW;;;EAAA,QSsNX,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;AV/TV;;;;;;;;iBWegB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;AAKA;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAYhD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBASrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;AAThE;;;;AAAA,iBAiBgB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;AAX9B;;;;;;;;;;AAQA;iBAuBgB,iBAAA,CAAkB,CAAA;;;UCzCjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;;;;KCPI,aAAA;;AFIZ;;UECiB,iBAAA;EFDoD;;;;;;;EESnE,aAAA,GAAgB,aAAA;EFAF;;;;EEMd,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;EFJb;;;;EEUd,kBAAA;;;;;;EAOA,mBAAA;;;;;;EAOA,qBAAA;;;;AFDF;EEOE,gBAAA;AAAA;;;;UAMe,gBAAA;;ADtDjB;;;EC2DE,UAAA;;;;EAKA,aAAA;;;;EAKA,QAAA;;;;AAxEF;EA8EE,UAAA;AAAA;;;AAzEF;;;;;KAmFY,gBAAA,WAA2B,QAAA,GAAW,QAAA,IAChD,CAAA,SAAU,iBAAA,GACN,CAAA;EAAM,UAAA,EAAY,gBAAA;AAAA,IAClB,CAAA;EAAM,UAAA;AAAA;;;AFnGZ;;;AAAA,UGkCiB,cAAA;;;;;;;AHtBjB;;;;;;;;EGqCC,QAAA,GAAW,KAAA,EAAO,IAAA;EHrCkD;AASrE;;;;;;;;;;AAQA;;;EGoCC,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;AH5BD;;;;;;;;ACzCA;;;EEqFC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;ADxFrB;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA+BA;;;;;;;;;;;;;;;;;;iBC6DgB,gBAAA,CAAiB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,gBAAA;AAAA,iBAC7E,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;AA5H1E;;;;;;;;;;;;;;;;;iBAiPsB,qBAAA,CAAsB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,OAAA,CAAQ,gBAAA;AAAA,iBAC1F,qBAAA,CAAsB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,OAAA,CAAQ,QAAA;;;;AHpR7F;;UIOiB,eAAA;EJP+C;EIS/D,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;AJHX;;;;;;;;;;AASA;;;;;;AATA,iBIsBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;;;;;;;AJrBH;UKOiB,KAAA;;EAEf,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;;;;;;;;;;;;;;;;;;ALaF;;;;;;;;ACzCA;;;;;;;;iBImEgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;ALlEH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;ACzCA;;;;;;;iBKuDgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;ANzDH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;iBOegB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;APnCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;iBQiBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ARrCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;iBSagB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ATjCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;iBUcgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;AVlCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;iBWYgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;AXhCH;;;;AAAA,cYgBa,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EZpBkD;AASrE;;;;;;EYoBE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;AZfb;;EYuDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;UAqCA,iBAAA;EZrNoB;AAoB9B;;;EApB8B,QYoOpB,gBAAA;EZhNwB;;;EAAA,QYyOxB,kBAAA;EXlRV;;;EAAA,QW4RU,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;;AZxSV;;;;;;;iBaOgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
package/dist/index.mjs CHANGED
@@ -1,2 +1,2 @@
1
- function e(e){return e.replace(/<[^>]+>/g,``)}function t(e){return e.replace(/[\t\n\r]+/g,` `).replace(/ {2,}/g,` `)}function n(e){return e.normalize(`NFKC`)}function r(e){return e.replace(/[\u201C\u201D]/g,`"`).replace(/[\u2018\u2019]/g,`'`)}function i(i,o=[e,t,n,r]){let s=i,c=new Map,l=new Map;for(let e=0;e<=i.length;e++)c.set(e,e),l.set(e,e);for(let e of o){let t=s,n=e(s);if(t!==n){let{newCleanToOriginal:e,newOriginalToClean:r}=a(t,n,c,l);c=e,l=r,s=n}}return{cleaned:s,transformationMap:{cleanToOriginal:c,originalToClean:l},warnings:[]}}function a(e,t,n,r){let i=new Map,a=new Map,o=0,s=0;for(;o<=e.length||s<=t.length;){if(o>=e.length&&s>=t.length){let e=n.get(o)??o;i.set(s,e),a.set(e,s);break}if(o>=e.length){let e=n.get(o)??o;i.set(s,e),s++;continue}if(s>=t.length){let e=n.get(o)??o;a.set(e,s),o++;continue}if(e[o]===t[s]){let e=n.get(o)??o;i.set(s,e),a.set(e,s),o++,s++}else{let r=!1;for(let i=1;i<=20&&!(o+i>=e.length);i++)if(e[o+i]===t[s]){for(let e=0;e<i;e++){let t=n.get(o+e)??o+e;a.set(t,s)}o+=i,r=!0;break}if(r)continue;for(let a=1;a<=20&&!(s+a>=t.length);a++)if(e[o]===t[s+a]){let e=n.get(o)??o;for(let t=0;t<a;t++)i.set(s+t,e);s+=a,r=!0;break}if(r)continue;let c=n.get(o)??o;i.set(s,c),a.set(c,s),o++,s++}}return{newCleanToOriginal:i,newOriginalToClean:a}}const o=[{id:`federal-reporter`,regex:/\b(\d+)\s+(F\.|F\.2d|F\.3d|F\.\s?Supp\.|F\.\s?Supp\.\s?2d|F\.\s?Supp\.\s?3d)\s+(\d+)\b/g,description:`Federal Reporter (F., F.2d, F.3d, F.Supp., etc.)`,type:`case`},{id:`supreme-court`,regex:/\b(\d+)\s+(U\.S\.|S\.\s?Ct\.|L\.\s?Ed\.(?:\s?2d)?)\s+(\d+)\b/g,description:`U.S. Supreme Court reporters`,type:`case`},{id:`state-reporter`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.]+(?:\s?2d|\s?3d)?)\s+(\d+)\b/g,description:`State reporters (broad pattern, validated against reporters-db in Phase 3)`,type:`case`}],s=[{id:`usc`,regex:/\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+)\b/g,description:`U.S. Code citations (e.g., "42 U.S.C. § 1983")`,type:`statute`},{id:`state-code`,regex:/\b([A-Z][a-z]+\.?\s+[A-Za-z\.]+\s+Code)\s+§\s*(\d+)\b/g,description:`State code citations (broad pattern, e.g., "Cal. Penal Code § 187")`,type:`statute`}],c=[{id:`law-review`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.\s]+)\s+(\d+)\b/g,description:`Law review citations (e.g., "120 Harv. L. Rev. 500"), validated against journals-db in Phase 3`,type:`journal`}],l=[{id:`westlaw`,regex:/\b(\d{4})\s+WL\s+(\d+)\b/g,description:`WestLaw citations (e.g., "2021 WL 123456")`,type:`neutral`},{id:`lexis`,regex:/\b(\d{4})\s+U\.S\.\s+LEXIS\s+(\d+)\b/g,description:`LexisNexis citations (e.g., "2021 U.S. LEXIS 5000")`,type:`neutral`},{id:`public-law`,regex:/\bPub\.\s?L\.\s?No\.\s?(\d+-\d+)\b/g,description:`Public Law citations (e.g., "Pub. L. No. 117-58")`,type:`publicLaw`},{id:`federal-register`,regex:/\b(\d+)\s+Fed\.\s?Reg\.\s+(\d+)\b/g,description:`Federal Register citations (e.g., "86 Fed. Reg. 12345")`,type:`federalRegister`}],u=[{id:`id`,regex:/\b[Ii]d\.(?:\s+at\s+(\d+))?/g,description:`Id. citations (e.g., "Id." or "Id. at 253")`,type:`case`},{id:`ibid`,regex:/\b[Ii]bid\.(?:\s+at\s+(\d+))?/g,description:`Ibid. citations (e.g., "Ibid." or "Ibid. at 125")`,type:`case`},{id:`supra`,regex:/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/g,description:`Supra citations (e.g., "Smith, supra" or "Smith, supra, at 460")`,type:`case`},{id:`shortFormCase`,regex:/\b(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)\b/g,description:`Short-form case citations (e.g., "500 F.2d at 125")`,type:`case`}];function d(e,t=[...o,...s,...c,...l,...u]){let n=[];for(let r of t)try{let t=e.matchAll(r.regex);for(let e of t)n.push({text:e[0],span:{cleanStart:e.index,cleanEnd:e.index+e[0].length},type:r.type,patternId:r.id})}catch(e){console.warn(`Pattern ${r.id} threw error, skipping:`,e instanceof Error?e.message:String(e));continue}return n.sort((e,t)=>e.span.cleanStart-t.span.cleanStart),n}function f(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z0-9.\s]+)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=/\((?:[^)]*\s)?(\d{4})\)/.exec(n),d=u?Number.parseInt(u[1],10):void 0,f=/\(([^)]*[A-Za-z][^)]*)\)/.exec(n),p=f?f[1].trim():void 0,m=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,h=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,g=.5;return`F.,F.2d,F.3d,F.4th,U.S.,S. Ct.,L. Ed.,P.,P.2d,P.3d,A.,A.2d,A.3d,N.E.,N.E.2d,N.E.3d,N.W.,N.W.2d,S.E.,S.E.2d,S.W.,S.W.2d,S.W.3d,So.,So. 2d,So. 3d`.split(`,`).some(e=>o.includes(e))&&(g+=.3),d!==void 0&&d<=new Date().getFullYear()&&(g+=.2),g=Math.min(g,1),{type:`case`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:m,originalEnd:h},confidence:g,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,page:s,pincite:l,court:p,year:d}}function p(e,t){let{text:n,span:r}=e,i=/^(?:(\d+)\s+)?([A-Za-z.\s]+?)\s*§\s*(\d+[A-Za-z0-9\-]*)/.exec(n);if(!i)throw Error(`Failed to parse statute citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=i[2].trim(),s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,u=.5;return[`U.S.C.`,`C.F.R.`,`Cal. Civ. Code`,`Cal. Penal Code`,`N.Y. Civ. Prac. L. & R.`,`Tex. Civ. Prac. & Rem. Code`].some(e=>o.includes(e))&&(u+=.3),u=Math.min(u,1),{type:`statute`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:u,matchedText:n,processTimeMs:0,patternsChecked:1,title:a,code:o,section:s}}function m(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z.\s]+?)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse journal citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,d=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`journal`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:u,originalEnd:d},confidence:.6,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,journal:o,abbreviation:o,page:s,pincite:l}}function h(e,t){let{text:n,span:r}=e,i=/^(\d{4})\s+(WL|LEXIS|U\.S\.\s+LEXIS)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse neutral citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2],s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`neutral`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,year:a,court:o,documentNumber:s}}function g(e,t){let{text:n,span:r}=e,i=/Pub\.\s?L\.(?:\s?No\.)?\s?(\d+)-(\d+)/.exec(n);if(!i)throw Error(`Failed to parse public law citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`publicLaw`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,congress:a,lawNumber:o}}function _(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+Fed\.\s?Reg\.\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse Federal Register citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=/\((?:.*?\s)?(\d{4})\)/.exec(n),c=s?Number.parseInt(s[1],10):void 0,l=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,u=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`federalRegister`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:l,originalEnd:u},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,page:o,year:c}}function v(e,t){let{text:n,span:r}=e,i=/[Ii](?:d|bid)\.(?:\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse Id. citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,s=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`id`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:o,originalEnd:s},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,pincite:a}}function y(e,t){let{text:n,span:r}=e,i=/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse supra citation: ${n}`);let a=i[1],o=i[2]?Number.parseInt(i[2],10):void 0,s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`supra`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,partyName:a,pincite:o}}function b(e,t){let{text:n,span:r}=e,i=/(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse short-form case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`shortFormCase`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:.7,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,pincite:s}}function x(e,t,n=/\n\n+/g){let r=new Map,i=[0],a;for(;(a=n.exec(e))!==null;)i.push(a.index+a[0].length);i.push(e.length);for(let e=0;e<t.length;e++){let n=t[e].span.originalStart,a=0;for(let e=0;e<i.length-1;e++)if(n>=i[e]&&n<i[e+1]){a=e;break}r.set(e,a)}return r}function S(e,t,n,r){if(r===`none`)return!0;let i=n.get(e),a=n.get(t);return i===void 0||a===void 0?!0:i===a}function C(e,t){if(e.length===0)return t.length;if(t.length===0)return e.length;let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let t=0;t<=e.length;t++)n[t][0]=t;for(let e=0;e<=t.length;e++)n[0][e]=e;for(let r=1;r<=e.length;r++)for(let i=1;i<=t.length;i++)e[r-1]===t[i-1]?n[r][i]=n[r-1][i-1]:n[r][i]=1+Math.min(n[r-1][i],n[r][i-1],n[r-1][i-1]);return n[e.length][t.length]}function w(e,t){let n=e.toLowerCase(),r=t.toLowerCase(),i=C(n,r),a=Math.max(n.length,r.length);return a===0?1:1-i/a}var T=class{constructor(e,t,n={}){this.citations=e,this.text=t,this.options={scopeStrategy:n.scopeStrategy??`paragraph`,autoDetectParagraphs:n.autoDetectParagraphs??!0,paragraphBoundaryPattern:n.paragraphBoundaryPattern??/\n\n+/g,fuzzyPartyMatching:n.fuzzyPartyMatching??!0,partyMatchThreshold:n.partyMatchThreshold??.8,allowNestedResolution:n.allowNestedResolution??!1,reportUnresolved:n.reportUnresolved??!0},this.context={citationIndex:0,allCitations:e,lastFullCitation:void 0,fullCitationHistory:new Map,paragraphMap:new Map},this.options.autoDetectParagraphs&&(this.context.paragraphMap=x(t,e,this.options.paragraphBoundaryPattern))}resolve(){let e=[];for(let t=0;t<this.citations.length;t++){this.context.citationIndex=t;let n=this.citations[t],r;switch(n.type){case`id`:r=this.resolveId(n);break;case`supra`:r=this.resolveSupra(n);break;case`shortFormCase`:r=this.resolveShortFormCase(n);break;default:this.isFullCitation(n)&&(this.context.lastFullCitation=t,this.trackFullCitation(n,t));break}e.push({...n,resolution:r})}return e}resolveId(e){let t=this.context.citationIndex,n;for(let e=t-1;e>=0;e--)if(this.citations[e].type===`case`){n=e;break}return n===void 0?this.createFailureResult(`No preceding full case citation found`):this.isWithinScope(n,t)?{resolvedTo:n,confidence:1}:this.createFailureResult(`Antecedent citation outside scope boundary`)}resolveSupra(e){let t=this.context.citationIndex,n=this.normalizePartyName(e.partyName),r;for(let[e,i]of this.context.fullCitationHistory){if(!this.isWithinScope(i,t))continue;let a=w(n,e);(!r||a>r.similarity)&&(r={index:i,similarity:a})}if(!r)return this.createFailureResult(`No full citation found in scope`);if(r.similarity<this.options.partyMatchThreshold)return this.createFailureResult(`Party name similarity ${r.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`);let i=[];return r.similarity<1&&i.push(`Fuzzy match: similarity ${r.similarity.toFixed(2)}`),{resolvedTo:r.index,confidence:r.similarity,warnings:i.length>0?i:void 0}}resolveShortFormCase(e){let t=this.context.citationIndex;for(let n=t-1;n>=0;n--){let r=this.citations[n];if(r.type!==`case`)continue;let i=r;if(i.volume===e.volume&&this.normalizeReporter(i.reporter)===this.normalizeReporter(e.reporter))return this.isWithinScope(n,t)?{resolvedTo:n,confidence:.95}:this.createFailureResult(`Matching citation outside scope boundary`)}return this.createFailureResult(`No matching full case citation found`)}isFullCitation(e){return e.type===`case`||e.type===`statute`||e.type===`journal`||e.type===`neutral`||e.type===`publicLaw`||e.type===`federalRegister`}trackFullCitation(e,t){if(e.type===`case`){let n=e,r=this.extractPartyName(n);if(r){let e=this.normalizePartyName(r);this.context.fullCitationHistory.set(e,t)}}}extractPartyName(e){let t=e.span.originalStart,n=Math.max(0,t-100),r=this.text.substring(n,t),i=r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\s+v\.?\s+[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*,\s*$/);return i?i[1].trim():r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*),\s*$/)?.[1].trim()}normalizePartyName(e){return e.toLowerCase().replace(/\s+/g,` `).trim()}normalizeReporter(e){return e.toLowerCase().replace(/\s+/g,``).replace(/\./g,``)}isWithinScope(e,t){return S(e,t,this.context.paragraphMap,this.options.scopeStrategy)}createFailureResult(e){if(this.options.reportUnresolved)return{resolvedTo:void 0,failureReason:e,confidence:0}}};function E(e,t,n){return new T(e,t,n).resolve()}function D(e,t){let n=performance.now(),{cleaned:r,transformationMap:a,warnings:x}=i(e,t?.cleaners),S=d(r,t?.patterns||[...l,...u,...o,...s,...c]),C=[],w=new Set;for(let e of S){let t=`${e.span.cleanStart}-${e.span.cleanEnd}`;w.has(t)||(w.add(t),C.push(e))}let T=[];for(let e of C){let t;switch(e.type){case`case`:t=e.patternId===`id`||e.patternId===`ibid`?v(e,a):e.patternId===`supra`?y(e,a):e.patternId===`shortFormCase`?b(e,a):f(e,a);break;case`statute`:t=p(e,a);break;case`journal`:t=m(e,a);break;case`neutral`:t=h(e,a);break;case`publicLaw`:t=g(e,a);break;case`federalRegister`:t=_(e,a);break;default:continue}x.length>0&&(t.warnings=[...t.warnings||[],...x]),t.processTimeMs=performance.now()-n,T.push(t)}return t?.resolve?E(T,e,t.resolutionOptions):T}async function O(e,t){return D(e,t)}export{T as DocumentResolver,i as cleanText,f as extractCase,D as extractCitations,O as extractCitationsAsync,_ as extractFederalRegister,m as extractJournal,h as extractNeutral,g as extractPublicLaw,p as extractStatute,E as resolveCitations,d as tokenize};
1
+ function e(e){return e.type===`case`||e.type===`statute`||e.type===`journal`||e.type===`neutral`||e.type===`publicLaw`||e.type===`federalRegister`}function t(e){return e.type===`id`||e.type===`supra`||e.type===`shortFormCase`}function n(e){return e.type===`case`}function r(e,t){return e.type===t}function i(e){throw Error(`Unexpected value: ${e}`)}function a(e){return e.replace(/<[^>]+>/g,``)}function o(e){return e.replace(/[\t\n\r]+/g,` `).replace(/ {2,}/g,` `)}function s(e){return e.normalize(`NFKC`)}function c(e){return e.replace(/[\u201C\u201D]/g,`"`).replace(/[\u2018\u2019]/g,`'`)}function l(e,t=[a,o,s,c]){let n=e,r=new Map,i=new Map;for(let t=0;t<=e.length;t++)r.set(t,t),i.set(t,t);for(let e of t){let t=n,a=e(n);if(t!==a){let{newCleanToOriginal:e,newOriginalToClean:o}=u(t,a,r,i);r=e,i=o,n=a}}return{cleaned:n,transformationMap:{cleanToOriginal:r,originalToClean:i},warnings:[]}}function u(e,t,n,r){let i=new Map,a=new Map,o=0,s=0;for(;o<=e.length||s<=t.length;){if(o>=e.length&&s>=t.length){let e=n.get(o)??o;i.set(s,e),a.set(e,s);break}if(o>=e.length){let e=n.get(o)??o;i.set(s,e),s++;continue}if(s>=t.length){let e=n.get(o)??o;a.set(e,s),o++;continue}if(e[o]===t[s]){let e=n.get(o)??o;i.set(s,e),a.set(e,s),o++,s++}else{let r=!1;for(let i=1;i<=20&&!(o+i>=e.length);i++)if(e[o+i]===t[s]){for(let e=0;e<i;e++){let t=n.get(o+e)??o+e;a.set(t,s)}o+=i,r=!0;break}if(r)continue;for(let a=1;a<=20&&!(s+a>=t.length);a++)if(e[o]===t[s+a]){let e=n.get(o)??o;for(let t=0;t<a;t++)i.set(s+t,e);s+=a,r=!0;break}if(r)continue;let c=n.get(o)??o;i.set(s,c),a.set(c,s),o++,s++}}return{newCleanToOriginal:i,newOriginalToClean:a}}const d=[{id:`federal-reporter`,regex:/\b(\d+)\s+(F\.|F\.2d|F\.3d|F\.\s?Supp\.|F\.\s?Supp\.\s?2d|F\.\s?Supp\.\s?3d)\s+(\d+)\b/g,description:`Federal Reporter (F., F.2d, F.3d, F.Supp., etc.)`,type:`case`},{id:`supreme-court`,regex:/\b(\d+)\s+(U\.S\.|S\.\s?Ct\.|L\.\s?Ed\.(?:\s?2d)?)\s+(\d+)\b/g,description:`U.S. Supreme Court reporters`,type:`case`},{id:`state-reporter`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.]+(?:\s?2d|\s?3d)?)\s+(\d+)\b/g,description:`State reporters (broad pattern, validated against reporters-db in Phase 3)`,type:`case`}],f=[{id:`usc`,regex:/\b(\d+)\s+U\.S\.C\.?\s+§+\s*(\d+)\b/g,description:`U.S. Code citations (e.g., "42 U.S.C. § 1983")`,type:`statute`},{id:`state-code`,regex:/\b([A-Z][a-z]+\.?\s+[A-Za-z\.]+\s+Code)\s+§\s*(\d+)\b/g,description:`State code citations (broad pattern, e.g., "Cal. Penal Code § 187")`,type:`statute`}],p=[{id:`law-review`,regex:/\b(\d+)\s+([A-Z][A-Za-z\.\s]+)\s+(\d+)\b/g,description:`Law review citations (e.g., "120 Harv. L. Rev. 500"), validated against journals-db in Phase 3`,type:`journal`}],m=[{id:`westlaw`,regex:/\b(\d{4})\s+WL\s+(\d+)\b/g,description:`WestLaw citations (e.g., "2021 WL 123456")`,type:`neutral`},{id:`lexis`,regex:/\b(\d{4})\s+U\.S\.\s+LEXIS\s+(\d+)\b/g,description:`LexisNexis citations (e.g., "2021 U.S. LEXIS 5000")`,type:`neutral`},{id:`public-law`,regex:/\bPub\.\s?L\.\s?No\.\s?(\d+-\d+)\b/g,description:`Public Law citations (e.g., "Pub. L. No. 117-58")`,type:`publicLaw`},{id:`federal-register`,regex:/\b(\d+)\s+Fed\.\s?Reg\.\s+(\d+)\b/g,description:`Federal Register citations (e.g., "86 Fed. Reg. 12345")`,type:`federalRegister`}],h=[{id:`id`,regex:/\b[Ii]d\.(?:\s+at\s+(\d+))?/g,description:`Id. citations (e.g., "Id." or "Id. at 253")`,type:`case`},{id:`ibid`,regex:/\b[Ii]bid\.(?:\s+at\s+(\d+))?/g,description:`Ibid. citations (e.g., "Ibid." or "Ibid. at 125")`,type:`case`},{id:`supra`,regex:/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/g,description:`Supra citations (e.g., "Smith, supra" or "Smith, supra, at 460")`,type:`case`},{id:`shortFormCase`,regex:/\b(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)\b/g,description:`Short-form case citations (e.g., "500 F.2d at 125")`,type:`case`}];function g(e,t=[...d,...f,...p,...m,...h]){let n=[];for(let r of t)try{let t=e.matchAll(r.regex);for(let e of t)n.push({text:e[0],span:{cleanStart:e.index,cleanEnd:e.index+e[0].length},type:r.type,patternId:r.id})}catch(e){console.warn(`Pattern ${r.id} threw error, skipping:`,e instanceof Error?e.message:String(e));continue}return n.sort((e,t)=>e.span.cleanStart-t.span.cleanStart),n}function _(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z0-9.\s]+)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=/\((?:[^)]*\s)?(\d{4})\)/.exec(n),d=u?Number.parseInt(u[1],10):void 0,f=/\(([^)]*[A-Za-z][^)]*)\)/.exec(n),p=f?f[1].trim():void 0,m=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,h=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,g=.5;return`F.,F.2d,F.3d,F.4th,U.S.,S. Ct.,L. Ed.,P.,P.2d,P.3d,A.,A.2d,A.3d,N.E.,N.E.2d,N.E.3d,N.W.,N.W.2d,S.E.,S.E.2d,S.W.,S.W.2d,S.W.3d,So.,So. 2d,So. 3d`.split(`,`).some(e=>o.includes(e))&&(g+=.3),d!==void 0&&d<=new Date().getFullYear()&&(g+=.2),g=Math.min(g,1),{type:`case`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:m,originalEnd:h},confidence:g,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,page:s,pincite:l,court:p,year:d}}function v(e,t){let{text:n,span:r}=e,i=/^(?:(\d+)\s+)?([A-Za-z.\s]+?)\s*§\s*(\d+[A-Za-z0-9\-]*)/.exec(n);if(!i)throw Error(`Failed to parse statute citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=i[2].trim(),s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd,u=.5;return[`U.S.C.`,`C.F.R.`,`Cal. Civ. Code`,`Cal. Penal Code`,`N.Y. Civ. Prac. L. & R.`,`Tex. Civ. Prac. & Rem. Code`].some(e=>o.includes(e))&&(u+=.3),u=Math.min(u,1),{type:`statute`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:u,matchedText:n,processTimeMs:0,patternsChecked:1,title:a,code:o,section:s}}function y(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+([A-Za-z.\s]+?)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse journal citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=/,\s*(\d+)/.exec(n),l=c?Number.parseInt(c[1],10):void 0,u=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,d=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`journal`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:u,originalEnd:d},confidence:.6,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,journal:o,abbreviation:o,page:s,pincite:l}}function b(e,t){let{text:n,span:r}=e,i=/^(\d{4})\s+(WL|LEXIS|U\.S\.\s+LEXIS)\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse neutral citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2],s=i[3],c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`neutral`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,year:a,court:o,documentNumber:s}}function x(e,t){let{text:n,span:r}=e,i=/Pub\.\s?L\.(?:\s?No\.)?\s?(\d+)-(\d+)/.exec(n);if(!i)throw Error(`Failed to parse public law citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`publicLaw`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,congress:a,lawNumber:o}}function S(e,t){let{text:n,span:r}=e,i=/^(\d+)\s+Fed\.\s?Reg\.\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse Federal Register citation: ${n}`);let a=Number.parseInt(i[1],10),o=Number.parseInt(i[2],10),s=/\((?:.*?\s)?(\d{4})\)/.exec(n),c=s?Number.parseInt(s[1],10):void 0,l=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,u=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`federalRegister`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:l,originalEnd:u},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,page:o,year:c}}function C(e,t){let{text:n,span:r}=e,i=/[Ii](?:d|bid)\.(?:\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse Id. citation: ${n}`);let a=i[1]?Number.parseInt(i[1],10):void 0,o=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,s=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`id`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:o,originalEnd:s},confidence:1,matchedText:n,processTimeMs:0,patternsChecked:1,pincite:a}}function w(e,t){let{text:n,span:r}=e,i=/\b([A-Z][a-zA-Z]+(?:(?:\s+v\.?\s+|\s+)[A-Z][a-zA-Z]+)*),?\s+supra(?:,?\s+at\s+(\d+))?/.exec(n);if(!i)throw Error(`Failed to parse supra citation: ${n}`);let a=i[1],o=i[2]?Number.parseInt(i[2],10):void 0,s=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,c=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`supra`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:s,originalEnd:c},confidence:.9,matchedText:n,processTimeMs:0,patternsChecked:1,partyName:a,pincite:o}}function T(e,t){let{text:n,span:r}=e,i=/(\d+)\s+([A-Z][A-Za-z.\s]+?(?:\d[a-z])?)\s+at\s+(\d+)/.exec(n);if(!i)throw Error(`Failed to parse short-form case citation: ${n}`);let a=Number.parseInt(i[1],10),o=i[2].trim(),s=Number.parseInt(i[3],10),c=t.cleanToOriginal.get(r.cleanStart)??r.cleanStart,l=t.cleanToOriginal.get(r.cleanEnd)??r.cleanEnd;return{type:`shortFormCase`,text:n,span:{cleanStart:r.cleanStart,cleanEnd:r.cleanEnd,originalStart:c,originalEnd:l},confidence:.7,matchedText:n,processTimeMs:0,patternsChecked:1,volume:a,reporter:o,pincite:s}}function E(e,t,n=/\n\n+/g){let r=new Map,i=[0],a;for(;(a=n.exec(e))!==null;)i.push(a.index+a[0].length);i.push(e.length);for(let e=0;e<t.length;e++){let n=t[e].span.originalStart,a=0;for(let e=0;e<i.length-1;e++)if(n>=i[e]&&n<i[e+1]){a=e;break}r.set(e,a)}return r}function D(e,t,n,r){if(r===`none`)return!0;let i=n.get(e),a=n.get(t);return i===void 0||a===void 0?!0:i===a}function O(e,t){if(e.length===0)return t.length;if(t.length===0)return e.length;let n=Array.from({length:e.length+1},()=>Array(t.length+1).fill(0));for(let t=0;t<=e.length;t++)n[t][0]=t;for(let e=0;e<=t.length;e++)n[0][e]=e;for(let r=1;r<=e.length;r++)for(let i=1;i<=t.length;i++)e[r-1]===t[i-1]?n[r][i]=n[r-1][i-1]:n[r][i]=1+Math.min(n[r-1][i],n[r][i-1],n[r-1][i-1]);return n[e.length][t.length]}function k(e,t){let n=e.toLowerCase(),r=t.toLowerCase(),i=O(n,r),a=Math.max(n.length,r.length);return a===0?1:1-i/a}var A=class{constructor(e,t,n={}){this.citations=e,this.text=t,this.options={scopeStrategy:n.scopeStrategy??`paragraph`,autoDetectParagraphs:n.autoDetectParagraphs??!0,paragraphBoundaryPattern:n.paragraphBoundaryPattern??/\n\n+/g,fuzzyPartyMatching:n.fuzzyPartyMatching??!0,partyMatchThreshold:n.partyMatchThreshold??.8,allowNestedResolution:n.allowNestedResolution??!1,reportUnresolved:n.reportUnresolved??!0},this.context={citationIndex:0,allCitations:e,lastFullCitation:void 0,fullCitationHistory:new Map,paragraphMap:new Map},this.options.autoDetectParagraphs&&(this.context.paragraphMap=E(t,e,this.options.paragraphBoundaryPattern))}resolve(){let t=[];for(let n=0;n<this.citations.length;n++){this.context.citationIndex=n;let r=this.citations[n],i;switch(r.type){case`id`:i=this.resolveId(r);break;case`supra`:i=this.resolveSupra(r);break;case`shortFormCase`:i=this.resolveShortFormCase(r);break;default:e(r)&&(this.context.lastFullCitation=n,this.trackFullCitation(r,n));break}t.push({...r,resolution:i})}return t}resolveId(e){let t=this.context.citationIndex,n;for(let e=t-1;e>=0;e--)if(this.citations[e].type===`case`){n=e;break}return n===void 0?this.createFailureResult(`No preceding full case citation found`):this.isWithinScope(n,t)?{resolvedTo:n,confidence:1}:this.createFailureResult(`Antecedent citation outside scope boundary`)}resolveSupra(e){let t=this.context.citationIndex,n=this.normalizePartyName(e.partyName),r;for(let[e,i]of this.context.fullCitationHistory){if(!this.isWithinScope(i,t))continue;let a=k(n,e);(!r||a>r.similarity)&&(r={index:i,similarity:a})}if(!r)return this.createFailureResult(`No full citation found in scope`);if(r.similarity<this.options.partyMatchThreshold)return this.createFailureResult(`Party name similarity ${r.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`);let i=[];return r.similarity<1&&i.push(`Fuzzy match: similarity ${r.similarity.toFixed(2)}`),{resolvedTo:r.index,confidence:r.similarity,warnings:i.length>0?i:void 0}}resolveShortFormCase(e){let t=this.context.citationIndex;for(let n=t-1;n>=0;n--){let r=this.citations[n];if(r.type===`case`&&r.volume===e.volume&&this.normalizeReporter(r.reporter)===this.normalizeReporter(e.reporter))return this.isWithinScope(n,t)?{resolvedTo:n,confidence:.95}:this.createFailureResult(`Matching citation outside scope boundary`)}return this.createFailureResult(`No matching full case citation found`)}trackFullCitation(e,t){if(e.type===`case`){let n=this.extractPartyName(e);if(n){let e=this.normalizePartyName(n);this.context.fullCitationHistory.set(e,t)}}}extractPartyName(e){let t=e.span.originalStart,n=Math.max(0,t-100),r=this.text.substring(n,t),i=r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*)\s+v\.?\s+[A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*,\s*$/);return i?i[1].trim():r.match(/([A-Z][a-zA-Z]*(?:\s+[A-Z][a-zA-Z]*)*),\s*$/)?.[1].trim()}normalizePartyName(e){return e.toLowerCase().replace(/\s+/g,` `).trim()}normalizeReporter(e){return e.toLowerCase().replace(/\s+/g,``).replace(/\./g,``)}isWithinScope(e,t){return D(e,t,this.context.paragraphMap,this.options.scopeStrategy)}createFailureResult(e){if(this.options.reportUnresolved)return{resolvedTo:void 0,failureReason:e,confidence:0}}};function j(e,t,n){return new A(e,t,n).resolve()}function M(e,t){let n=performance.now(),{cleaned:r,transformationMap:i,warnings:a}=l(e,t?.cleaners),o=g(r,t?.patterns||[...m,...h,...d,...f,...p]),s=[],c=new Set;for(let e of o){let t=`${e.span.cleanStart}-${e.span.cleanEnd}`;c.has(t)||(c.add(t),s.push(e))}let u=[];for(let e of s){let t;switch(e.type){case`case`:t=e.patternId===`id`||e.patternId===`ibid`?C(e,i):e.patternId===`supra`?w(e,i):e.patternId===`shortFormCase`?T(e,i):_(e,i);break;case`statute`:t=v(e,i);break;case`journal`:t=y(e,i);break;case`neutral`:t=b(e,i);break;case`publicLaw`:t=x(e,i);break;case`federalRegister`:t=S(e,i);break;default:continue}a.length>0&&(t.warnings=[...t.warnings||[],...a]),t.processTimeMs=performance.now()-n,u.push(t)}return t?.resolve?j(u,e,t.resolutionOptions):u}async function N(e,t){return M(e,t)}export{A as DocumentResolver,i as assertUnreachable,l as cleanText,_ as extractCase,M as extractCitations,N as extractCitationsAsync,S as extractFederalRegister,y as extractJournal,b as extractNeutral,x as extractPublicLaw,v as extractStatute,n as isCaseCitation,r as isCitationType,e as isFullCitation,t as isShortFormCitation,j as resolveCitations,g as tokenize};
2
2
  //# sourceMappingURL=index.mjs.map