eyecite-ts 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/README.md +80 -10
  2. package/dist/annotate/index.cjs.map +1 -1
  3. package/dist/annotate/index.d.cts +1 -1
  4. package/dist/annotate/index.d.mts +1 -1
  5. package/dist/annotate/index.mjs.map +1 -1
  6. package/dist/{citation-4bmWbhSK.d.cts → citation-BwXdJTA9.d.mts} +190 -12
  7. package/dist/citation-BwXdJTA9.d.mts.map +1 -0
  8. package/dist/{citation-BVN0o8TJ.d.mts → citation-By8QXtGC.d.cts} +190 -12
  9. package/dist/citation-By8QXtGC.d.cts.map +1 -0
  10. package/dist/data/index.cjs +1 -1
  11. package/dist/data/index.cjs.map +1 -1
  12. package/dist/data/index.d.cts +91 -1
  13. package/dist/data/index.d.cts.map +1 -1
  14. package/dist/data/index.d.mts +91 -1
  15. package/dist/data/index.d.mts.map +1 -1
  16. package/dist/data/index.mjs +1 -1
  17. package/dist/data/index.mjs.map +1 -1
  18. package/dist/index.cjs +1 -1
  19. package/dist/index.cjs.map +1 -1
  20. package/dist/index.d.cts +94 -144
  21. package/dist/index.d.cts.map +1 -1
  22. package/dist/index.d.mts +94 -144
  23. package/dist/index.d.mts.map +1 -1
  24. package/dist/index.mjs +1 -1
  25. package/dist/index.mjs.map +1 -1
  26. package/dist/knownCodes-CI-vnoBO.cjs +2 -0
  27. package/dist/knownCodes-CI-vnoBO.cjs.map +1 -0
  28. package/dist/knownCodes-MkDSiR1j.mjs +2 -0
  29. package/dist/knownCodes-MkDSiR1j.mjs.map +1 -0
  30. package/dist/{reporters-DYNnh4O0.mjs → reporters-CZoC98-L.mjs} +1 -1
  31. package/dist/reporters-CZoC98-L.mjs.map +1 -0
  32. package/dist/reporters-Wob0oyD9.cjs +2 -0
  33. package/dist/reporters-Wob0oyD9.cjs.map +1 -0
  34. package/dist/types-BfYnmSHC.d.mts +115 -0
  35. package/dist/types-BfYnmSHC.d.mts.map +1 -0
  36. package/dist/types-C_5aOCZ1.d.cts +115 -0
  37. package/dist/types-C_5aOCZ1.d.cts.map +1 -0
  38. package/dist/utils/index.cjs +7 -0
  39. package/dist/utils/index.cjs.map +1 -0
  40. package/dist/utils/index.d.cts +122 -0
  41. package/dist/utils/index.d.cts.map +1 -0
  42. package/dist/utils/index.d.mts +122 -0
  43. package/dist/utils/index.d.mts.map +1 -0
  44. package/dist/utils/index.mjs +7 -0
  45. package/dist/utils/index.mjs.map +1 -0
  46. package/package.json +10 -1
  47. package/dist/citation-4bmWbhSK.d.cts.map +0 -1
  48. package/dist/citation-BVN0o8TJ.d.mts.map +0 -1
  49. package/dist/reporters-BclWimmk.cjs +0 -2
  50. package/dist/reporters-BclWimmk.cjs.map +0 -1
  51. package/dist/reporters-DYNnh4O0.mjs.map +0 -1
package/dist/index.d.cts CHANGED
@@ -1,8 +1,9 @@
1
- import { S as TransformationMap, _ as StatuteCitation, a as ExtractorMap, b as Warning, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as StatutesAtLargeCitation, x as Span, y as SupraCitation } from "./citation-4bmWbhSK.cjs";
1
+ import { A as PinciteInfo, C as StatuteCitation, D as Warning, E as SupraCitation, O as Span, S as ShortFormCitationType, T as SubsequentHistoryEntry, _ as Parenthetical, a as CitationType, b as ShortFormCaseCitation, c as ExtractorMap, d as FullCitation, f as FullCitationType, g as NeutralCitation, h as JournalCitation, i as CitationSignal, j as parsePincite, k as TransformationMap, l as FederalRegisterCitation, m as IdCitation, n as CitationBase, o as ConstitutionalCitation, p as HistorySignal, r as CitationOfType, s as CourtInference, t as Citation, u as FullCaseCitation, v as ParentheticalType, w as StatutesAtLargeCitation, x as ShortFormCitation, y as PublicLawCitation } from "./citation-By8QXtGC.cjs";
2
+ import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-C_5aOCZ1.cjs";
2
3
 
3
4
  //#region src/types/guards.d.ts
4
5
  /**
5
- * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
6
+ * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister, statutesAtLarge, constitutional).
6
7
  */
7
8
  declare function isFullCitation(citation: Citation): citation is FullCitation;
8
9
  /**
@@ -44,93 +45,6 @@ interface Pattern {
44
45
  type: FullCitationType;
45
46
  }
46
47
  //#endregion
47
- //#region src/resolve/types.d.ts
48
- /**
49
- * Scope boundary strategy for resolution.
50
- * Determines how far back to search for antecedent citations.
51
- */
52
- type ScopeStrategy = "paragraph" | "section" | "footnote" | "none";
53
- /**
54
- * Options for citation resolution.
55
- */
56
- interface ResolutionOptions {
57
- /**
58
- * Scope boundary strategy (default: 'paragraph')
59
- * - paragraph: Only resolve within same paragraph
60
- * - section: Only resolve within same section
61
- * - footnote: Only resolve within same footnote
62
- * - none: Resolve across entire document
63
- */
64
- scopeStrategy?: ScopeStrategy;
65
- /**
66
- * Auto-detect paragraph boundaries from text (default: true)
67
- * Uses paragraphBoundaryPattern to split text
68
- */
69
- autoDetectParagraphs?: boolean;
70
- /**
71
- * Regex pattern to detect paragraph boundaries (default: /\n\n+/)
72
- * Only used if autoDetectParagraphs is true
73
- */
74
- paragraphBoundaryPattern?: RegExp;
75
- /**
76
- * Enable fuzzy party name matching for supra resolution (default: true)
77
- * Uses Levenshtein distance to handle typos and variations
78
- */
79
- fuzzyPartyMatching?: boolean;
80
- /**
81
- * Similarity threshold for fuzzy party matching (default: 0.8)
82
- * Range: 0-1 where 1.0 is exact match
83
- * Only used if fuzzyPartyMatching is true
84
- */
85
- partyMatchThreshold?: number;
86
- /**
87
- * Allow Id. citations to resolve to other short-form citations (default: false)
88
- * If true: "Smith v. Jones, 500 F.2d 100" -> "Id." -> "Id. at 105"
89
- * If false: Second Id. fails to resolve (no full citation between them)
90
- */
91
- allowNestedResolution?: boolean;
92
- /**
93
- * Report unresolved citations with failure reasons (default: true)
94
- * If false: resolution field will be undefined for unresolved citations
95
- */
96
- reportUnresolved?: boolean;
97
- }
98
- /**
99
- * Result of resolving a short-form citation.
100
- */
101
- interface ResolutionResult {
102
- /**
103
- * Index of the citation this resolves to.
104
- * undefined if resolution failed
105
- */
106
- resolvedTo?: number;
107
- /**
108
- * Reason for resolution failure (if any)
109
- */
110
- failureReason?: string;
111
- /**
112
- * Warnings about ambiguous or uncertain resolutions
113
- */
114
- warnings?: string[];
115
- /**
116
- * Confidence in the resolution (0-1)
117
- * Factors: party name similarity, scope boundary, citation type match
118
- */
119
- confidence: number;
120
- }
121
- /**
122
- * Citation with resolution metadata.
123
- *
124
- * Uses a distributive conditional type so that `resolution` is only
125
- * meaningfully present on short-form citations (Id., supra, short-form case).
126
- * On full citations, `resolution` is typed as `undefined`.
127
- */
128
- type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
129
- resolution: ResolutionResult | undefined;
130
- } : C & {
131
- resolution?: undefined;
132
- };
133
- //#endregion
134
48
  //#region src/extract/extractCitations.d.ts
135
49
  /**
136
50
  * Options for customizing citation extraction behavior.
@@ -195,6 +109,25 @@ interface ExtractOptions {
195
109
  * ```
196
110
  */
197
111
  resolutionOptions?: ResolutionOptions;
112
+ /**
113
+ * Remove citations flagged as likely false positives (default: false).
114
+ *
115
+ * When false (default), flagged citations get reduced confidence (0.1) and a warning.
116
+ * When true, flagged citations are removed from results entirely.
117
+ *
118
+ * False positive detection uses:
119
+ * - A static blocklist of known non-US reporter abbreviations (international, UK, European)
120
+ * - A year plausibility heuristic (years before 1750 predate US legal reporting)
121
+ *
122
+ * @example
123
+ * ```typescript
124
+ * // Remove false positives from results
125
+ * const citations = extractCitations(text, { filterFalsePositives: true })
126
+ * ```
127
+ */
128
+ filterFalsePositives?: boolean;
129
+ /** Detect footnote zones and annotate citations with inFootnote/footnoteNumber (default: false) */
130
+ detectFootnotes?: boolean;
198
131
  }
199
132
  /**
200
133
  * Extracts legal citations from text using the full parsing pipeline.
@@ -285,6 +218,16 @@ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
285
218
  }): Promise<ResolvedCitation[]>;
286
219
  declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
287
220
  //#endregion
221
+ //#region src/extract/filterFalsePositives.d.ts
222
+ /**
223
+ * Apply false positive filters to extracted citations.
224
+ *
225
+ * @param citations - Extracted citations (may be mutated in penalize mode)
226
+ * @param remove - If true, remove flagged citations. If false, penalize confidence + add warning.
227
+ * @returns Filtered array (same reference if remove=false, new array if remove=true and items removed)
228
+ */
229
+ declare function applyFalsePositiveFilters(citations: Citation[], remove: boolean): Citation[];
230
+ //#endregion
288
231
  //#region src/clean/cleanText.d.ts
289
232
  /**
290
233
  * Result of text cleaning operation.
@@ -305,7 +248,7 @@ interface CleanTextResult {
305
248
  * cleaned text while reporting positions in the original text.
306
249
  *
307
250
  * @param original - Original input text
308
- * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)
251
+ * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing)
309
252
  * @returns Cleaned text with position mappings and warnings
310
253
  *
311
254
  * @example
@@ -425,44 +368,51 @@ declare function tokenize(cleanedText: string, patterns?: Pattern[]): Token[];
425
368
  */
426
369
  declare function extractCase(token: Token, transformationMap: TransformationMap, cleanedText?: string): FullCaseCitation;
427
370
  //#endregion
428
- //#region src/extract/extractStatute.d.ts
371
+ //#region src/extract/extractConstitutional.d.ts
429
372
  /**
430
- * Extracts statute citation metadata from a tokenized citation.
373
+ * Extract a constitutional citation from a tokenized match.
374
+ *
375
+ * @param token - Tokenized citation candidate from the tokenizer
376
+ * @param transformationMap - Maps cleaned text positions to original text positions
377
+ * @returns Parsed constitutional citation with structured fields
378
+ */
379
+ declare function extractConstitutional(token: Token, transformationMap: TransformationMap): ConstitutionalCitation;
380
+ //#endregion
381
+ //#region src/extract/extractFederalRegister.d.ts
382
+ /**
383
+ * Extracts Federal Register citation metadata from a tokenized citation.
431
384
  *
432
385
  * Parses token text to extract:
433
- * - Title: Optional leading digits (e.g., "42" from "42 U.S.C. § 1983")
434
- * - Code: Statutory code abbreviation (e.g., "U.S.C.", "Cal. Civ. Code")
435
- * - Section: Section number after § symbol (e.g., "1983")
436
- * - Subsections: Optional parenthetical subdivisions (e.g., "(a)(1)")
386
+ * - Volume: Federal Register volume number (e.g., "85")
387
+ * - Page: Page number (e.g., "12345")
388
+ * - Year: Optional publication year in parentheses (e.g., "(2021)")
437
389
  *
438
390
  * Confidence scoring:
439
- * - Base: 0.5
440
- * - Known code pattern (U.S.C., C.F.R., state codes): +0.3
441
- * - Capped at 1.0
391
+ * - 0.9 (Federal Register format is standardized)
442
392
  *
443
393
  * @param token - Token from tokenizer containing matched text and clean positions
444
394
  * @param transformationMap - Position mapping from clean → original text
445
- * @returns StatuteCitation with parsed metadata and translated positions
395
+ * @returns FederalRegisterCitation with parsed metadata and translated positions
446
396
  *
447
397
  * @example
448
398
  * ```typescript
449
399
  * const token = {
450
- * text: "42 U.S.C. § 1983",
451
- * span: { cleanStart: 10, cleanEnd: 26 },
452
- * type: "statute",
453
- * patternId: "usc"
400
+ * text: "85 Fed. Reg. 12345",
401
+ * span: { cleanStart: 10, cleanEnd: 28 },
402
+ * type: "federalRegister",
403
+ * patternId: "federal-register"
454
404
  * }
455
- * const citation = extractStatute(token, transformationMap)
405
+ * const citation = extractFederalRegister(token, transformationMap)
456
406
  * // citation = {
457
- * // type: "statute",
458
- * // title: 42,
459
- * // code: "U.S.C.",
460
- * // section: "1983",
407
+ * // type: "federalRegister",
408
+ * // volume: 85,
409
+ * // page: 12345,
410
+ * // confidence: 0.9,
461
411
  * // ...
462
412
  * // }
463
413
  * ```
464
414
  */
465
- declare function extractStatute(token: Token, transformationMap: TransformationMap): StatuteCitation;
415
+ declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
466
416
  //#endregion
467
417
  //#region src/extract/extractJournal.d.ts
468
418
  /**
@@ -580,45 +530,31 @@ declare function extractNeutral(token: Token, transformationMap: TransformationM
580
530
  */
581
531
  declare function extractPublicLaw(token: Token, transformationMap: TransformationMap): PublicLawCitation;
582
532
  //#endregion
583
- //#region src/extract/extractFederalRegister.d.ts
533
+ //#region src/extract/extractStatute.d.ts
584
534
  /**
585
- * Extracts Federal Register citation metadata from a tokenized citation.
586
- *
587
- * Parses token text to extract:
588
- * - Volume: Federal Register volume number (e.g., "85")
589
- * - Page: Page number (e.g., "12345")
590
- * - Year: Optional publication year in parentheses (e.g., "(2021)")
591
- *
592
- * Confidence scoring:
593
- * - 0.9 (Federal Register format is standardized)
594
- *
595
- * @param token - Token from tokenizer containing matched text and clean positions
596
- * @param transformationMap - Position mapping from clean → original text
597
- * @returns FederalRegisterCitation with parsed metadata and translated positions
598
- *
599
- * @example
600
- * ```typescript
601
- * const token = {
602
- * text: "85 Fed. Reg. 12345",
603
- * span: { cleanStart: 10, cleanEnd: 28 },
604
- * type: "federalRegister",
605
- * patternId: "federal-register"
606
- * }
607
- * const citation = extractFederalRegister(token, transformationMap)
608
- * // citation = {
609
- * // type: "federalRegister",
610
- * // volume: 85,
611
- * // page: 12345,
612
- * // confidence: 0.9,
613
- * // ...
614
- * // }
615
- * ```
535
+ * Extracts statute citation metadata from a tokenized citation.
536
+ * Dispatches to family-specific extractors based on patternId.
616
537
  */
617
- declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
538
+ declare function extractStatute(token: Token, transformationMap: TransformationMap): StatuteCitation;
618
539
  //#endregion
619
540
  //#region src/extract/extractStatutesAtLarge.d.ts
620
541
  declare function extractStatutesAtLarge(token: Token, transformationMap: TransformationMap): StatutesAtLargeCitation;
621
542
  //#endregion
543
+ //#region src/extract/courtNormalization.d.ts
544
+ /**
545
+ * Normalize a court string extracted from a citation parenthetical.
546
+ *
547
+ * - Collapses spaces after periods: "S.D. N.Y." → "S.D.N.Y."
548
+ * - Ensures trailing period on abbreviated forms: "2d Cir" → "2d Cir."
549
+ * - Returns undefined for empty/undefined input
550
+ *
551
+ * @example
552
+ * normalizeCourt("S.D. N.Y.") // "S.D.N.Y."
553
+ * normalizeCourt("2d Cir") // "2d Cir."
554
+ * normalizeCourt("U.S.") // "U.S."
555
+ */
556
+ declare function normalizeCourt(court: string | undefined): string | undefined;
557
+ //#endregion
622
558
  //#region src/resolve/DocumentResolver.d.ts
623
559
  /**
624
560
  * Document-scoped resolver that processes citations sequentially
@@ -703,5 +639,19 @@ declare class DocumentResolver {
703
639
  */
704
640
  declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
705
641
  //#endregion
706
- export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
642
+ //#region src/footnotes/detectFootnotes.d.ts
643
+ /**
644
+ * Detect footnote zones in text (HTML or plain text).
645
+ *
646
+ * Strategy: if the input contains HTML tags, try HTML structural detection
647
+ * first. If that yields no results (HTML without footnote elements), fall
648
+ * back to plain-text heuristic detection. For non-HTML input, use plain-text
649
+ * detection directly.
650
+ *
651
+ * @param text - Raw input text (HTML or plain text)
652
+ * @returns FootnoteMap with zones in input-text coordinates, sorted by start
653
+ */
654
+ declare function detectFootnotes(text: string): FootnoteMap;
655
+ //#endregion
656
+ export { type Citation, type CitationBase, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type CourtInference, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type NeutralCitation, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type Warning, applyFalsePositiveFilters, assertUnreachable, cleanText, detectFootnotes, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, tokenize };
707
657
  //# sourceMappingURL=index.d.cts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractStatutesAtLarge.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;AAKA;iBAAgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAahD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBASrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;AAThE;;;;AAAA,iBAiBgB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;AAX9B;;;;;;;;;;AAQA;iBAuBgB,iBAAA,CAAkB,CAAA;;;UC1CjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;;;;KCPI,aAAA;;AFKZ;;UEAiB,iBAAA;EFAoD;;;;;;;EEQnE,aAAA,GAAgB,aAAA;EFCF;;;;EEKd,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;EFHb;;;;EESd,kBAAA;;;;;;EAOA,mBAAA;;;;;;EAOA,qBAAA;;;;AFAF;EEME,gBAAA;AAAA;;;;UAMe,gBAAA;;ADtDjB;;;EC2DE,UAAA;;;;EAKA,aAAA;;;;EAKA,QAAA;;;;AAxEF;EA8EE,UAAA;AAAA;;;AAzEF;;;;;KAmFY,gBAAA,WAA2B,QAAA,GAAW,QAAA,IAChD,CAAA,SAAU,iBAAA,GACN,CAAA;EAAM,UAAA,EAAY,gBAAA;AAAA,IAClB,CAAA;EAAM,UAAA;AAAA;;;AFnGZ;;;AAAA,UGoCiB,cAAA;;;;;;;AHvBjB;;;;;;;;EGsCC,QAAA,GAAW,KAAA,EAAO,IAAA;EHtCkD;AASrE;;;;;;;;;;AAQA;;;EGqCC,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;AH7BD;;;;;;;;AC1CA;;;EEuFC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;AD1FrB;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA+BA;;;;;;;;;;;;;;;;;;iBC+DgB,gBAAA,CAAiB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,gBAAA;AAAA,iBAC7E,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;AA5H1E;;;;;;;;;;;;;;;;;iBAgTsB,qBAAA,CAAsB,IAAA,UAAc,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAAkB,OAAA,CAAQ,gBAAA;AAAA,iBAC1F,qBAAA,CAAsB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,OAAA,CAAQ,QAAA;;;;AH
rV7F;;UIOiB,eAAA;EJP+C;EIS/D,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;AJFX;;;;;;;;;;AASA;;;;;;AATA,iBIqBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;;;;;;;AJpBH;UKMiB,KAAA;;EAEf,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;;;;;;;;;;;;;;;;;;ALcF;;;;;;;;AC1CA;;;;;;;;iBImEgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;ALjEH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;AC1CA;;;;;;;iBK6cgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACE,gBAAA;;;;;AN/cH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;iBOcgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;APlCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;iBQgBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;ARpCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;iBSYgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;AThCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;;;iBUagB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;AVjCH;;;;;;;;;;AASA;;;;;;;;;;AAQA;;;;;;;;;;;iBWWgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;iBCjCa,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;AZDH;;;;AAAA,caea,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EbnBkD;AASrE;;;;;;EamBE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;Abdb;;EasDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;;UAsCA,iBAAA;EbjMV;;;;EAAA,Qa4NU,gBAAA;;;;AZtQV;;UYoSU,gBAAA;EZhSF;;;EAAA,QYySE,kBAAA;;;;UAUA,iBAAA;EZnTF;;;EAAA,QY6TE,aAAA;EXpUV;;;EAAA,QWgVU,mBAAA;AAAA;;;;;;AblUV;;;;;;;iBcMgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
1
+ {"version":3,"file":"index.d.cts","names":[],"sources":["../src/types/guards.ts","../src/patterns/casePatterns.ts","../src/extract/extractCitations.ts","../src/extract/filterFalsePositives.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractConstitutional.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractStatute.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/courtNormalization.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/footnotes/detectFootnotes.ts"],"mappings":";;;;;;;iBAYgB,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,YAAA;;;;iBAgBhD,mBAAA,CAAoB,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,iBAAA;;;;iBAOrD,cAAA,CAAe,QAAA,EAAU,QAAA,GAAW,QAAA,IAAY,gBAAA;;AAPhE;;;iBAegB,cAAA,WAAyB,YAAA,CAAA,CACvC,QAAA,EAAU,QAAA,EACV,IAAA,EAAM,CAAA,GACL,QAAA,IAAY,cAAA,CAAe,CAAA;;;;;;;AAX9B;;;;;;;;;;iBA+BgB,iBAAA,CAAkB,CAAA;;;UClDjB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA,EAAM,gBAAA;AAAA;;;;ADRR;;UE2CiB,cAAA;EF3C+C;;;;;;;AAgBhE;;;;;;;EE0CE,QAAA,GAAW,KAAA,EAAO,IAAA;;;AFnCpB;;;;;;;;;;AAQA;;EE2CE,QAAA,GAAW,OAAA;;;;;;;;;;;;;;EAeX,OAAA;;;;;AFnCF;;;;;;;;AClDA;;ECqGE,iBAAA,GAAoB,iBAAA;EDjGd;;;;;;;;;;;;ACmCR;;;;EAgFE,oBAAA;;EAGA,eAAA;AAAA;;;;;;;;;;;;;AA6DF;;;;;;;;;;;AAIA;;;;;;;;;;AA8OA;;;;;;;;;;;;;;;;AAIA;;;;;;;;;iBAtPgB,gBAAA,CACd,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAC3B,gBAAA;AAAA,iBACa,gBAAA,CAAiB,IAAA,UAAc,OAAA,GAAU,cAAA,GAAiB,QAAA;;;;;;ACjE1E;;;;;;;;;;;;;AC1HA;;;;iBFyasB,qBAAA,CACpB,IAAA,UACA,OAAA,EAAS,cAAA;EAAmB,OAAA;AAAA,IAC3B,OAAA,CAAQ,gBAAA;AAAA,iBACW,qBAAA,CACpB,IAAA,UACA,OAAA,GAAU,cAAA,GACT,OAAA,CAAQ,QAAA;;;;AFpaX;;;;;;iBG8GgB,yBAAA,CAA0B,SAAA,EAAW,QAAA,IAAY,MAAA,YAAkB,QAAA;;;;;AH9HnF;UIIiB,eAAA;;EAEf,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;AJIZ;;;;;;;;;;AAOA;;;;;iBIQgB,SAAA,CACd,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAUhB,eAAA;;;
;;;;;;;UC3Bc,KAAA;ELOD;EKLd,IAAA;ELK8D;EKF9D,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;ALIF;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;AClDA;;;;;;;AD2BA,iBKmCgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;;;ALpDH;;;;;;;;;;AAQA;;;;;;;;;;;;;;;;;;;;;AAuBA;;;;;;;;AClDA;;;;;;iBKsqBgB,WAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,EACnB,WAAA,YACC,gBAAA;;;;AN9qBH;;;;;;iBOuIgB,qBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,sBAAA;;;;;;AP1HH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;iBQGgB,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,uBAAA;;;;;;ARrBH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;;;;iBSQgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,eAAA;;;;;;AT1BH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;iBUIgB,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,eAAA;;;;;;AVtBH;;;;;;;;;;AAOA;;;;;;;;;;AAQA;;;;;;;;;;;;iBWKgB,gBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,iBAAA;;;;;;;iBC+Ca,cAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,eAAA;;;iBCrFa,sBAAA,CACd,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GAClB,uBAAA;;;;;;;AbPH;;;;;;;;iBcAgB,cAAA,CAAe,KAAA;;;;AdgB/B;;;ceKa,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBAGA,OAAA;;;AfJnB;;;;;EeaE,WAAA,CAAY,SAAA,EAAW,QAAA,IAAY,IAAA,UAAc,OAAA,GAAS,iBAAA;;;;;AfL5D;EekDE,OAAA,CAAA,GAAW,gBAAA;;;;UA2CH,SAAA;;;;UAgCA,YAAA;;;;UAiDA,oBAAA;;;;;;UAsCA,iBAAA;EfjNoB;AAoB9B;;;EApB8B,Qe4OpB,gBAAA;EfxNwB;;;;AClDlC;EDkDkC,QewPxB,gBAAA;;;;UAWA,kBAAA;;;;UAUA,iBAAA;;;;UAUA,aAAA;;AblSV;;UamTU,mBAAA;AAAA;;;;;;;AfvUV;;;;;;iBgBFgB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA;;;;;;AhBzBH;;;;;;;;iBiBKgB,eAAA,CAAgB,IAAA,WAAe,WAAA"}
package/dist/index.d.mts CHANGED
@@ -1,8 +1,9 @@
1
- import { S as TransformationMap, _ as StatuteCitation, a as ExtractorMap, b as Warning, c as FullCitation, d as JournalCitation, f as NeutralCitation, g as ShortFormCitationType, h as ShortFormCitation, i as CitationType, l as FullCitationType, m as ShortFormCaseCitation, n as CitationBase, o as FederalRegisterCitation, p as PublicLawCitation, r as CitationOfType, s as FullCaseCitation, t as Citation, u as IdCitation, v as StatutesAtLargeCitation, x as Span, y as SupraCitation } from "./citation-BVN0o8TJ.mjs";
1
+ import { A as PinciteInfo, C as StatuteCitation, D as Warning, E as SupraCitation, O as Span, S as ShortFormCitationType, T as SubsequentHistoryEntry, _ as Parenthetical, a as CitationType, b as ShortFormCaseCitation, c as ExtractorMap, d as FullCitation, f as FullCitationType, g as NeutralCitation, h as JournalCitation, i as CitationSignal, j as parsePincite, k as TransformationMap, l as FederalRegisterCitation, m as IdCitation, n as CitationBase, o as ConstitutionalCitation, p as HistorySignal, r as CitationOfType, s as CourtInference, t as Citation, u as FullCaseCitation, v as ParentheticalType, w as StatutesAtLargeCitation, x as ShortFormCitation, y as PublicLawCitation } from "./citation-BwXdJTA9.mjs";
2
+ import { a as FootnoteMap, i as ScopeStrategy, n as ResolutionResult, o as FootnoteZone, r as ResolvedCitation, t as ResolutionOptions } from "./types-BfYnmSHC.mjs";
2
3
 
3
4
  //#region src/types/guards.d.ts
4
5
  /**
5
- * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).
6
+ * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister, statutesAtLarge, constitutional).
6
7
  */
7
8
  declare function isFullCitation(citation: Citation): citation is FullCitation;
8
9
  /**
@@ -44,93 +45,6 @@ interface Pattern {
44
45
  type: FullCitationType;
45
46
  }
46
47
  //#endregion
47
- //#region src/resolve/types.d.ts
48
- /**
49
- * Scope boundary strategy for resolution.
50
- * Determines how far back to search for antecedent citations.
51
- */
52
- type ScopeStrategy = "paragraph" | "section" | "footnote" | "none";
53
- /**
54
- * Options for citation resolution.
55
- */
56
- interface ResolutionOptions {
57
- /**
58
- * Scope boundary strategy (default: 'paragraph')
59
- * - paragraph: Only resolve within same paragraph
60
- * - section: Only resolve within same section
61
- * - footnote: Only resolve within same footnote
62
- * - none: Resolve across entire document
63
- */
64
- scopeStrategy?: ScopeStrategy;
65
- /**
66
- * Auto-detect paragraph boundaries from text (default: true)
67
- * Uses paragraphBoundaryPattern to split text
68
- */
69
- autoDetectParagraphs?: boolean;
70
- /**
71
- * Regex pattern to detect paragraph boundaries (default: /\n\n+/)
72
- * Only used if autoDetectParagraphs is true
73
- */
74
- paragraphBoundaryPattern?: RegExp;
75
- /**
76
- * Enable fuzzy party name matching for supra resolution (default: true)
77
- * Uses Levenshtein distance to handle typos and variations
78
- */
79
- fuzzyPartyMatching?: boolean;
80
- /**
81
- * Similarity threshold for fuzzy party matching (default: 0.8)
82
- * Range: 0-1 where 1.0 is exact match
83
- * Only used if fuzzyPartyMatching is true
84
- */
85
- partyMatchThreshold?: number;
86
- /**
87
- * Allow Id. citations to resolve to other short-form citations (default: false)
88
- * If true: "Smith v. Jones, 500 F.2d 100" -> "Id." -> "Id. at 105"
89
- * If false: Second Id. fails to resolve (no full citation between them)
90
- */
91
- allowNestedResolution?: boolean;
92
- /**
93
- * Report unresolved citations with failure reasons (default: true)
94
- * If false: resolution field will be undefined for unresolved citations
95
- */
96
- reportUnresolved?: boolean;
97
- }
98
- /**
99
- * Result of resolving a short-form citation.
100
- */
101
- interface ResolutionResult {
102
- /**
103
- * Index of the citation this resolves to.
104
- * undefined if resolution failed
105
- */
106
- resolvedTo?: number;
107
- /**
108
- * Reason for resolution failure (if any)
109
- */
110
- failureReason?: string;
111
- /**
112
- * Warnings about ambiguous or uncertain resolutions
113
- */
114
- warnings?: string[];
115
- /**
116
- * Confidence in the resolution (0-1)
117
- * Factors: party name similarity, scope boundary, citation type match
118
- */
119
- confidence: number;
120
- }
121
- /**
122
- * Citation with resolution metadata.
123
- *
124
- * Uses a distributive conditional type so that `resolution` is only
125
- * meaningfully present on short-form citations (Id., supra, short-form case).
126
- * On full citations, `resolution` is typed as `undefined`.
127
- */
128
- type ResolvedCitation<C extends Citation = Citation> = C extends ShortFormCitation ? C & {
129
- resolution: ResolutionResult | undefined;
130
- } : C & {
131
- resolution?: undefined;
132
- };
133
- //#endregion
134
48
  //#region src/extract/extractCitations.d.ts
135
49
  /**
136
50
  * Options for customizing citation extraction behavior.
@@ -195,6 +109,25 @@ interface ExtractOptions {
195
109
  * ```
196
110
  */
197
111
  resolutionOptions?: ResolutionOptions;
112
+ /**
113
+ * Remove citations flagged as likely false positives (default: false).
114
+ *
115
+ * When false (default), flagged citations get reduced confidence (0.1) and a warning.
116
+ * When true, flagged citations are removed from results entirely.
117
+ *
118
+ * False positive detection uses:
119
+ * - A static blocklist of known non-US reporter abbreviations (international, UK, European)
120
+ * - A year plausibility heuristic (years before 1750 predate US legal reporting)
121
+ *
122
+ * @example
123
+ * ```typescript
124
+ * // Remove false positives from results
125
+ * const citations = extractCitations(text, { filterFalsePositives: true })
126
+ * ```
127
+ */
128
+ filterFalsePositives?: boolean;
129
+ /** Detect footnote zones and annotate citations with inFootnote/footnoteNumber (default: false) */
130
+ detectFootnotes?: boolean;
198
131
  }
199
132
  /**
200
133
  * Extracts legal citations from text using the full parsing pipeline.
@@ -285,6 +218,16 @@ declare function extractCitationsAsync(text: string, options: ExtractOptions & {
285
218
  }): Promise<ResolvedCitation[]>;
286
219
  declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>;
287
220
  //#endregion
221
+ //#region src/extract/filterFalsePositives.d.ts
222
+ /**
223
+ * Apply false positive filters to extracted citations.
224
+ *
225
+ * @param citations - Extracted citations (may be mutated in penalize mode)
226
+ * @param remove - If true, remove flagged citations. If false, penalize confidence + add warning.
227
+ * @returns Filtered array (same reference if remove=false, new array if remove=true and items removed)
228
+ */
229
+ declare function applyFalsePositiveFilters(citations: Citation[], remove: boolean): Citation[];
230
+ //#endregion
288
231
  //#region src/clean/cleanText.d.ts
289
232
  /**
290
233
  * Result of text cleaning operation.
@@ -305,7 +248,7 @@ interface CleanTextResult {
305
248
  * cleaned text while reporting positions in the original text.
306
249
  *
307
250
  * @param original - Original input text
308
- * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)
251
+ * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, decodeHtmlEntities, normalizeWhitespace, normalizeUnicode, normalizeDashes, fixSmartQuotes, normalizeTypography, normalizeReporterSpacing)
309
252
  * @returns Cleaned text with position mappings and warnings
310
253
  *
311
254
  * @example
@@ -425,44 +368,51 @@ declare function tokenize(cleanedText: string, patterns?: Pattern[]): Token[];
425
368
  */
426
369
  declare function extractCase(token: Token, transformationMap: TransformationMap, cleanedText?: string): FullCaseCitation;
427
370
  //#endregion
428
- //#region src/extract/extractStatute.d.ts
371
+ //#region src/extract/extractConstitutional.d.ts
429
372
  /**
430
- * Extracts statute citation metadata from a tokenized citation.
373
+ * Extract a constitutional citation from a tokenized match.
374
+ *
375
+ * @param token - Tokenized citation candidate from the tokenizer
376
+ * @param transformationMap - Maps cleaned text positions to original text positions
377
+ * @returns Parsed constitutional citation with structured fields
378
+ */
379
+ declare function extractConstitutional(token: Token, transformationMap: TransformationMap): ConstitutionalCitation;
380
+ //#endregion
381
+ //#region src/extract/extractFederalRegister.d.ts
382
+ /**
383
+ * Extracts Federal Register citation metadata from a tokenized citation.
431
384
  *
432
385
  * Parses token text to extract:
433
- * - Title: Optional leading digits (e.g., "42" from "42 U.S.C. § 1983")
434
- * - Code: Statutory code abbreviation (e.g., "U.S.C.", "Cal. Civ. Code")
435
- * - Section: Section number after § symbol (e.g., "1983")
436
- * - Subsections: Optional parenthetical subdivisions (e.g., "(a)(1)")
386
+ * - Volume: Federal Register volume number (e.g., "85")
387
+ * - Page: Page number (e.g., "12345")
388
+ * - Year: Optional publication year in parentheses (e.g., "(2021)")
437
389
  *
438
390
  * Confidence scoring:
439
- * - Base: 0.5
440
- * - Known code pattern (U.S.C., C.F.R., state codes): +0.3
441
- * - Capped at 1.0
391
+ * - 0.9 (Federal Register format is standardized)
442
392
  *
443
393
  * @param token - Token from tokenizer containing matched text and clean positions
444
394
  * @param transformationMap - Position mapping from clean → original text
445
- * @returns StatuteCitation with parsed metadata and translated positions
395
+ * @returns FederalRegisterCitation with parsed metadata and translated positions
446
396
  *
447
397
  * @example
448
398
  * ```typescript
449
399
  * const token = {
450
- * text: "42 U.S.C. § 1983",
451
- * span: { cleanStart: 10, cleanEnd: 26 },
452
- * type: "statute",
453
- * patternId: "usc"
400
+ * text: "85 Fed. Reg. 12345",
401
+ * span: { cleanStart: 10, cleanEnd: 28 },
402
+ * type: "federalRegister",
403
+ * patternId: "federal-register"
454
404
  * }
455
- * const citation = extractStatute(token, transformationMap)
405
+ * const citation = extractFederalRegister(token, transformationMap)
456
406
  * // citation = {
457
- * // type: "statute",
458
- * // title: 42,
459
- * // code: "U.S.C.",
460
- * // section: "1983",
407
+ * // type: "federalRegister",
408
+ * // volume: 85,
409
+ * // page: 12345,
410
+ * // confidence: 0.9,
461
411
  * // ...
462
412
  * // }
463
413
  * ```
464
414
  */
465
- declare function extractStatute(token: Token, transformationMap: TransformationMap): StatuteCitation;
415
+ declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
466
416
  //#endregion
467
417
  //#region src/extract/extractJournal.d.ts
468
418
  /**
@@ -580,45 +530,31 @@ declare function extractNeutral(token: Token, transformationMap: TransformationM
580
530
  */
581
531
  declare function extractPublicLaw(token: Token, transformationMap: TransformationMap): PublicLawCitation;
582
532
  //#endregion
583
- //#region src/extract/extractFederalRegister.d.ts
533
+ //#region src/extract/extractStatute.d.ts
584
534
  /**
585
- * Extracts Federal Register citation metadata from a tokenized citation.
586
- *
587
- * Parses token text to extract:
588
- * - Volume: Federal Register volume number (e.g., "85")
589
- * - Page: Page number (e.g., "12345")
590
- * - Year: Optional publication year in parentheses (e.g., "(2021)")
591
- *
592
- * Confidence scoring:
593
- * - 0.9 (Federal Register format is standardized)
594
- *
595
- * @param token - Token from tokenizer containing matched text and clean positions
596
- * @param transformationMap - Position mapping from clean → original text
597
- * @returns FederalRegisterCitation with parsed metadata and translated positions
598
- *
599
- * @example
600
- * ```typescript
601
- * const token = {
602
- * text: "85 Fed. Reg. 12345",
603
- * span: { cleanStart: 10, cleanEnd: 28 },
604
- * type: "federalRegister",
605
- * patternId: "federal-register"
606
- * }
607
- * const citation = extractFederalRegister(token, transformationMap)
608
- * // citation = {
609
- * // type: "federalRegister",
610
- * // volume: 85,
611
- * // page: 12345,
612
- * // confidence: 0.9,
613
- * // ...
614
- * // }
615
- * ```
535
+ * Extracts statute citation metadata from a tokenized citation.
536
+ * Dispatches to family-specific extractors based on patternId.
616
537
  */
617
- declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
538
+ declare function extractStatute(token: Token, transformationMap: TransformationMap): StatuteCitation;
618
539
  //#endregion
619
540
  //#region src/extract/extractStatutesAtLarge.d.ts
620
541
  declare function extractStatutesAtLarge(token: Token, transformationMap: TransformationMap): StatutesAtLargeCitation;
621
542
  //#endregion
543
+ //#region src/extract/courtNormalization.d.ts
544
+ /**
545
+ * Normalize a court string extracted from a citation parenthetical.
546
+ *
547
+ * - Collapses spaces after periods: "S.D. N.Y." → "S.D.N.Y."
548
+ * - Ensures trailing period on abbreviated forms: "2d Cir" → "2d Cir."
549
+ * - Returns undefined for empty/undefined input
550
+ *
551
+ * @example
552
+ * normalizeCourt("S.D. N.Y.") // "S.D.N.Y."
553
+ * normalizeCourt("2d Cir") // "2d Cir."
554
+ * normalizeCourt("U.S.") // "U.S."
555
+ */
556
+ declare function normalizeCourt(court: string | undefined): string | undefined;
557
+ //#endregion
622
558
  //#region src/resolve/DocumentResolver.d.ts
623
559
  /**
624
560
  * Document-scoped resolver that processes citations sequentially
@@ -703,5 +639,19 @@ declare class DocumentResolver {
703
639
  */
704
640
  declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
705
641
  //#endregion
706
- export { type Citation, type CitationBase, type CitationOfType, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FullCaseCitation, type FullCitation, type FullCitationType, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SupraCitation, type Token, type TransformationMap, type Warning, assertUnreachable, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, resolveCitations, tokenize };
642
+ //#region src/footnotes/detectFootnotes.d.ts
643
+ /**
644
+ * Detect footnote zones in text (HTML or plain text).
645
+ *
646
+ * Strategy: if the input contains HTML tags, try HTML structural detection
647
+ * first. If that yields no results (HTML without footnote elements), fall
648
+ * back to plain-text heuristic detection. For non-HTML input, use plain-text
649
+ * detection directly.
650
+ *
651
+ * @param text - Raw input text (HTML or plain text)
652
+ * @returns FootnoteMap with zones in input-text coordinates, sorted by start
653
+ */
654
+ declare function detectFootnotes(text: string): FootnoteMap;
655
+ //#endregion
656
+ export { type Citation, type CitationBase, type CitationOfType, type CitationSignal, type CitationType, type CleanTextResult, type ConstitutionalCitation, type CourtInference, DocumentResolver, type ExtractOptions, type ExtractorMap, type FederalRegisterCitation, type FootnoteMap, type FootnoteZone, type FullCaseCitation, type FullCitation, type FullCitationType, type HistorySignal, type IdCitation, type JournalCitation, type NeutralCitation, type Parenthetical, type ParentheticalType, type PinciteInfo, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type ShortFormCitation, type ShortFormCitationType, type Span, type StatuteCitation, type StatutesAtLargeCitation, type SubsequentHistoryEntry, type SupraCitation, type Token, type TransformationMap, type Warning, applyFalsePositiveFilters, assertUnreachable, cleanText, detectFootnotes, extractCase, extractCitations, extractCitationsAsync, extractConstitutional, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, extractStatutesAtLarge, isCaseCitation, isCitationType, isFullCitation, isShortFormCitation, normalizeCourt, parsePincite, resolveCitations, tokenize };
707
657
  //# sourceMappingURL=index.d.mts.map