eyecite-ts 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,682 @@
1
+ import { a as FullCaseCitation, c as NeutralCitation, d as StatuteCitation, f as SupraCitation, i as FederalRegisterCitation, l as PublicLawCitation, m as TransformationMap, n as CitationBase, o as IdCitation, p as Span, r as CitationType, s as JournalCitation, t as Citation, u as ShortFormCaseCitation } from "./citation-BcY5zzWb.cjs";
2
+
3
+ //#region src/patterns/casePatterns.d.ts
4
+ /**
5
+ * Case Citation Regex Patterns
6
+ *
7
+ * These patterns are designed for tokenization (broad matching) not extraction.
8
+ * They identify potential case citations in text for the tokenizer (Plan 3).
9
+ * Metadata parsing happens in Phase 2 Plan 5 (extraction layer); validation against reporters-db is deferred to Phase 3.
10
+ *
11
+ * Pattern Design Principles (from RESEARCH.md):
12
+ * - Use \b word boundaries to avoid matching "F." in "F.B.I."
13
+ * - Avoid nested quantifiers: (a+)+ causes ReDoS
14
+ * - Keep patterns simple: tokenization only needs to find candidates
15
+ * - Use global flag /g for matchAll()
16
+ */
17
+ interface Pattern {
18
+ id: string;
19
+ regex: RegExp;
20
+ description: string;
21
+ type: "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister";
22
+ }
23
+ //#endregion
24
+ //#region src/resolve/types.d.ts
25
+ /**
26
+ * Scope boundary strategy for resolution.
27
+ * Determines how far back to search for antecedent citations.
28
+ */
29
+ type ScopeStrategy = "paragraph" | "section" | "footnote" | "none";
30
+ /**
31
+ * Options for citation resolution.
32
+ */
33
+ interface ResolutionOptions {
34
+ /**
35
+ * Scope boundary strategy (default: 'paragraph')
36
+ * - paragraph: Only resolve within same paragraph
37
+ * - section: Only resolve within same section
38
+ * - footnote: Only resolve within same footnote
39
+ * - none: Resolve across entire document
40
+ */
41
+ scopeStrategy?: ScopeStrategy;
42
+ /**
43
+ * Auto-detect paragraph boundaries from text (default: true)
44
+ * Uses paragraphBoundaryPattern to split text
45
+ */
46
+ autoDetectParagraphs?: boolean;
47
+ /**
48
+ * Regex pattern to detect paragraph boundaries (default: /\n\n+/)
49
+ * Only used if autoDetectParagraphs is true
50
+ */
51
+ paragraphBoundaryPattern?: RegExp;
52
+ /**
53
+ * Enable fuzzy party name matching for supra resolution (default: true)
54
+ * Uses Levenshtein distance to handle typos and variations
55
+ */
56
+ fuzzyPartyMatching?: boolean;
57
+ /**
58
+ * Similarity threshold for fuzzy party matching (default: 0.8)
59
+ * Range: 0-1 where 1.0 is exact match
60
+ * Only used if fuzzyPartyMatching is true
61
+ */
62
+ partyMatchThreshold?: number;
63
+ /**
64
+ * Allow Id. citations to resolve to other short-form citations (default: false)
65
+ * If true: "Smith v. Jones, 500 F.2d 100" -> "Id." -> "Id. at 105"
66
+ * If false: Second Id. fails to resolve (no full citation between them)
67
+ */
68
+ allowNestedResolution?: boolean;
69
+ /**
70
+ * Report unresolved citations with failure reasons (default: true)
71
+ * If false: resolution field will be undefined for unresolved citations
72
+ */
73
+ reportUnresolved?: boolean;
74
+ }
75
+ /**
76
+ * Result of resolving a short-form citation.
77
+ */
78
+ interface ResolutionResult {
79
+ /**
80
+ * Index of the citation this resolves to.
81
+ * undefined if resolution failed
82
+ */
83
+ resolvedTo?: number;
84
+ /**
85
+ * Reason for resolution failure (if any)
86
+ */
87
+ failureReason?: string;
88
+ /**
89
+ * Warnings about ambiguous or uncertain resolutions
90
+ */
91
+ warnings?: string[];
92
+ /**
93
+ * Confidence in the resolution (0-1)
94
+ * Factors: party name similarity, scope boundary, citation type match
95
+ */
96
+ confidence: number;
97
+ }
98
+ /**
99
+ * Citation with optional resolution metadata.
100
+ * Uses intersection type to add resolution field to any Citation type.
101
+ */
102
+ type ResolvedCitation = Citation & {
103
+ /**
104
+ * Resolution result for short-form citations.
105
+ * Only present for Id/supra/shortFormCase types
106
+ */
107
+ resolution?: ResolutionResult;
108
+ };
109
+ //#endregion
110
+ //#region src/extract/extractCitations.d.ts
111
+ /**
112
+ * Options for customizing citation extraction behavior.
113
+ */
114
+ interface ExtractOptions {
115
+ /**
116
+ * Custom text cleaners (overrides defaults).
117
+ *
118
+ * If provided, these cleaners replace the default pipeline:
119
+ * [stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes]
120
+ *
121
+ * @example
122
+ * ```typescript
123
+ * // Use only HTML stripping, skip Unicode normalization
124
+ * const citations = extractCitations(text, {
125
+ * cleaners: [stripHtmlTags]
126
+ * })
127
+ * ```
128
+ */
129
+ cleaners?: Array<(text: string) => string>;
130
+ /**
131
+ * Custom regex patterns (overrides defaults).
132
+ *
133
+ * If provided, these patterns replace the default pattern set:
134
+ * [casePatterns, statutePatterns, journalPatterns, neutralPatterns, shortFormPatterns]
135
+ *
136
+ * @example
137
+ * ```typescript
138
+ * // Extract only case citations
139
+ * const citations = extractCitations(text, {
140
+ * patterns: casePatterns
141
+ * })
142
+ * ```
143
+ */
144
+ patterns?: Pattern[];
145
+ /**
146
+ * Resolve short-form citations to their full antecedents (default: false).
147
+ *
148
+ * If true, returns ResolvedCitation[] with resolution metadata for short-form citations
149
+ * (Id., supra, short-form case). Full citations are unchanged.
150
+ *
151
+ * @example
152
+ * ```typescript
153
+ * const text = "Smith v. Jones, 500 F.2d 100 (1974). Id. at 105."
154
+ * const citations = extractCitations(text, { resolve: true })
155
+ * // citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)
156
+ * ```
157
+ */
158
+ resolve?: boolean;
159
+ /**
160
+ * Options for citation resolution (only used if resolve: true).
161
+ *
162
+ * @example
163
+ * ```typescript
164
+ * const citations = extractCitations(text, {
165
+ * resolve: true,
166
+ * resolutionOptions: {
167
+ * scopeStrategy: 'paragraph',
168
+ * fuzzyPartyMatching: true
169
+ * }
170
+ * })
171
+ * ```
172
+ */
173
+ resolutionOptions?: ResolutionOptions;
174
+ }
175
+ /**
176
+ * Extracts legal citations from text using the full parsing pipeline.
177
+ *
178
+ * Pipeline flow:
179
+ * 1. **Clean:** Remove HTML tags, normalize whitespace and Unicode, fix smart quotes
180
+ * 2. **Tokenize:** Apply regex patterns to find citation candidates
181
+ * 3. **Extract:** Parse metadata (volume, reporter, page, etc.)
182
+ * 4. **Translate:** Map positions from cleaned text back to original text
183
+ *
184
+ * This function is synchronous because all stages (cleaning, tokenization,
185
+ * extraction) are synchronous. For async operations (e.g., future reporters-db
186
+ * lookups), use extractCitationsAsync().
187
+ *
188
+ * Position tracking:
189
+ * - TransformationMap is built during cleaning
190
+ * - Tokens contain positions in cleaned text (cleanStart/cleanEnd)
191
+ * - Extraction translates cleaned positions → original positions
192
+ * - Final citations have originalStart/originalEnd pointing to input text
193
+ *
194
+ * Warnings from cleaning layer are attached to all extracted citations.
195
+ *
196
+ * @param text - Raw text to extract citations from (may contain HTML, Unicode)
197
+ * @param options - Optional customization (cleaners, patterns, resolve, resolutionOptions)
198
+ * @returns Array of citations with parsed metadata and accurate positions (ResolvedCitation[] if resolve: true)
199
+ *
200
+ * @example
201
+ * ```typescript
202
+ * const text = "See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)"
203
+ * const citations = extractCitations(text)
204
+ * // citations[0] = {
205
+ * // type: "case",
206
+ * // volume: 500,
207
+ * // reporter: "F.2d",
208
+ * // page: 123,
209
+ * // court: "9th Cir.",
210
+ * // year: 2020,
211
+ * // span: { originalStart: 18, originalEnd: 30, ... }
212
+ * // }
213
+ * ```
214
+ *
215
+ * @example
216
+ * ```typescript
217
+ * // Extract from HTML
218
+ * const html = "<p>In <b>Smith</b>, 500 F.2d 123, the court held...</p>"
219
+ * const citations = extractCitations(html)
220
+ * // HTML is stripped, positions point to original HTML
221
+ * ```
222
+ *
223
+ * @example
224
+ * ```typescript
225
+ * // Extract multiple citation types
226
+ * const text = "See 42 U.S.C. § 1983; Smith, 500 F.2d 123; 123 Harv. L. Rev. 456"
227
+ * const citations = extractCitations(text)
228
+ * // citations[0].type === "statute"
229
+ * // citations[1].type === "case"
230
+ * // citations[2].type === "journal"
231
+ * ```
232
+ */
233
+ declare function extractCitations(text: string, options?: ExtractOptions): Citation[] | ResolvedCitation[];
234
+ /**
235
+ * Asynchronous version of extractCitations().
236
+ *
237
+ * Currently wraps the synchronous extractCitations() function. This API
238
+ * exists for future extensibility when async operations are added:
239
+ * - Async reporters-db lookups (Phase 3)
240
+ * - Async resolution/annotation services
241
+ * - Web Workers for parallel processing
242
+ *
243
+ * For now, this function immediately resolves with the same results as
244
+ * the synchronous version.
245
+ *
246
+ * @param text - Raw text to extract citations from
247
+ * @param options - Optional customization (cleaners, patterns, resolve, resolutionOptions)
248
+ * @returns Promise resolving to array of citations (or ResolvedCitation[] if resolve: true)
249
+ *
250
+ * @example
251
+ * ```typescript
252
+ * const citations = await extractCitationsAsync(text, { resolve: true })
253
+ * // Returns ResolvedCitation[] with resolution metadata
254
+ * ```
255
+ */
256
+ declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[] | ResolvedCitation[]>;
257
+ //#endregion
258
+ //#region src/clean/cleanText.d.ts
259
+ /**
260
+ * Result of text cleaning operation.
261
+ */
262
+ interface CleanTextResult {
263
+ /** Cleaned text after all transformations */
264
+ cleaned: string;
265
+ /** Position mappings between cleaned and original text */
266
+ transformationMap: TransformationMap;
267
+ /** Warnings generated during cleaning (currently unused) */
268
+ warnings: Warning[];
269
+ }
270
+ /**
271
+ * Warning generated during text cleaning.
272
+ */
273
+ interface Warning {
274
+ level: "error" | "warning" | "info";
275
+ message: string;
276
+ position: {
277
+ start: number;
278
+ end: number;
279
+ };
280
+ }
281
+ /**
282
+ * Clean text using a pipeline of transformation functions.
283
+ *
284
+ * Applies cleaners sequentially while maintaining accurate position mappings
285
+ * between the original and cleaned text. This enables citation extraction from
286
+ * cleaned text while reporting positions in the original text.
287
+ *
288
+ * @param original - Original input text
289
+ * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)
290
+ * @returns Cleaned text with position mappings and warnings
291
+ *
292
+ * @example
293
+ * const result = cleanText("Smith v. <b>Doe</b>, 500 F.2d 123")
294
+ * // result.cleaned: "Smith v. Doe, 500 F.2d 123"
295
+ * // result.transformationMap tracks position shifts from HTML removal
296
+ */
297
+ declare function cleanText(original: string, cleaners?: Array<(text: string) => string>): CleanTextResult;
298
+ //#endregion
299
+ //#region src/tokenize/tokenizer.d.ts
300
+ /**
301
+ * A token representing a potential citation found in cleaned text.
302
+ *
303
+ * Tokens are produced by applying regex patterns to cleaned text.
304
+ * They include matched text, position in cleaned text, and pattern metadata
305
+ * for use in the extraction layer.
306
+ */
307
+ interface Token {
308
+ /** Matched text from input */
309
+ text: string;
310
+ /** Position in cleaned text (cleanStart/cleanEnd only, no original positions yet) */
311
+ span: Pick<Span, "cleanStart" | "cleanEnd">;
312
+ /** Pattern type that matched this token */
313
+ type: Pattern["type"];
314
+ /** Pattern ID that matched this token */
315
+ patternId: string;
316
+ }
317
+ /**
318
+ * Tokenizes cleaned text by applying regex patterns to find citation candidates.
319
+ *
320
+ * For each pattern in the patterns array:
321
+ * 1. Apply pattern.regex.matchAll(cleanedText)
322
+ * 2. Create Token for each match with position, text, and pattern metadata
323
+ * 3. Collect all tokens from all patterns
324
+ * 4. Sort by cleanStart position (ascending)
325
+ *
326
+ * Timeout protection: If a pattern throws (e.g., ReDoS), skip it and continue
327
+ * with remaining patterns. Logs warning to console.
328
+ *
329
+ * Note: This function is synchronous because regex matching is inherently
330
+ * synchronous. This enables both sync (extractCitations) and async
331
+ * (extractCitationsAsync) APIs in Plan 6.
332
+ *
333
+ * @param cleanedText - Text that has been cleaned by cleanText() from Plan 1
334
+ * @param patterns - Regex patterns to apply (defaults to all patterns from Plan 2)
335
+ * @returns Array of tokens sorted by position (cleanStart ascending)
336
+ *
337
+ * @example
338
+ * ```typescript
339
+ * import { tokenize } from '@/tokenize'
340
+ * import { cleanText } from '@/clean'
341
+ *
342
+ * const original = "See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)"
343
+ * const { cleaned } = cleanText(original)
344
+ * const tokens = tokenize(cleaned)
345
+ * // tokens[0] = {
346
+ * // text: "500 F.2d 123",
347
+ * // span: { cleanStart: 18, cleanEnd: 30 },
348
+ * // type: "case",
349
+ * // patternId: "federal-reporter"
350
+ * // }
351
+ * ```
352
+ */
353
+ declare function tokenize(cleanedText: string, patterns?: Pattern[]): Token[];
354
+ //#endregion
355
+ //#region src/extract/extractCase.d.ts
356
+ /**
357
+ * Extracts case citation metadata from a tokenized citation.
358
+ *
359
+ * Parses token text to extract:
360
+ * - Volume: Leading digits (e.g., "500" from "500 F.2d 123")
361
+ * - Reporter: Alphabetic abbreviation (e.g., "F.2d")
362
+ * - Page: Trailing digits after reporter (e.g., "123")
363
+ * - Pincite: Optional page reference after comma (e.g., ", 125")
364
+ * - Court: Optional court abbreviation in parentheses (e.g., "(9th Cir.)")
365
+ * - Year: Optional year in parentheses (e.g., "(2020)")
366
+ *
367
+ * Confidence scoring:
368
+ * - Base: 0.5
369
+ * - Common reporter pattern (F., U.S., etc.): +0.3
370
+ * - Valid year (not future): +0.2
371
+ * - Capped at 1.0
372
+ *
373
+ * Position translation:
374
+ * - Uses TransformationMap to convert clean positions → original positions
375
+ * - cleanStart/cleanEnd from token span
376
+ * - originalStart/originalEnd via transformationMap.cleanToOriginal
377
+ *
378
+ * Note: This function does NOT validate against reporters-db. That happens
379
+ * in Phase 3 (resolution layer). Phase 2 extraction only parses structure.
380
+ *
381
+ * @param token - Token from tokenizer containing matched text and clean positions
382
+ * @param transformationMap - Position mapping from clean → original text
383
+ * @returns FullCaseCitation with parsed metadata and translated positions
384
+ *
385
+ * @example
386
+ * ```typescript
387
+ * const token = {
388
+ * text: "500 F.2d 123, 125",
389
+ * span: { cleanStart: 10, cleanEnd: 27 },
390
+ * type: "case",
391
+ * patternId: "federal-reporter"
392
+ * }
393
+ * const citation = extractCase(token, transformationMap)
394
+ * // citation = {
395
+ * // type: "case",
396
+ * // text: "500 F.2d 123, 125",
397
+ * // volume: 500,
398
+ * // reporter: "F.2d",
399
+ * // page: 123,
400
+ * // pincite: 125,
401
+ * // span: { cleanStart: 10, cleanEnd: 27, originalStart: 10, originalEnd: 27 },
402
+ * // confidence: 0.8,
403
+ * // ...
404
+ * // }
405
+ * ```
406
+ */
407
+ declare function extractCase(token: Token, transformationMap: TransformationMap): FullCaseCitation;
408
+ //#endregion
409
+ //#region src/extract/extractStatute.d.ts
410
+ /**
411
+ * Extracts statute citation metadata from a tokenized citation.
412
+ *
413
+ * Parses token text to extract:
414
+ * - Title: Optional leading digits (e.g., "42" from "42 U.S.C. § 1983")
415
+ * - Code: Statutory code abbreviation (e.g., "U.S.C.", "Cal. Civ. Code")
416
+ * - Section: Section number after § symbol (e.g., "1983")
417
+ * - Subsections: Optional parenthetical subdivisions (e.g., "(a)(1)")
418
+ *
419
+ * Confidence scoring:
420
+ * - Base: 0.5
421
+ * - Known code pattern (U.S.C., C.F.R., state codes): +0.3
422
+ * - Capped at 1.0
423
+ *
424
+ * @param token - Token from tokenizer containing matched text and clean positions
425
+ * @param transformationMap - Position mapping from clean → original text
426
+ * @returns StatuteCitation with parsed metadata and translated positions
427
+ *
428
+ * @example
429
+ * ```typescript
430
+ * const token = {
431
+ * text: "42 U.S.C. § 1983",
432
+ * span: { cleanStart: 10, cleanEnd: 26 },
433
+ * type: "statute",
434
+ * patternId: "usc"
435
+ * }
436
+ * const citation = extractStatute(token, transformationMap)
437
+ * // citation = {
438
+ * // type: "statute",
439
+ * // title: 42,
440
+ * // code: "U.S.C.",
441
+ * // section: "1983",
442
+ * // ...
443
+ * // }
444
+ * ```
445
+ */
446
+ declare function extractStatute(token: Token, transformationMap: TransformationMap): StatuteCitation;
447
+ //#endregion
448
+ //#region src/extract/extractJournal.d.ts
449
+ /**
450
+ * Extracts journal citation metadata from a tokenized citation.
451
+ *
452
+ * Parses token text to extract:
453
+ * - Volume: Leading digits (e.g., "123" from "123 Harv. L. Rev. 456")
454
+ * - Journal: Journal abbreviation (e.g., "Harv. L. Rev.")
455
+ * - Page: Starting page number (e.g., "456")
456
+ * - Pincite: Optional specific page reference after comma (e.g., ", 458")
457
+ *
458
+ * Confidence scoring:
459
+ * - Base: 0.6 (journal validation happens in Phase 3)
460
+ *
461
+ * Note: Author and title extraction from preceding text is not implemented
462
+ * in Phase 2. That requires context analysis in Phase 3.
463
+ *
464
+ * @param token - Token from tokenizer containing matched text and clean positions
465
+ * @param transformationMap - Position mapping from clean → original text
466
+ * @returns JournalCitation with parsed metadata and translated positions
467
+ *
468
+ * @example
469
+ * ```typescript
470
+ * const token = {
471
+ * text: "123 Harv. L. Rev. 456",
472
+ * span: { cleanStart: 10, cleanEnd: 31 },
473
+ * type: "journal",
474
+ * patternId: "journal-standard"
475
+ * }
476
+ * const citation = extractJournal(token, transformationMap)
477
+ * // citation = {
478
+ * // type: "journal",
479
+ * // volume: 123,
480
+ * // journal: "Harv. L. Rev.",
481
+ * // abbreviation: "Harv. L. Rev.",
482
+ * // page: 456,
483
+ * // ...
484
+ * // }
485
+ * ```
486
+ */
487
+ declare function extractJournal(token: Token, transformationMap: TransformationMap): JournalCitation;
488
+ //#endregion
489
+ //#region src/extract/extractNeutral.d.ts
490
+ /**
491
+ * Extracts neutral citation metadata from a tokenized citation.
492
+ *
493
+ * Parses token text to extract:
494
+ * - Year: 4-digit year (e.g., "2020")
495
+ * - Court: Vendor identifier (e.g., "WL", "U.S. LEXIS")
496
+ * - Document number: Unique document identifier (e.g., "123456")
497
+ *
498
+ * Confidence scoring:
499
+ * - 1.0 (neutral format is unambiguous and standardized)
500
+ *
501
+ * @param token - Token from tokenizer containing matched text and clean positions
502
+ * @param transformationMap - Position mapping from clean → original text
503
+ * @returns NeutralCitation with parsed metadata and translated positions
504
+ *
505
+ * @example
506
+ * ```typescript
507
+ * const token = {
508
+ * text: "2020 WL 123456",
509
+ * span: { cleanStart: 10, cleanEnd: 24 },
510
+ * type: "neutral",
511
+ * patternId: "westlaw-neutral"
512
+ * }
513
+ * const citation = extractNeutral(token, transformationMap)
514
+ * // citation = {
515
+ * // type: "neutral",
516
+ * // year: 2020,
517
+ * // court: "WL",
518
+ * // documentNumber: "123456",
519
+ * // confidence: 1.0,
520
+ * // ...
521
+ * // }
522
+ * ```
523
+ */
524
+ declare function extractNeutral(token: Token, transformationMap: TransformationMap): NeutralCitation;
525
+ //#endregion
526
+ //#region src/extract/extractPublicLaw.d.ts
527
+ /**
528
+ * Extracts public law citation metadata from a tokenized citation.
529
+ *
530
+ * Parses token text to extract:
531
+ * - Congress: Congress number (e.g., "116" from "Pub. L. No. 116-283")
532
+ * - Law number: Law number within that Congress (e.g., "283")
533
+ *
534
+ * Confidence scoring:
535
+ * - 0.9 (public law format is fairly standard)
536
+ *
537
+ * Note: Bill title extraction from nearby text is not implemented in Phase 2.
538
+ * That requires context analysis in Phase 3.
539
+ *
540
+ * @param token - Token from tokenizer containing matched text and clean positions
541
+ * @param transformationMap - Position mapping from clean → original text
542
+ * @returns PublicLawCitation with parsed metadata and translated positions
543
+ *
544
+ * @example
545
+ * ```typescript
546
+ * const token = {
547
+ * text: "Pub. L. No. 116-283",
548
+ * span: { cleanStart: 10, cleanEnd: 29 },
549
+ * type: "publicLaw",
550
+ * patternId: "public-law"
551
+ * }
552
+ * const citation = extractPublicLaw(token, transformationMap)
553
+ * // citation = {
554
+ * // type: "publicLaw",
555
+ * // congress: 116,
556
+ * // lawNumber: 283,
557
+ * // confidence: 0.9,
558
+ * // ...
559
+ * // }
560
+ * ```
561
+ */
562
+ declare function extractPublicLaw(token: Token, transformationMap: TransformationMap): PublicLawCitation;
563
+ //#endregion
564
+ //#region src/extract/extractFederalRegister.d.ts
565
+ /**
566
+ * Extracts Federal Register citation metadata from a tokenized citation.
567
+ *
568
+ * Parses token text to extract:
569
+ * - Volume: Federal Register volume number (e.g., "85")
570
+ * - Page: Page number (e.g., "12345")
571
+ * - Year: Optional publication year in parentheses (e.g., "(2021)")
572
+ *
573
+ * Confidence scoring:
574
+ * - 0.9 (Federal Register format is standardized)
575
+ *
576
+ * @param token - Token from tokenizer containing matched text and clean positions
577
+ * @param transformationMap - Position mapping from clean → original text
578
+ * @returns FederalRegisterCitation with parsed metadata and translated positions
579
+ *
580
+ * @example
581
+ * ```typescript
582
+ * const token = {
583
+ * text: "85 Fed. Reg. 12345",
584
+ * span: { cleanStart: 10, cleanEnd: 28 },
585
+ * type: "federalRegister",
586
+ * patternId: "federal-register"
587
+ * }
588
+ * const citation = extractFederalRegister(token, transformationMap)
589
+ * // citation = {
590
+ * // type: "federalRegister",
591
+ * // volume: 85,
592
+ * // page: 12345,
593
+ * // confidence: 0.9,
594
+ * // ...
595
+ * // }
596
+ * ```
597
+ */
598
+ declare function extractFederalRegister(token: Token, transformationMap: TransformationMap): FederalRegisterCitation;
599
+ //#endregion
600
+ //#region src/resolve/DocumentResolver.d.ts
601
+ /**
602
+ * Document-scoped resolver that processes citations sequentially
603
+ * and resolves short-form citations to their antecedents.
604
+ */
605
+ declare class DocumentResolver {
606
+ private readonly citations;
607
+ private readonly text;
608
+ private readonly options;
609
+ private readonly context;
610
+ /**
611
+ * Creates a new DocumentResolver.
612
+ *
613
+ * @param citations - All citations in document (in order of appearance)
614
+ * @param text - Original document text
615
+ * @param options - Resolution options
616
+ */
617
+ constructor(citations: Citation[], text: string, options?: ResolutionOptions);
618
+ /**
619
+ * Resolves all citations in the document.
620
+ *
621
+ * @returns Array of citations with resolution metadata
622
+ */
623
+ resolve(): ResolvedCitation[];
624
+ /**
625
+ * Resolves Id. citation to immediately preceding full case citation.
626
+ */
627
+ private resolveId;
628
+ /**
629
+ * Resolves supra citation by matching party name.
630
+ */
631
+ private resolveSupra;
632
+ /**
633
+ * Resolves short-form case citation by matching volume/reporter.
634
+ */
635
+ private resolveShortFormCase;
636
+ /**
637
+ * Checks if a citation is a full citation (not short-form).
638
+ */
639
+ private isFullCitation;
640
+ /**
641
+ * Tracks a full citation in the resolution history.
642
+ * Extracts party name for supra resolution.
643
+ */
644
+ private trackFullCitation;
645
+ /**
646
+ * Extracts party name from full case citation text.
647
+ * Handles "Party v. Party" format by looking at text before citation span.
648
+ */
649
+ private extractPartyName;
650
+ /**
651
+ * Normalizes party name for matching.
652
+ */
653
+ private normalizePartyName;
654
+ /**
655
+ * Normalizes reporter abbreviation for matching.
656
+ */
657
+ private normalizeReporter;
658
+ /**
659
+ * Checks if antecedent citation is within scope boundary.
660
+ */
661
+ private isWithinScope;
662
+ /**
663
+ * Creates a failure result for unresolved citations.
664
+ */
665
+ private createFailureResult;
666
+ }
667
+ //#endregion
668
+ //#region src/resolve/index.d.ts
669
+ /**
670
+ * Resolves short-form citations to their full antecedents.
671
+ *
672
+ * Convenience wrapper around DocumentResolver that handles common use cases.
673
+ *
674
+ * @param citations - Extracted citations in order of appearance
675
+ * @param text - Original document text
676
+ * @param options - Resolution options
677
+ * @returns Citations with resolution metadata
678
+ */
679
+ declare function resolveCitations(citations: Citation[], text: string, options?: ResolutionOptions): ResolvedCitation[];
680
+ //#endregion
681
+ export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
682
+ //# sourceMappingURL=index.d.cts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.cts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;AAcA;;;;;;;;;;UAAiB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA;AAAA;;;;;;;KCLU,aAAA;;;;UAKK,iBAAA;;AALjB;;;;;AAKA;EAQE,aAAA,GAAgB,aAAA;;;;;EAMhB,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;;;;AAgC7B;EA1BE,kBAAA;;;;;;EAOA,mBAAA;;;AA+CF;;;EAxCE,qBAAA;;;;;EAMA,gBAAA;AAAA;;;ACzBF;UD+BiB,gBAAA;;;;;EAKf,UAAA;EC0BmB;;;EDrBnB,aAAA;;;;EAKA,QAAA;;;;AC6EF;EDvEE,UAAA;AAAA;;;;;KAOU,gBAAA,GAAmB,QAAA;;;;;EAK7B,UAAA,GAAa,gBAAA;AAAA;;;ADzFf;;;AAAA,UEyBiB,cAAA;;;;;;;;;;;AD1BjB;;;;ECyCC,QAAA,GAAW,KAAA,EAAO,IAAA;EDpCnB;;;;;;;;;;;;;;ECoDC,QAAA,GAAW,OAAA;EDAZ;;;;;;;;;;AA4BA;;;ECbC,OAAA;;;;;;;;;AA9CD;;;;;;EA8DC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;AA6DrB;;;;;;;;;;;;;;;AAoHA;;;;;;;;;;;;;;;;;;;;AC3QA;;;;;;;;;;;AAcA;;iBDyIgB,gBAAA,CACf,IAAA,UACA,OAAA,GAAU,cAAA,GACR,QAAA,KAAa,gBAAA;;;;;;;;;ACtHhB;;;;;;;;;;;;;;iBDuOsB,qBAAA,CACrB,IAAA,UACA,OAAA,GAAU,cAAA,GACR,OAAA,CAAQ,QAAA,KAAa,gBAAA;;;;;AF3QxB;UGHiB,eAAA;;EAEhB,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;;;UAMM,OAAA;EAChB,KAAA;EACA,OAAA;EACA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;AAAA;;;;;;;;;;;;;;;AF0C5B;;iBEvBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;AF1CH;;;;;AAKA;;AALA,UGoBiB,KAAA;EHKY;EGH3B,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;AH0BF;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;;iBE4CgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;AH1EH;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;iBGgCgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;;;;;AJ7D
H;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;iBKrBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ALvCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;iBM/CgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ANzCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;iBOvBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;APrCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;iBQtBgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;;;;;ARtCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;iBSxBgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;;;;;cCjBU,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EVlBnB;;;;;;;EU2BE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;;;EAwCX,OAAA,CAAA,GAAW,gBAAA;EVxBX;AAMF;;EANE,QUkEQ,SAAA;EV5DO;;;EAAA,QU4FP,YAAA;;;;UAiDA,oBAAA;EVjHE;;;EAAA,QUuJF,cAAA;;;;;UAeA,iBAAA;;;ATjOV;;USiPU,gBAAA;;;;UAyBA,kBAAA;ET5MW;;;EAAA,QSsNX,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;AV/TV;;;;;;;;iBWegB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}