eyecite-ts 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +338 -0
- package/dist/annotate/index.cjs +2 -0
- package/dist/annotate/index.cjs.map +1 -0
- package/dist/annotate/index.d.cts +163 -0
- package/dist/annotate/index.d.cts.map +1 -0
- package/dist/annotate/index.d.mts +163 -0
- package/dist/annotate/index.d.mts.map +1 -0
- package/dist/annotate/index.mjs +2 -0
- package/dist/annotate/index.mjs.map +1 -0
- package/dist/citation-8_GvfEuj.d.mts +286 -0
- package/dist/citation-8_GvfEuj.d.mts.map +1 -0
- package/dist/citation-BcY5zzWb.d.cts +286 -0
- package/dist/citation-BcY5zzWb.d.cts.map +1 -0
- package/dist/data/index.cjs +2 -0
- package/dist/data/index.cjs.map +1 -0
- package/dist/data/index.d.cts +116 -0
- package/dist/data/index.d.cts.map +1 -0
- package/dist/data/index.d.mts +116 -0
- package/dist/data/index.d.mts.map +1 -0
- package/dist/data/index.mjs +2 -0
- package/dist/data/index.mjs.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +682 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +682 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/dist/reporters-BclWimmk.cjs +2 -0
- package/dist/reporters-BclWimmk.cjs.map +1 -0
- package/dist/reporters-DYNnh4O0.mjs +2 -0
- package/dist/reporters-DYNnh4O0.mjs.map +1 -0
- package/package.json +69 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,682 @@
|
|
|
1
|
+
import { a as FullCaseCitation, c as NeutralCitation, d as StatuteCitation, f as SupraCitation, i as FederalRegisterCitation, l as PublicLawCitation, m as TransformationMap, n as CitationBase, o as IdCitation, p as Span, r as CitationType, s as JournalCitation, t as Citation, u as ShortFormCaseCitation } from "./citation-BcY5zzWb.cjs";
|
|
2
|
+
|
|
3
|
+
//#region src/patterns/casePatterns.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Case Citation Regex Patterns
|
|
6
|
+
*
|
|
7
|
+
* These patterns are designed for tokenization (broad matching) not extraction.
|
|
8
|
+
* They identify potential case citations in text for the tokenizer (Plan 3).
|
|
9
|
+
* Metadata parsing happens in Phase 2 Plan 5 (extraction layer); validation against reporters-db is deferred to Phase 3.
|
|
10
|
+
*
|
|
11
|
+
* Pattern Design Principles (from RESEARCH.md):
|
|
12
|
+
* - Use \b word boundaries to avoid matching "F." in "F.B.I."
|
|
13
|
+
* - Avoid nested quantifiers: (a+)+ causes ReDoS
|
|
14
|
+
* - Keep patterns simple: tokenization only needs to find candidates
|
|
15
|
+
* - Use global flag /g for matchAll()
|
|
16
|
+
*/
|
|
17
|
+
interface Pattern {
  /** Identifier of this pattern; tokens produced by it carry the same value as `patternId`. */
  id: string;
  /** Regex applied to cleaned text; expected to carry the global (`/g`) flag so it works with matchAll(). */
  regex: RegExp;
  /** Description of the pattern. */
  description: string;
  /** Citation category this pattern detects; tokens expose it as `Token["type"]`. */
  type: "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister";
}
|
|
23
|
+
//#endregion
|
|
24
|
+
//#region src/resolve/types.d.ts
|
|
25
|
+
/**
|
|
26
|
+
* Scope boundary strategy for resolution.
|
|
27
|
+
* Determines how far back to search for antecedent citations.
|
|
28
|
+
*/
|
|
29
|
+
type ScopeStrategy =
  | "paragraph"
  | "section"
  | "footnote"
  | "none";
|
|
30
|
+
/**
|
|
31
|
+
* Options for citation resolution.
|
|
32
|
+
*/
|
|
33
|
+
interface ResolutionOptions {
  /**
   * Scope boundary strategy (default: 'paragraph').
   * - paragraph: Only resolve within the same paragraph
   * - section: Only resolve within the same section
   * - footnote: Only resolve within the same footnote
   * - none: Resolve across the entire document
   */
  scopeStrategy?: ScopeStrategy;
  /**
   * Auto-detect paragraph boundaries from text (default: true).
   * Uses paragraphBoundaryPattern to split the text.
   */
  autoDetectParagraphs?: boolean;
  /**
   * Regex pattern used to detect paragraph boundaries (default: /\n\n+/).
   * Only used if autoDetectParagraphs is true.
   */
  paragraphBoundaryPattern?: RegExp;
  /**
   * Enable fuzzy party name matching for supra resolution (default: true).
   * Uses Levenshtein distance to handle typos and variations.
   */
  fuzzyPartyMatching?: boolean;
  /**
   * Similarity threshold for fuzzy party matching (default: 0.8).
   * Range: 0-1, where 1.0 is an exact match.
   * Only used if fuzzyPartyMatching is true.
   */
  partyMatchThreshold?: number;
  /**
   * Allow Id. citations to resolve to other short-form citations (default: false).
   * Example sequence: "Smith v. Jones, 500 F.2d 100" -> "Id." -> "Id. at 105"
   * If true: the second Id. may resolve via the preceding short-form citation.
   * If false: the second Id. fails to resolve (no full citation between them).
   */
  allowNestedResolution?: boolean;
  /**
   * Report unresolved citations with failure reasons (default: true).
   * If false: the resolution field will be undefined for unresolved citations.
   */
  reportUnresolved?: boolean;
}
|
|
75
|
+
/**
|
|
76
|
+
* Result of resolving a short-form citation.
|
|
77
|
+
*/
|
|
78
|
+
interface ResolutionResult {
  /**
   * Index of the citation this resolves to.
   * undefined if resolution failed.
   */
  resolvedTo?: number;
  /**
   * Reason for resolution failure (if any).
   */
  failureReason?: string;
  /**
   * Warnings about ambiguous or uncertain resolutions.
   */
  warnings?: string[];
  /**
   * Confidence in the resolution (0-1).
   * Factors: party name similarity, scope boundary, citation type match.
   */
  confidence: number;
}
|
|
98
|
+
/**
|
|
99
|
+
* Citation with optional resolution metadata.
|
|
100
|
+
* Uses intersection type to add resolution field to any Citation type.
|
|
101
|
+
*/
|
|
102
|
+
type ResolvedCitation = Citation & {
  /**
   * Resolution result for short-form citations.
   * Only present for Id/supra/shortFormCase types; because the field is
   * optional, guard against undefined before reading it.
   */
  resolution?: ResolutionResult;
};
|
|
109
|
+
//#endregion
|
|
110
|
+
//#region src/extract/extractCitations.d.ts
|
|
111
|
+
/**
|
|
112
|
+
* Options for customizing citation extraction behavior.
|
|
113
|
+
*/
|
|
114
|
+
interface ExtractOptions {
  /**
   * Custom text cleaners (overrides defaults).
   *
   * If provided, these cleaners replace the default pipeline:
   * [stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes]
   *
   * @example
   * ```typescript
   * // Use only HTML stripping, skip Unicode normalization
   * const citations = extractCitations(text, {
   *   cleaners: [stripHtmlTags]
   * })
   * ```
   */
  cleaners?: Array<(text: string) => string>;
  /**
   * Custom regex patterns (overrides defaults).
   *
   * If provided, these patterns replace the default pattern set:
   * [casePatterns, statutePatterns, journalPatterns, neutralPatterns, shortFormPatterns]
   *
   * @example
   * ```typescript
   * // Extract only case citations
   * const citations = extractCitations(text, {
   *   patterns: casePatterns
   * })
   * ```
   */
  patterns?: Pattern[];
  /**
   * Resolve short-form citations to their full antecedents (default: false).
   *
   * If true, returns ResolvedCitation[] with resolution metadata for short-form citations
   * (Id., supra, short-form case). Full citations are unchanged.
   *
   * @example
   * ```typescript
   * const text = "Smith v. Jones, 500 F.2d 100 (1974). Id. at 105."
   * const citations = extractCitations(text, { resolve: true })
   * // `resolution` is optional, so guard the access:
   * // citations[1].resolution?.resolvedTo === 0 (points to Smith v. Jones)
   * ```
   */
  resolve?: boolean;
  /**
   * Options for citation resolution (only used if resolve: true).
   *
   * @example
   * ```typescript
   * const citations = extractCitations(text, {
   *   resolve: true,
   *   resolutionOptions: {
   *     scopeStrategy: 'paragraph',
   *     fuzzyPartyMatching: true
   *   }
   * })
   * ```
   */
  resolutionOptions?: ResolutionOptions;
}
|
|
175
|
+
/**
|
|
176
|
+
* Extracts legal citations from text using the full parsing pipeline.
|
|
177
|
+
*
|
|
178
|
+
* Pipeline flow:
|
|
179
|
+
* 1. **Clean:** Remove HTML tags, normalize whitespace and Unicode, fix smart quotes
|
|
180
|
+
* 2. **Tokenize:** Apply regex patterns to find citation candidates
|
|
181
|
+
* 3. **Extract:** Parse metadata (volume, reporter, page, etc.)
|
|
182
|
+
* 4. **Translate:** Map positions from cleaned text back to original text
|
|
183
|
+
*
|
|
184
|
+
* This function is synchronous because all stages (cleaning, tokenization,
|
|
185
|
+
* extraction) are synchronous. For async operations (e.g., future reporters-db
|
|
186
|
+
* lookups), use extractCitationsAsync().
|
|
187
|
+
*
|
|
188
|
+
* Position tracking:
|
|
189
|
+
* - TransformationMap is built during cleaning
|
|
190
|
+
* - Tokens contain positions in cleaned text (cleanStart/cleanEnd)
|
|
191
|
+
* - Extraction translates cleaned positions → original positions
|
|
192
|
+
* - Final citations have originalStart/originalEnd pointing to input text
|
|
193
|
+
*
|
|
194
|
+
* Warnings from cleaning layer are attached to all extracted citations.
|
|
195
|
+
*
|
|
196
|
+
* @param text - Raw text to extract citations from (may contain HTML, Unicode)
|
|
197
|
+
* @param options - Optional customization (cleaners, patterns, resolve, resolutionOptions)
|
|
198
|
+
* @returns Array of citations with parsed metadata and accurate positions (ResolvedCitation[] if resolve: true)
|
|
199
|
+
*
|
|
200
|
+
* @example
|
|
201
|
+
* ```typescript
|
|
202
|
+
* const text = "See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)"
|
|
203
|
+
* const citations = extractCitations(text)
|
|
204
|
+
* // citations[0] = {
|
|
205
|
+
* // type: "case",
|
|
206
|
+
* // volume: 500,
|
|
207
|
+
* // reporter: "F.2d",
|
|
208
|
+
* // page: 123,
|
|
209
|
+
* // court: "9th Cir.",
|
|
210
|
+
* // year: 2020,
|
|
211
|
+
* // span: { originalStart: 18, originalEnd: 30, ... }
|
|
212
|
+
* // }
|
|
213
|
+
* ```
|
|
214
|
+
*
|
|
215
|
+
* @example
|
|
216
|
+
* ```typescript
|
|
217
|
+
* // Extract from HTML
|
|
218
|
+
* const html = "<p>In <b>Smith</b>, 500 F.2d 123, the court held...</p>"
|
|
219
|
+
* const citations = extractCitations(html)
|
|
220
|
+
* // HTML is stripped, positions point to original HTML
|
|
221
|
+
* ```
|
|
222
|
+
*
|
|
223
|
+
* @example
|
|
224
|
+
* ```typescript
|
|
225
|
+
* // Extract multiple citation types
|
|
226
|
+
* const text = "See 42 U.S.C. § 1983; Smith, 500 F.2d 123; 123 Harv. L. Rev. 456"
|
|
227
|
+
* const citations = extractCitations(text)
|
|
228
|
+
* // citations[0].type === "statute"
|
|
229
|
+
* // citations[1].type === "case"
|
|
230
|
+
* // citations[2].type === "journal"
|
|
231
|
+
* ```
|
|
232
|
+
*/
|
|
233
|
+
declare function extractCitations(text: string, options?: ExtractOptions & {
  resolve?: false;
}): Citation[];
/**
 * Extracts legal citations and resolves short-form citations.
 *
 * Selected when `resolve` is the literal `true`, so the result is typed as
 * ResolvedCitation[] and the optional `resolution` field can be read without
 * narrowing a union first.
 *
 * @param text - Raw text to extract citations from
 * @param options - Extraction options with `resolve: true`
 * @returns Citations with resolution metadata on short-form citations
 */
declare function extractCitations(text: string, options: ExtractOptions & {
  resolve: true;
}): ResolvedCitation[];
/**
 * Extracts legal citations when the `resolve` flag is only known at runtime.
 *
 * This is the original, widest signature; it is kept last so existing callers
 * that pass a plain ExtractOptions value continue to type-check unchanged.
 *
 * @param text - Raw text to extract citations from
 * @param options - Optional customization (cleaners, patterns, resolve, resolutionOptions)
 * @returns Citation[] or, if `options.resolve` is true, ResolvedCitation[]
 */
declare function extractCitations(text: string, options?: ExtractOptions): Citation[] | ResolvedCitation[];
|
|
234
|
+
/**
|
|
235
|
+
* Asynchronous version of extractCitations().
|
|
236
|
+
*
|
|
237
|
+
* Currently wraps the synchronous extractCitations() function. This API
|
|
238
|
+
* exists for future extensibility when async operations are added:
|
|
239
|
+
* - Async reporters-db lookups (Phase 3)
|
|
240
|
+
* - Async resolution/annotation services
|
|
241
|
+
* - Web Workers for parallel processing
|
|
242
|
+
*
|
|
243
|
+
* For now, this function immediately resolves with the same results as
|
|
244
|
+
* the synchronous version.
|
|
245
|
+
*
|
|
246
|
+
* @param text - Raw text to extract citations from
|
|
247
|
+
* @param options - Optional customization (cleaners, patterns, resolve)
|
|
248
|
+
* @returns Promise resolving to array of citations (or ResolvedCitation[] if resolve: true)
|
|
249
|
+
*
|
|
250
|
+
* @example
|
|
251
|
+
* ```typescript
|
|
252
|
+
* const citations = await extractCitationsAsync(text, { resolve: true })
|
|
253
|
+
* // Returns ResolvedCitation[] with resolution metadata
|
|
254
|
+
* ```
|
|
255
|
+
*/
|
|
256
|
+
declare function extractCitationsAsync(text: string, options?: ExtractOptions & {
  resolve?: false;
}): Promise<Citation[]>;
/**
 * Asynchronously extracts legal citations and resolves short-form citations.
 *
 * Selected when `resolve` is the literal `true`, so the awaited result is typed
 * as ResolvedCitation[] and the optional `resolution` field can be read without
 * narrowing a union first.
 *
 * @param text - Raw text to extract citations from
 * @param options - Extraction options with `resolve: true`
 * @returns Promise resolving to citations with resolution metadata
 */
declare function extractCitationsAsync(text: string, options: ExtractOptions & {
  resolve: true;
}): Promise<ResolvedCitation[]>;
/**
 * Asynchronously extracts legal citations when the `resolve` flag is only known at runtime.
 *
 * This is the original, widest signature; it is kept last so existing callers
 * that pass a plain ExtractOptions value continue to type-check unchanged.
 *
 * @param text - Raw text to extract citations from
 * @param options - Optional customization (cleaners, patterns, resolve)
 * @returns Promise resolving to Citation[] or, if `options.resolve` is true, ResolvedCitation[]
 */
declare function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[] | ResolvedCitation[]>;
|
|
257
|
+
//#endregion
|
|
258
|
+
//#region src/clean/cleanText.d.ts
|
|
259
|
+
/**
|
|
260
|
+
* Result of text cleaning operation.
|
|
261
|
+
*/
|
|
262
|
+
interface CleanTextResult {
  /** Cleaned text after all transformations */
  cleaned: string;
  /** Position mappings between cleaned and original text */
  transformationMap: TransformationMap;
  /** Warnings generated during cleaning (currently unused) */
  warnings: Warning[];
}
|
|
270
|
+
/**
|
|
271
|
+
* Warning generated during text cleaning.
|
|
272
|
+
*/
|
|
273
|
+
interface Warning {
  /** Severity of the warning. */
  level: "error" | "warning" | "info";
  /** Human-readable description of the problem. */
  message: string;
  /** Text range the warning refers to, as start/end offsets. */
  position: {
    start: number;
    end: number;
  };
}
|
|
281
|
+
/**
|
|
282
|
+
* Clean text using a pipeline of transformation functions.
|
|
283
|
+
*
|
|
284
|
+
* Applies cleaners sequentially while maintaining accurate position mappings
|
|
285
|
+
* between the original and cleaned text. This enables citation extraction from
|
|
286
|
+
* cleaned text while reporting positions in the original text.
|
|
287
|
+
*
|
|
288
|
+
* @param original - Original input text
|
|
289
|
+
* @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)
|
|
290
|
+
* @returns Cleaned text with position mappings and warnings
|
|
291
|
+
*
|
|
292
|
+
* @example
|
|
293
|
+
* const result = cleanText("Smith v. <b>Doe</b>, 500 F.2d 123")
|
|
294
|
+
* // result.cleaned: "Smith v. Doe, 500 F.2d 123"
|
|
295
|
+
* // result.transformationMap tracks position shifts from HTML removal
|
|
296
|
+
*/
|
|
297
|
+
declare function cleanText(
  original: string,
  cleaners?: ((text: string) => string)[]
): CleanTextResult;
|
|
298
|
+
//#endregion
|
|
299
|
+
//#region src/tokenize/tokenizer.d.ts
|
|
300
|
+
/**
|
|
301
|
+
* A token representing a potential citation found in cleaned text.
|
|
302
|
+
*
|
|
303
|
+
* Tokens are produced by applying regex patterns to cleaned text.
|
|
304
|
+
* They include matched text, position in cleaned text, and pattern metadata
|
|
305
|
+
* for use in the extraction layer.
|
|
306
|
+
*/
|
|
307
|
+
interface Token {
  /** Matched text from input */
  text: string;
  /** Position in cleaned text (cleanStart/cleanEnd only, no original positions yet) */
  span: Pick<Span, "cleanStart" | "cleanEnd">;
  /** Pattern type that matched this token */
  type: Pattern["type"];
  /** Pattern ID that matched this token */
  patternId: string;
}
|
|
317
|
+
/**
|
|
318
|
+
* Tokenizes cleaned text by applying regex patterns to find citation candidates.
|
|
319
|
+
*
|
|
320
|
+
* For each pattern in the patterns array:
|
|
321
|
+
* 1. Apply pattern.regex.matchAll(cleanedText)
|
|
322
|
+
* 2. Create Token for each match with position, text, and pattern metadata
|
|
323
|
+
* 3. Collect all tokens from all patterns
|
|
324
|
+
* 4. Sort by cleanStart position (ascending)
|
|
325
|
+
*
|
|
326
|
+
* Timeout protection: If a pattern throws (e.g., ReDoS), skip it and continue
|
|
327
|
+
* with remaining patterns. Logs warning to console.
|
|
328
|
+
*
|
|
329
|
+
* Note: This function is synchronous because regex matching is inherently
|
|
330
|
+
* synchronous. This enables both sync (extractCitations) and async
|
|
331
|
+
* (extractCitationsAsync) APIs in Plan 6.
|
|
332
|
+
*
|
|
333
|
+
* @param cleanedText - Text that has been cleaned by cleanText() from Plan 1
|
|
334
|
+
* @param patterns - Regex patterns to apply (defaults to all patterns from Plan 2)
|
|
335
|
+
* @returns Array of tokens sorted by position (cleanStart ascending)
|
|
336
|
+
*
|
|
337
|
+
* @example
|
|
338
|
+
* ```typescript
|
|
339
|
+
* import { tokenize } from '@/tokenize'
|
|
340
|
+
* import { cleanText } from '@/clean'
|
|
341
|
+
*
|
|
342
|
+
* const original = "See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)"
|
|
343
|
+
* const { cleaned: cleanedText } = cleanText(original)
|
|
344
|
+
* const tokens = tokenize(cleanedText)
|
|
345
|
+
* // tokens[0] = {
|
|
346
|
+
* // text: "500 F.2d 123",
|
|
347
|
+
* // span: { cleanStart: 18, cleanEnd: 30 },
|
|
348
|
+
* // type: "case",
|
|
349
|
+
* // patternId: "federal-reporter"
|
|
350
|
+
* // }
|
|
351
|
+
* ```
|
|
352
|
+
*/
|
|
353
|
+
declare function tokenize(
  cleanedText: string,
  patterns?: Array<Pattern>
): Array<Token>;
|
|
354
|
+
//#endregion
|
|
355
|
+
//#region src/extract/extractCase.d.ts
|
|
356
|
+
/**
|
|
357
|
+
* Extracts case citation metadata from a tokenized citation.
|
|
358
|
+
*
|
|
359
|
+
* Parses token text to extract:
|
|
360
|
+
* - Volume: Leading digits (e.g., "500" from "500 F.2d 123")
|
|
361
|
+
* - Reporter: Alphabetic abbreviation (e.g., "F.2d")
|
|
362
|
+
* - Page: Trailing digits after reporter (e.g., "123")
|
|
363
|
+
* - Pincite: Optional page reference after comma (e.g., ", 125")
|
|
364
|
+
* - Court: Optional court abbreviation in parentheses (e.g., "(9th Cir.)")
|
|
365
|
+
* - Year: Optional year in parentheses (e.g., "(2020)")
|
|
366
|
+
*
|
|
367
|
+
* Confidence scoring:
|
|
368
|
+
* - Base: 0.5
|
|
369
|
+
* - Common reporter pattern (F., U.S., etc.): +0.3
|
|
370
|
+
* - Valid year (not future): +0.2
|
|
371
|
+
* - Capped at 1.0
|
|
372
|
+
*
|
|
373
|
+
* Position translation:
|
|
374
|
+
* - Uses TransformationMap to convert clean positions → original positions
|
|
375
|
+
* - cleanStart/cleanEnd from token span
|
|
376
|
+
* - originalStart/originalEnd via transformationMap.cleanToOriginal
|
|
377
|
+
*
|
|
378
|
+
* Note: This function does NOT validate against reporters-db. That happens
|
|
379
|
+
* in Phase 3 (resolution layer). Phase 2 extraction only parses structure.
|
|
380
|
+
*
|
|
381
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
382
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
383
|
+
* @returns FullCaseCitation with parsed metadata and translated positions
|
|
384
|
+
*
|
|
385
|
+
* @example
|
|
386
|
+
* ```typescript
|
|
387
|
+
* const token = {
|
|
388
|
+
* text: "500 F.2d 123, 125",
|
|
389
|
+
* span: { cleanStart: 10, cleanEnd: 27 },
|
|
390
|
+
* type: "case",
|
|
391
|
+
* patternId: "federal-reporter"
|
|
392
|
+
* }
|
|
393
|
+
* const citation = extractCase(token, transformationMap)
|
|
394
|
+
* // citation = {
|
|
395
|
+
* // type: "case",
|
|
396
|
+
* // text: "500 F.2d 123, 125",
|
|
397
|
+
* // volume: 500,
|
|
398
|
+
* // reporter: "F.2d",
|
|
399
|
+
* // page: 123,
|
|
400
|
+
* // pincite: 125,
|
|
401
|
+
* // span: { cleanStart: 10, cleanEnd: 27, originalStart: 10, originalEnd: 27 },
|
|
402
|
+
* // confidence: 0.8,
|
|
403
|
+
* // ...
|
|
404
|
+
* // }
|
|
405
|
+
* ```
|
|
406
|
+
*/
|
|
407
|
+
declare function extractCase(
  token: Token,
  transformationMap: TransformationMap
): FullCaseCitation;
|
|
408
|
+
//#endregion
|
|
409
|
+
//#region src/extract/extractStatute.d.ts
|
|
410
|
+
/**
|
|
411
|
+
* Extracts statute citation metadata from a tokenized citation.
|
|
412
|
+
*
|
|
413
|
+
* Parses token text to extract:
|
|
414
|
+
* - Title: Optional leading digits (e.g., "42" from "42 U.S.C. § 1983")
|
|
415
|
+
* - Code: Statutory code abbreviation (e.g., "U.S.C.", "Cal. Civ. Code")
|
|
416
|
+
* - Section: Section number after § symbol (e.g., "1983")
|
|
417
|
+
* - Subsections: Optional parenthetical subdivisions (e.g., "(a)(1)")
|
|
418
|
+
*
|
|
419
|
+
* Confidence scoring:
|
|
420
|
+
* - Base: 0.5
|
|
421
|
+
* - Known code pattern (U.S.C., C.F.R., state codes): +0.3
|
|
422
|
+
* - Capped at 1.0
|
|
423
|
+
*
|
|
424
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
425
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
426
|
+
* @returns StatuteCitation with parsed metadata and translated positions
|
|
427
|
+
*
|
|
428
|
+
* @example
|
|
429
|
+
* ```typescript
|
|
430
|
+
* const token = {
|
|
431
|
+
* text: "42 U.S.C. § 1983",
|
|
432
|
+
* span: { cleanStart: 10, cleanEnd: 26 },
|
|
433
|
+
* type: "statute",
|
|
434
|
+
* patternId: "usc"
|
|
435
|
+
* }
|
|
436
|
+
* const citation = extractStatute(token, transformationMap)
|
|
437
|
+
* // citation = {
|
|
438
|
+
* // type: "statute",
|
|
439
|
+
* // title: 42,
|
|
440
|
+
* // code: "U.S.C.",
|
|
441
|
+
* // section: "1983",
|
|
442
|
+
* // ...
|
|
443
|
+
* // }
|
|
444
|
+
* ```
|
|
445
|
+
*/
|
|
446
|
+
declare function extractStatute(
  token: Token,
  transformationMap: TransformationMap
): StatuteCitation;
|
|
447
|
+
//#endregion
|
|
448
|
+
//#region src/extract/extractJournal.d.ts
|
|
449
|
+
/**
|
|
450
|
+
* Extracts journal citation metadata from a tokenized citation.
|
|
451
|
+
*
|
|
452
|
+
* Parses token text to extract:
|
|
453
|
+
* - Volume: Leading digits (e.g., "123" from "123 Harv. L. Rev. 456")
|
|
454
|
+
* - Journal: Journal abbreviation (e.g., "Harv. L. Rev.")
|
|
455
|
+
* - Page: Starting page number (e.g., "456")
|
|
456
|
+
* - Pincite: Optional specific page reference after comma (e.g., ", 458")
|
|
457
|
+
*
|
|
458
|
+
* Confidence scoring:
|
|
459
|
+
* - Base: 0.6 (journal validation happens in Phase 3)
|
|
460
|
+
*
|
|
461
|
+
* Note: Author and title extraction from preceding text is not implemented
|
|
462
|
+
* in Phase 2. That requires context analysis in Phase 3.
|
|
463
|
+
*
|
|
464
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
465
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
466
|
+
* @returns JournalCitation with parsed metadata and translated positions
|
|
467
|
+
*
|
|
468
|
+
* @example
|
|
469
|
+
* ```typescript
|
|
470
|
+
* const token = {
|
|
471
|
+
* text: "123 Harv. L. Rev. 456",
|
|
472
|
+
* span: { cleanStart: 10, cleanEnd: 31 },
|
|
473
|
+
* type: "journal",
|
|
474
|
+
* patternId: "journal-standard"
|
|
475
|
+
* }
|
|
476
|
+
* const citation = extractJournal(token, transformationMap)
|
|
477
|
+
* // citation = {
|
|
478
|
+
* // type: "journal",
|
|
479
|
+
* // volume: 123,
|
|
480
|
+
* // journal: "Harv. L. Rev.",
|
|
481
|
+
* // abbreviation: "Harv. L. Rev.",
|
|
482
|
+
* // page: 456,
|
|
483
|
+
* // ...
|
|
484
|
+
* // }
|
|
485
|
+
* ```
|
|
486
|
+
*/
|
|
487
|
+
declare function extractJournal(
  token: Token,
  transformationMap: TransformationMap
): JournalCitation;
|
|
488
|
+
//#endregion
|
|
489
|
+
//#region src/extract/extractNeutral.d.ts
|
|
490
|
+
/**
|
|
491
|
+
* Extracts neutral citation metadata from a tokenized citation.
|
|
492
|
+
*
|
|
493
|
+
* Parses token text to extract:
|
|
494
|
+
* - Year: 4-digit year (e.g., "2020")
|
|
495
|
+
* - Court: Vendor identifier (e.g., "WL", "U.S. LEXIS")
|
|
496
|
+
* - Document number: Unique document identifier (e.g., "123456")
|
|
497
|
+
*
|
|
498
|
+
* Confidence scoring:
|
|
499
|
+
* - 1.0 (neutral format is unambiguous and standardized)
|
|
500
|
+
*
|
|
501
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
502
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
503
|
+
* @returns NeutralCitation with parsed metadata and translated positions
|
|
504
|
+
*
|
|
505
|
+
* @example
|
|
506
|
+
* ```typescript
|
|
507
|
+
* const token = {
|
|
508
|
+
* text: "2020 WL 123456",
|
|
509
|
+
* span: { cleanStart: 10, cleanEnd: 24 },
|
|
510
|
+
* type: "neutral",
|
|
511
|
+
* patternId: "westlaw-neutral"
|
|
512
|
+
* }
|
|
513
|
+
* const citation = extractNeutral(token, transformationMap)
|
|
514
|
+
* // citation = {
|
|
515
|
+
* // type: "neutral",
|
|
516
|
+
* // year: 2020,
|
|
517
|
+
* // court: "WL",
|
|
518
|
+
* // documentNumber: "123456",
|
|
519
|
+
* // confidence: 1.0,
|
|
520
|
+
* // ...
|
|
521
|
+
* // }
|
|
522
|
+
* ```
|
|
523
|
+
*/
|
|
524
|
+
declare function extractNeutral(
  token: Token,
  transformationMap: TransformationMap
): NeutralCitation;
|
|
525
|
+
//#endregion
|
|
526
|
+
//#region src/extract/extractPublicLaw.d.ts
|
|
527
|
+
/**
|
|
528
|
+
* Extracts public law citation metadata from a tokenized citation.
|
|
529
|
+
*
|
|
530
|
+
* Parses token text to extract:
|
|
531
|
+
* - Congress: Congress number (e.g., "116" from "Pub. L. No. 116-283")
|
|
532
|
+
* - Law number: Law number within that Congress (e.g., "283")
|
|
533
|
+
*
|
|
534
|
+
* Confidence scoring:
|
|
535
|
+
* - 0.9 (public law format is fairly standard)
|
|
536
|
+
*
|
|
537
|
+
* Note: Bill title extraction from nearby text is not implemented in Phase 2.
|
|
538
|
+
* That requires context analysis in Phase 3.
|
|
539
|
+
*
|
|
540
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
541
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
542
|
+
* @returns PublicLawCitation with parsed metadata and translated positions
|
|
543
|
+
*
|
|
544
|
+
* @example
|
|
545
|
+
* ```typescript
|
|
546
|
+
* const token = {
|
|
547
|
+
* text: "Pub. L. No. 116-283",
|
|
548
|
+
* span: { cleanStart: 10, cleanEnd: 29 },
|
|
549
|
+
* type: "publicLaw",
|
|
550
|
+
* patternId: "public-law"
|
|
551
|
+
* }
|
|
552
|
+
* const citation = extractPublicLaw(token, transformationMap)
|
|
553
|
+
* // citation = {
|
|
554
|
+
* // type: "publicLaw",
|
|
555
|
+
* // congress: 116,
|
|
556
|
+
* // lawNumber: 283,
|
|
557
|
+
* // confidence: 0.9,
|
|
558
|
+
* // ...
|
|
559
|
+
* // }
|
|
560
|
+
* ```
|
|
561
|
+
*/
|
|
562
|
+
declare function extractPublicLaw(
  token: Token,
  transformationMap: TransformationMap
): PublicLawCitation;
|
|
563
|
+
//#endregion
|
|
564
|
+
//#region src/extract/extractFederalRegister.d.ts
|
|
565
|
+
/**
|
|
566
|
+
* Extracts Federal Register citation metadata from a tokenized citation.
|
|
567
|
+
*
|
|
568
|
+
* Parses token text to extract:
|
|
569
|
+
* - Volume: Federal Register volume number (e.g., "85")
|
|
570
|
+
* - Page: Page number (e.g., "12345")
|
|
571
|
+
* - Year: Optional publication year in parentheses (e.g., "(2021)")
|
|
572
|
+
*
|
|
573
|
+
* Confidence scoring:
|
|
574
|
+
* - 0.9 (Federal Register format is standardized)
|
|
575
|
+
*
|
|
576
|
+
* @param token - Token from tokenizer containing matched text and clean positions
|
|
577
|
+
* @param transformationMap - Position mapping from clean → original text
|
|
578
|
+
* @returns FederalRegisterCitation with parsed metadata and translated positions
|
|
579
|
+
*
|
|
580
|
+
* @example
|
|
581
|
+
* ```typescript
|
|
582
|
+
* const token = {
|
|
583
|
+
* text: "85 Fed. Reg. 12345",
|
|
584
|
+
* span: { cleanStart: 10, cleanEnd: 28 },
|
|
585
|
+
* type: "federalRegister",
|
|
586
|
+
* patternId: "federal-register"
|
|
587
|
+
* }
|
|
588
|
+
* const citation = extractFederalRegister(token, transformationMap)
|
|
589
|
+
* // citation = {
|
|
590
|
+
* // type: "federalRegister",
|
|
591
|
+
* // volume: 85,
|
|
592
|
+
* // page: 12345,
|
|
593
|
+
* // confidence: 0.9,
|
|
594
|
+
* // ...
|
|
595
|
+
* // }
|
|
596
|
+
* ```
|
|
597
|
+
*/
|
|
598
|
+
declare function extractFederalRegister(
  token: Token,
  transformationMap: TransformationMap
): FederalRegisterCitation;
|
|
599
|
+
//#endregion
|
|
600
|
+
//#region src/resolve/DocumentResolver.d.ts
|
|
601
|
+
/**
|
|
602
|
+
* Document-scoped resolver that processes citations sequentially
|
|
603
|
+
* and resolves short-form citations to their antecedents.
|
|
604
|
+
*/
|
|
605
|
+
declare class DocumentResolver {
  private readonly citations;
  private readonly text;
  private readonly options;
  private readonly context;
  /**
   * Creates a new DocumentResolver.
   *
   * @param citations - All citations in document (in order of appearance)
   * @param text - Original document text
   * @param options - Resolution options
   */
  constructor(citations: Citation[], text: string, options?: ResolutionOptions);
  /**
   * Resolves all citations in the document.
   *
   * @returns Array of citations with resolution metadata
   */
  resolve(): ResolvedCitation[];
  /**
   * Resolves Id. citation to immediately preceding full case citation.
   */
  private resolveId;
  /**
   * Resolves supra citation by matching party name.
   */
  private resolveSupra;
  /**
   * Resolves short-form case citation by matching volume/reporter.
   */
  private resolveShortFormCase;
  /**
   * Checks if a citation is a full citation (not short-form).
   */
  private isFullCitation;
  /**
   * Tracks a full citation in the resolution history.
   * Extracts party name for supra resolution.
   */
  private trackFullCitation;
  /**
   * Extracts party name from full case citation text.
   * Handles "Party v. Party" format by looking at text before citation span.
   */
  private extractPartyName;
  /**
   * Normalizes party name for matching.
   */
  private normalizePartyName;
  /**
   * Normalizes reporter abbreviation for matching.
   */
  private normalizeReporter;
  /**
   * Checks if antecedent citation is within scope boundary.
   */
  private isWithinScope;
  /**
   * Creates a failure result for unresolved citations.
   */
  private createFailureResult;
}
|
|
667
|
+
//#endregion
|
|
668
|
+
//#region src/resolve/index.d.ts
|
|
669
|
+
/**
|
|
670
|
+
* Resolves short-form citations to their full antecedents.
|
|
671
|
+
*
|
|
672
|
+
* Convenience wrapper around DocumentResolver that handles common use cases.
|
|
673
|
+
*
|
|
674
|
+
* @param citations - Extracted citations in order of appearance
|
|
675
|
+
* @param text - Original document text
|
|
676
|
+
* @param options - Resolution options
|
|
677
|
+
* @returns Citations with resolution metadata
|
|
678
|
+
*/
|
|
679
|
+
declare function resolveCitations(
  citations: Array<Citation>,
  text: string,
  options?: ResolutionOptions
): Array<ResolvedCitation>;
|
|
680
|
+
//#endregion
|
|
681
|
+
export { type Citation, type CitationBase, type CitationType, type CleanTextResult, DocumentResolver, type ExtractOptions, type FederalRegisterCitation, type FullCaseCitation, type IdCitation, type JournalCitation, type NeutralCitation, type PublicLawCitation, type ResolutionOptions, type ResolutionResult, type ResolvedCitation, type ScopeStrategy, type ShortFormCaseCitation, type Span, type StatuteCitation, type SupraCitation, type Token, type TransformationMap, type Warning, cleanText, extractCase, extractCitations, extractCitationsAsync, extractFederalRegister, extractJournal, extractNeutral, extractPublicLaw, extractStatute, resolveCitations, tokenize };
|
|
682
|
+
//# sourceMappingURL=index.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../src/patterns/casePatterns.ts","../src/resolve/types.ts","../src/extract/extractCitations.ts","../src/clean/cleanText.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts"],"mappings":";;;;;;AAcA;;;;;;;;;;UAAiB,OAAA;EACf,EAAA;EACA,KAAA,EAAO,MAAA;EACP,WAAA;EACA,IAAA;AAAA;;;;;;;KCLU,aAAA;;;;UAKK,iBAAA;;AALjB;;;;;AAKA;EAQE,aAAA,GAAgB,aAAA;;;;;EAMhB,oBAAA;;;;;EAMA,wBAAA,GAA2B,MAAA;;;;AAgC7B;EA1BE,kBAAA;;;;;;EAOA,mBAAA;;;AA+CF;;;EAxCE,qBAAA;;;;;EAMA,gBAAA;AAAA;;;ACzBF;UD+BiB,gBAAA;;;;;EAKf,UAAA;EC0BmB;;;EDrBnB,aAAA;;;;EAKA,QAAA;;;;AC6EF;EDvEE,UAAA;AAAA;;;;;KAOU,gBAAA,GAAmB,QAAA;;;;;EAK7B,UAAA,GAAa,gBAAA;AAAA;;;ADzFf;;;AAAA,UEyBiB,cAAA;;;;;;;;;;;AD1BjB;;;;ECyCC,QAAA,GAAW,KAAA,EAAO,IAAA;EDpCnB;;;;;;;;;;;;;;ECoDC,QAAA,GAAW,OAAA;EDAZ;;;;;;;;;;AA4BA;;;ECbC,OAAA;;;;;;;;;AA9CD;;;;;;EA8DC,iBAAA,GAAoB,iBAAA;AAAA;;;;;;;;;;;AA6DrB;;;;;;;;;;;;;;;AAoHA;;;;;;;;;;;;;;;;;;;;AC3QA;;;;;;;;;;;AAcA;;iBDyIgB,gBAAA,CACf,IAAA,UACA,OAAA,GAAU,cAAA,GACR,QAAA,KAAa,gBAAA;;;;;;;;;ACtHhB;;;;;;;;;;;;;;iBDuOsB,qBAAA,CACrB,IAAA,UACA,OAAA,GAAU,cAAA,GACR,OAAA,CAAQ,QAAA,KAAa,gBAAA;;;;;AF3QxB;UGHiB,eAAA;;EAEhB,OAAA;;EAGA,iBAAA,EAAmB,iBAAA;;EAGnB,QAAA,EAAU,OAAA;AAAA;;;;UAMM,OAAA;EAChB,KAAA;EACA,OAAA;EACA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;AAAA;;;;;;;;;;;;;;;AF0C5B;;iBEvBgB,SAAA,CACf,QAAA,UACA,QAAA,GAAU,KAAA,EAAO,IAAA,uBAMf,eAAA;;;AF1CH;;;;;AAKA;;AALA,UGoBiB,KAAA;EHKY;EGH3B,IAAA;;EAGA,IAAA,EAAM,IAAA,CAAK,IAAA;;EAGX,IAAA,EAAM,OAAA;;EAGN,SAAA;AAAA;;;;AH0BF;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;;iBE4CgB,QAAA,CACd,WAAA,UACA,QAAA,GAAU,OAAA,KAOT,KAAA;;;;;;;AH1EH;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;;;;;;;;;;;AC3DA;;;;;;;;;;iBGgCgB,WAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,gBAAA;;;;;;;;;AJ7DH;
;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;iBKrBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ALvCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;;AA4BA;;iBM/CgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;ANzCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;iBOvBgB,cAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,eAAA;;;;;;;;;APrCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;;;iBQtBgB,gBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,iBAAA;;;;;;;;;ARtCH;;;;;AAKA;;;;;;;;;;;;;;;AAoDA;;;;;;;iBSxBgB,sBAAA,CACf,KAAA,EAAO,KAAA,EACP,iBAAA,EAAmB,iBAAA,GACjB,uBAAA;;;;;;;cCjBU,gBAAA;EAAA,iBACM,SAAA;EAAA,iBACA,IAAA;EAAA,iBACA,OAAA;EAAA,iBACA,OAAA;EVlBnB;;;;;;;EU2BE,WAAA,CACE,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAS,iBAAA;;;;;;EAwCX,OAAA,CAAA,GAAW,gBAAA;EVxBX;AAMF;;EANE,QUkEQ,SAAA;EV5DO;;;EAAA,QU4FP,YAAA;;;;UAiDA,oBAAA;EVjHE;;;EAAA,QUuJF,cAAA;;;;;UAeA,iBAAA;;;ATjOV;;USiPU,gBAAA;;;;UAyBA,kBAAA;ET5MW;;;EAAA,QSsNX,iBAAA;;;;UAUA,aAAA;;;;UAYA,mBAAA;AAAA;;;;;AV/TV;;;;;;;;iBWegB,gBAAA,CACd,SAAA,EAAW,QAAA,IACX,IAAA,UACA,OAAA,GAAU,iBAAA,GACT,gBAAA"}
|