eyecite-ts 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate(\n text: string,\n citations: Citation[],\n options: AnnotationOptions = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n\n for (const citation of sorted) {\n const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped: [] }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return text.replace(/[&<>\"'\\/]/g, (char) => map[char])\n}\n"],"mappings":"AA+CA,SAAgB,EACd,EACA,EACA,EAA6B,EAAE,CACb,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAExB,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAM,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAE9D,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,QAAS,EAAE,CAAE,CAiBnD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,aAAe,GAAS,EAAI,GAAM"}
@@ -0,0 +1,286 @@
1
+ //#region src/types/span.d.ts
2
+ /**
3
+ * Represents a text span with positions tracked through transformations.
4
+ *
5
+ * During text cleaning (HTML removal, whitespace normalization), positions
6
+ * shift. Span tracks BOTH cleaned positions (for parsing) and original
7
+ * positions (for user-facing results).
8
+ *
9
+ * @example
10
+ * const original = "Smith v. Doe, 500 F.2d 123 (2020)"
11
+ * // After cleaning, positions may shift
12
+ * const span: Span = {
13
+ * cleanStart: 14, // Position in cleaned text
14
+ * cleanEnd: 27,
15
+ * originalStart: 14, // Position in original text
16
+ * originalEnd: 27
17
+ * }
18
+ */
19
+ interface Span {
20
+ /** Start position in cleaned/tokenized text (used during parsing) */
21
+ cleanStart: number;
22
+ /** End position in cleaned/tokenized text (used during parsing) */
23
+ cleanEnd: number;
24
+ /** Start position in original input text (returned to user) */
25
+ originalStart: number;
26
+ /** End position in original input text (returned to user) */
27
+ originalEnd: number;
28
+ }
29
+ /**
30
+ * Maps positions between cleaned and original text.
31
+ *
32
+ * Built during text transformation to track how character positions shift
33
+ * when HTML entities are removed, whitespace is normalized, etc.
34
+ */
35
+ interface TransformationMap {
36
+ /** Maps cleaned text position to original text position */
37
+ cleanToOriginal: Map<number, number>;
38
+ /** Maps original text position to cleaned text position */
39
+ originalToClean: Map<number, number>;
40
+ }
41
+ //#endregion
42
+ //#region src/types/citation.d.ts
43
+ /**
44
+ * Citation type discriminator for type-safe pattern matching.
45
+ */
46
+ type CitationType = "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister" | "id" | "supra" | "shortFormCase";
47
+ /**
48
+ * Warning generated during citation parsing.
49
+ */
50
+ interface Warning {
51
+ /** Severity level */
52
+ level: "error" | "warning" | "info";
53
+ /** Description of the issue */
54
+ message: string;
55
+ /** Position of the problematic region */
56
+ position: {
57
+ start: number;
58
+ end: number;
59
+ };
60
+ /** Additional context about the warning */
61
+ context?: string;
62
+ }
63
+ /**
64
+ * Base fields shared by all citation types.
65
+ */
66
+ interface CitationBase {
67
+ /** Original matched text */
68
+ text: string;
69
+ /** Position span in document (originalStart/End point to original text) */
70
+ span: Span;
71
+ /**
72
+ * Confidence score indicating match certainty (0-1).
73
+ * - 1.0: Certain match (e.g., exact reporter abbreviation in reporters-db)
74
+ * - 0.8-0.99: High confidence (e.g., common pattern, missing pincite)
75
+ * - 0.5-0.79: Medium confidence (e.g., ambiguous reporter abbreviation)
76
+ * - <0.5: Low confidence (e.g., unusual formatting)
77
+ */
78
+ confidence: number;
79
+ /** Exact substring matched from the original text */
80
+ matchedText: string;
81
+ /** Time spent processing this citation (milliseconds) */
82
+ processTimeMs: number;
83
+ /** Number of regex patterns checked before match */
84
+ patternsChecked: number;
85
+ /** Warnings for malformed or ambiguous regions */
86
+ warnings?: Warning[];
87
+ }
88
+ /**
89
+ * Full case citation (volume-reporter-page format).
90
+ *
91
+ * @example "500 F.2d 123"
92
+ * @example "410 U.S. 113, 115"
93
+ */
94
+ interface FullCaseCitation extends CitationBase {
95
+ type: "case";
96
+ volume: number;
97
+ reporter: string;
98
+ page: number;
99
+ pincite?: number;
100
+ court?: string;
101
+ year?: number;
102
+ /** Normalized reporter abbreviation from reporters-db (e.g., "F.2d" vs "F. 2d") */
103
+ normalizedReporter?: string;
104
+ /** Parallel citations for same case in different reporters */
105
+ parallelCitations?: Array<{
106
+ volume: number;
107
+ reporter: string;
108
+ page: number;
109
+ }>;
110
+ /** Citation signal (introductory phrase) */
111
+ signal?: "see" | "see also" | "cf" | "but see" | "compare";
112
+ /** Parenthetical explanation following the citation */
113
+ parenthetical?: string;
114
+ /** Subsequent procedural history (e.g., "aff'd", "rev'd", "cert. denied") */
115
+ subsequentHistory?: string;
116
+ /**
117
+ * Date information in multiple formats.
118
+ * - iso: ISO 8601 format (YYYY-MM-DD or YYYY-MM-DDTHH:mm:ssZ)
119
+ * - parsed: Structured date components
120
+ */
121
+ date?: {
122
+ iso: string;
123
+ parsed?: {
124
+ year: number;
125
+ month?: number;
126
+ day?: number;
127
+ };
128
+ };
129
+ /**
130
+ * Alternative interpretations for ambiguous citations.
131
+ * Used when reporter abbreviation matches multiple reporters or format is unclear.
132
+ */
133
+ possibleInterpretations?: Array<{
134
+ volume: number;
135
+ reporter: string;
136
+ page: number;
137
+ confidence: number;
138
+ reason: string;
139
+ }>;
140
+ }
141
+ /**
142
+ * Statute citation (U.S. Code, state codes, etc.).
143
+ *
144
+ * @example "42 U.S.C. § 1983"
145
+ */
146
+ interface StatuteCitation extends CitationBase {
147
+ type: "statute";
148
+ title?: number;
149
+ code: string;
150
+ section: string;
151
+ }
152
+ /**
153
+ * Journal citation (law review, legal periodical).
154
+ *
155
+ * Format: [Author,] [Title,] Volume Journal Page [, Pincite] [(Year)]
156
+ *
157
+ * @example "100 Harv. L. Rev. 1234"
158
+ * @example "Jane Doe, Article Title, 75 Yale L.J. 456, 460 (2020)"
159
+ */
160
+ interface JournalCitation extends CitationBase {
161
+ type: "journal";
162
+ /** Author name (if extracted) */
163
+ author?: string;
164
+ /** Article title (if extracted) */
165
+ title?: string;
166
+ /** Volume number */
167
+ volume?: number;
168
+ /** Full journal name */
169
+ journal: string;
170
+ /** Standard journal abbreviation (e.g., "Harv. L. Rev.") */
171
+ abbreviation: string;
172
+ /** Starting page of article */
173
+ page?: number;
174
+ /** Specific page reference */
175
+ pincite?: number;
176
+ /** Publication year */
177
+ year?: number;
178
+ }
179
+ /**
180
+ * Neutral citation (vendor-neutral format).
181
+ *
182
+ * Format: Year Court DocumentNumber
183
+ *
184
+ * @example "2020 WL 123456" (Westlaw)
185
+ * @example "2020 U.S. LEXIS 456" (Lexis)
186
+ */
187
+ interface NeutralCitation extends CitationBase {
188
+ type: "neutral";
189
+ /** Year of decision */
190
+ year: number;
191
+ /** Court identifier (e.g., "WL", "U.S. LEXIS") */
192
+ court: string;
193
+ /** Document number */
194
+ documentNumber: string;
195
+ }
196
+ /**
197
+ * Public law citation (federal legislation).
198
+ *
199
+ * Format: Pub. L. No. Congress-LawNumber
200
+ *
201
+ * @example "Pub. L. No. 116-283"
202
+ * @example "Pub. L. 117-58 (Infrastructure Investment and Jobs Act)"
203
+ */
204
+ interface PublicLawCitation extends CitationBase {
205
+ type: "publicLaw";
206
+ /** Congress number (e.g., 116) */
207
+ congress: number;
208
+ /** Law number within that Congress */
209
+ lawNumber: number;
210
+ /** Optional bill title extracted from nearby text */
211
+ title?: string;
212
+ }
213
+ /**
214
+ * Federal Register citation.
215
+ *
216
+ * Format: Volume Fed. Reg. Page
217
+ *
218
+ * @example "85 Fed. Reg. 12345"
219
+ * @example "86 Fed. Reg. 56789 (Jan. 15, 2021)"
220
+ */
221
+ interface FederalRegisterCitation extends CitationBase {
222
+ type: "federalRegister";
223
+ /** Federal Register volume */
224
+ volume: number;
225
+ /** Page number */
226
+ page: number;
227
+ /** Publication year (if extracted) */
228
+ year?: number;
229
+ }
230
+ /**
231
+ * Id. citation (refers to immediately preceding citation).
232
+ *
233
+ * @example "Id."
234
+ * @example "Id. at 125"
235
+ */
236
+ interface IdCitation extends CitationBase {
237
+ type: "id";
238
+ pincite?: number;
239
+ }
240
+ /**
241
+ * Supra citation (refers to earlier citation by party name).
242
+ *
243
+ * @example "Smith, supra"
244
+ * @example "Smith, supra, at 460"
245
+ */
246
+ interface SupraCitation extends CitationBase {
247
+ type: "supra";
248
+ /** Party name extracted from citation text */
249
+ partyName: string;
250
+ /** Specific page reference */
251
+ pincite?: number;
252
+ }
253
+ /**
254
+ * Short-form case citation (abbreviated reference to earlier full citation).
255
+ * Distinguished from full case by lack of case name.
256
+ *
257
+ * @example "500 F.2d at 125" (refers to earlier full citation at different page)
258
+ */
259
+ interface ShortFormCaseCitation extends CitationBase {
260
+ type: "shortFormCase";
261
+ volume: number;
262
+ reporter: string;
263
+ page?: number;
264
+ pincite?: number;
265
+ }
266
+ /**
267
+ * Union type of all citation types.
268
+ *
269
+ * Use type guards via discriminated union:
270
+ * @example
271
+ * if (citation.type === "case") {
272
+ * console.log(citation.volume) // TypeScript knows this exists
273
+ * }
274
+ * @example
275
+ * switch (citation.type) {
276
+ * case "journal":
277
+ * return citation.abbreviation // Type-safe access
278
+ * case "neutral":
279
+ * return citation.court
280
+ * // ...
281
+ * }
282
+ */
283
+ type Citation = FullCaseCitation | StatuteCitation | JournalCitation | NeutralCitation | PublicLawCitation | FederalRegisterCitation | IdCitation | SupraCitation | ShortFormCaseCitation;
284
+ //#endregion
285
+ export { FullCaseCitation as a, NeutralCitation as c, StatuteCitation as d, SupraCitation as f, FederalRegisterCitation as i, PublicLawCitation as l, TransformationMap as m, CitationBase as n, IdCitation as o, Span as p, CitationType as r, JournalCitation as s, Citation as t, ShortFormCaseCitation as u };
286
+ //# sourceMappingURL=citation-8_GvfEuj.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-8_GvfEuj.d.mts","names":[],"sources":["../src/types/span.ts","../src/types/citation.ts"],"mappings":";;AAiBA;;;;;;;;;;AAoBA;;;;;;UApBiB,IAAA;;EAEf,UAAA;EAuBiB;EApBjB,QAAA;;EAGA,aAAA;ECpBF;EDuBE,WAAA;AAAA;;;AClBF;;;;UD2BiB,iBAAA;;EAEf,eAAA,EAAiB,GAAA;;EAGjB,eAAA,EAAiB,GAAA;AAAA;;;AAzBnB;;;AAAA,KCZY,YAAA;;;;UAKK,OAAA;;EAEf,KAAA;EDyBF;ECvBE,OAAA;;EAEA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;;EAE3B,OAAA;AAAA;;;;UAMe,YAAA;EAnBL;EAqBV,IAAA;EArBU;EAwBV,IAAA,EAAM,IAAA;EAnBR;;;;;;;EA4BE,UAAA;;EAGA,WAAA;;EAGA,aAAA;EApBF;EAuBE,eAAA;;EAGA,QAAA,GAAW,OAAA;AAAA;;;;;;;UASI,gBAAA,SAAyB,YAAA;EACxC,IAAA;EACA,MAAA;EACA,QAAA;EACA,IAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;;EAGA,kBAAA;;EAGA,iBAAA,GAAoB,KAAA;IAClB,MAAA;IACA,QAAA;IACA,IAAA;EAAA;;EAIF,MAAA;;EAGA,aAAA;;EAGA,iBAAA;;;;;;EAOA,IAAA;IACE,GAAA;IACA,MAAA;MAAW,IAAA;MAAc,KAAA;MAAgB,GAAA;IAAA;EAAA;;;;;EAO3C,uBAAA,GAA0B,KAAA;IACxB,MAAA;IACA,QAAA;IACA,IAAA;IACA,UAAA;IACA,MAAA;EAAA;AAAA;;;;;;UASa,eAAA,SAAwB,YAAA;EACvC,IAAA;EACA,KAAA;EACA,IAAA;EACA,OAAA;AAAA;;;;;;;;;UAWe,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,MAAA;EAcA;EAZA,KAAA;EAuBe;EArBf,MAAA;EAqBuC;EAnBvC,OAAA;;EAEA,YAAA;;EAEA,IAAA;;EAEA,OAAA;EA+BF;EA7BE,IAAA;AAAA;;;;;;;;;UAWe,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,IAAA;;EAEA,KAAA;;EAEA,cAAA;AAAA;;;AA6CF;;;;;;UAlCiB,iBAAA,SAA0B,YAAA;EACzC,IAAA;EAmCA;EAjCA,QAAA;EA0Ce;EAxCf,SAAA;EAwCqC;EAtCrC,KAAA;AAAA;;;;;AAoDF;;;;UAzCiB,uBAAA,SAAgC,YAAA;EAC/C,IAAA;;EAEA,MAAA;;EAEA,IAAA;;EAEA,IAAA;AAAA;;;;;;;UASe,UAAA,SAAmB,YAAA;EAClC,IAAA;EACA,OAAA;AAAA;;;;;;;UASe,aAAA,SAAsB,YAAA;EACrC,IAAA;;EAEA,SAAA;;EAEA,OAAA;AAAA;;;;;;;UASe,qBAAA,SAA8B,YAAA;EAC7C,IAAA;EACA,MAAA;EACA,QAAA;EACA,IAAA;EACA,OAAA;AAAA;;;;;;;;;;;;;;;;;;KAoBU,QAAA,GACR,gBAAA,GACA,eAAA,GACA,eAAA,GACA,eAAA,GACA,iBAAA,GACA,uBAAA,GACA,UAAA,GACA,aAAA,GACA,qBAAA"}
@@ -0,0 +1,286 @@
1
+ //#region src/types/span.d.ts
2
+ /**
3
+ * Represents a text span with positions tracked through transformations.
4
+ *
5
+ * During text cleaning (HTML removal, whitespace normalization), positions
6
+ * shift. Span tracks BOTH cleaned positions (for parsing) and original
7
+ * positions (for user-facing results).
8
+ *
9
+ * @example
10
+ * const original = "Smith v. Doe, 500 F.2d 123 (2020)"
11
+ * // After cleaning, positions may shift
12
+ * const span: Span = {
13
+ * cleanStart: 14, // Position in cleaned text
14
+ * cleanEnd: 27,
15
+ * originalStart: 14, // Position in original text
16
+ * originalEnd: 27
17
+ * }
18
+ */
19
+ interface Span {
20
+ /** Start position in cleaned/tokenized text (used during parsing) */
21
+ cleanStart: number;
22
+ /** End position in cleaned/tokenized text (used during parsing) */
23
+ cleanEnd: number;
24
+ /** Start position in original input text (returned to user) */
25
+ originalStart: number;
26
+ /** End position in original input text (returned to user) */
27
+ originalEnd: number;
28
+ }
29
+ /**
30
+ * Maps positions between cleaned and original text.
31
+ *
32
+ * Built during text transformation to track how character positions shift
33
+ * when HTML entities are removed, whitespace is normalized, etc.
34
+ */
35
+ interface TransformationMap {
36
+ /** Maps cleaned text position to original text position */
37
+ cleanToOriginal: Map<number, number>;
38
+ /** Maps original text position to cleaned text position */
39
+ originalToClean: Map<number, number>;
40
+ }
41
+ //#endregion
42
+ //#region src/types/citation.d.ts
43
+ /**
44
+ * Citation type discriminator for type-safe pattern matching.
45
+ */
46
+ type CitationType = "case" | "statute" | "journal" | "neutral" | "publicLaw" | "federalRegister" | "id" | "supra" | "shortFormCase";
47
+ /**
48
+ * Warning generated during citation parsing.
49
+ */
50
+ interface Warning {
51
+ /** Severity level */
52
+ level: "error" | "warning" | "info";
53
+ /** Description of the issue */
54
+ message: string;
55
+ /** Position of the problematic region */
56
+ position: {
57
+ start: number;
58
+ end: number;
59
+ };
60
+ /** Additional context about the warning */
61
+ context?: string;
62
+ }
63
+ /**
64
+ * Base fields shared by all citation types.
65
+ */
66
+ interface CitationBase {
67
+ /** Original matched text */
68
+ text: string;
69
+ /** Position span in document (originalStart/End point to original text) */
70
+ span: Span;
71
+ /**
72
+ * Confidence score indicating match certainty (0-1).
73
+ * - 1.0: Certain match (e.g., exact reporter abbreviation in reporters-db)
74
+ * - 0.8-0.99: High confidence (e.g., common pattern, missing pincite)
75
+ * - 0.5-0.79: Medium confidence (e.g., ambiguous reporter abbreviation)
76
+ * - <0.5: Low confidence (e.g., unusual formatting)
77
+ */
78
+ confidence: number;
79
+ /** Exact substring matched from the original text */
80
+ matchedText: string;
81
+ /** Time spent processing this citation (milliseconds) */
82
+ processTimeMs: number;
83
+ /** Number of regex patterns checked before match */
84
+ patternsChecked: number;
85
+ /** Warnings for malformed or ambiguous regions */
86
+ warnings?: Warning[];
87
+ }
88
+ /**
89
+ * Full case citation (volume-reporter-page format).
90
+ *
91
+ * @example "500 F.2d 123"
92
+ * @example "410 U.S. 113, 115"
93
+ */
94
+ interface FullCaseCitation extends CitationBase {
95
+ type: "case";
96
+ volume: number;
97
+ reporter: string;
98
+ page: number;
99
+ pincite?: number;
100
+ court?: string;
101
+ year?: number;
102
+ /** Normalized reporter abbreviation from reporters-db (e.g., "F.2d" vs "F. 2d") */
103
+ normalizedReporter?: string;
104
+ /** Parallel citations for same case in different reporters */
105
+ parallelCitations?: Array<{
106
+ volume: number;
107
+ reporter: string;
108
+ page: number;
109
+ }>;
110
+ /** Citation signal (introductory phrase) */
111
+ signal?: "see" | "see also" | "cf" | "but see" | "compare";
112
+ /** Parenthetical explanation following the citation */
113
+ parenthetical?: string;
114
+ /** Subsequent procedural history (e.g., "aff'd", "rev'd", "cert. denied") */
115
+ subsequentHistory?: string;
116
+ /**
117
+ * Date information in multiple formats.
118
+ * - iso: ISO 8601 format (YYYY-MM-DD or YYYY-MM-DDTHH:mm:ssZ)
119
+ * - parsed: Structured date components
120
+ */
121
+ date?: {
122
+ iso: string;
123
+ parsed?: {
124
+ year: number;
125
+ month?: number;
126
+ day?: number;
127
+ };
128
+ };
129
+ /**
130
+ * Alternative interpretations for ambiguous citations.
131
+ * Used when reporter abbreviation matches multiple reporters or format is unclear.
132
+ */
133
+ possibleInterpretations?: Array<{
134
+ volume: number;
135
+ reporter: string;
136
+ page: number;
137
+ confidence: number;
138
+ reason: string;
139
+ }>;
140
+ }
141
+ /**
142
+ * Statute citation (U.S. Code, state codes, etc.).
143
+ *
144
+ * @example "42 U.S.C. § 1983"
145
+ */
146
+ interface StatuteCitation extends CitationBase {
147
+ type: "statute";
148
+ title?: number;
149
+ code: string;
150
+ section: string;
151
+ }
152
+ /**
153
+ * Journal citation (law review, legal periodical).
154
+ *
155
+ * Format: [Author,] [Title,] Volume Journal Page [, Pincite] [(Year)]
156
+ *
157
+ * @example "100 Harv. L. Rev. 1234"
158
+ * @example "Jane Doe, Article Title, 75 Yale L.J. 456, 460 (2020)"
159
+ */
160
+ interface JournalCitation extends CitationBase {
161
+ type: "journal";
162
+ /** Author name (if extracted) */
163
+ author?: string;
164
+ /** Article title (if extracted) */
165
+ title?: string;
166
+ /** Volume number */
167
+ volume?: number;
168
+ /** Full journal name */
169
+ journal: string;
170
+ /** Standard journal abbreviation (e.g., "Harv. L. Rev.") */
171
+ abbreviation: string;
172
+ /** Starting page of article */
173
+ page?: number;
174
+ /** Specific page reference */
175
+ pincite?: number;
176
+ /** Publication year */
177
+ year?: number;
178
+ }
179
+ /**
180
+ * Neutral citation (vendor-neutral format).
181
+ *
182
+ * Format: Year Court DocumentNumber
183
+ *
184
+ * @example "2020 WL 123456" (Westlaw)
185
+ * @example "2020 U.S. LEXIS 456" (Lexis)
186
+ */
187
+ interface NeutralCitation extends CitationBase {
188
+ type: "neutral";
189
+ /** Year of decision */
190
+ year: number;
191
+ /** Court identifier (e.g., "WL", "U.S. LEXIS") */
192
+ court: string;
193
+ /** Document number */
194
+ documentNumber: string;
195
+ }
196
+ /**
197
+ * Public law citation (federal legislation).
198
+ *
199
+ * Format: Pub. L. No. Congress-LawNumber
200
+ *
201
+ * @example "Pub. L. No. 116-283"
202
+ * @example "Pub. L. 117-58 (Infrastructure Investment and Jobs Act)"
203
+ */
204
+ interface PublicLawCitation extends CitationBase {
205
+ type: "publicLaw";
206
+ /** Congress number (e.g., 116) */
207
+ congress: number;
208
+ /** Law number within that Congress */
209
+ lawNumber: number;
210
+ /** Optional bill title extracted from nearby text */
211
+ title?: string;
212
+ }
213
+ /**
214
+ * Federal Register citation.
215
+ *
216
+ * Format: Volume Fed. Reg. Page
217
+ *
218
+ * @example "85 Fed. Reg. 12345"
219
+ * @example "86 Fed. Reg. 56789 (Jan. 15, 2021)"
220
+ */
221
+ interface FederalRegisterCitation extends CitationBase {
222
+ type: "federalRegister";
223
+ /** Federal Register volume */
224
+ volume: number;
225
+ /** Page number */
226
+ page: number;
227
+ /** Publication year (if extracted) */
228
+ year?: number;
229
+ }
230
+ /**
231
+ * Id. citation (refers to immediately preceding citation).
232
+ *
233
+ * @example "Id."
234
+ * @example "Id. at 125"
235
+ */
236
+ interface IdCitation extends CitationBase {
237
+ type: "id";
238
+ pincite?: number;
239
+ }
240
+ /**
241
+ * Supra citation (refers to earlier citation by party name).
242
+ *
243
+ * @example "Smith, supra"
244
+ * @example "Smith, supra, at 460"
245
+ */
246
+ interface SupraCitation extends CitationBase {
247
+ type: "supra";
248
+ /** Party name extracted from citation text */
249
+ partyName: string;
250
+ /** Specific page reference */
251
+ pincite?: number;
252
+ }
253
+ /**
254
+ * Short-form case citation (abbreviated reference to earlier full citation).
255
+ * Distinguished from full case by lack of case name.
256
+ *
257
+ * @example "500 F.2d at 125" (refers to earlier full citation at different page)
258
+ */
259
+ interface ShortFormCaseCitation extends CitationBase {
260
+ type: "shortFormCase";
261
+ volume: number;
262
+ reporter: string;
263
+ page?: number;
264
+ pincite?: number;
265
+ }
266
+ /**
267
+ * Union type of all citation types.
268
+ *
269
+ * Use type guards via discriminated union:
270
+ * @example
271
+ * if (citation.type === "case") {
272
+ * console.log(citation.volume) // TypeScript knows this exists
273
+ * }
274
+ * @example
275
+ * switch (citation.type) {
276
+ * case "journal":
277
+ * return citation.abbreviation // Type-safe access
278
+ * case "neutral":
279
+ * return citation.court
280
+ * // ...
281
+ * }
282
+ */
283
+ type Citation = FullCaseCitation | StatuteCitation | JournalCitation | NeutralCitation | PublicLawCitation | FederalRegisterCitation | IdCitation | SupraCitation | ShortFormCaseCitation;
284
+ //#endregion
285
+ export { FullCaseCitation as a, NeutralCitation as c, StatuteCitation as d, SupraCitation as f, FederalRegisterCitation as i, PublicLawCitation as l, TransformationMap as m, CitationBase as n, IdCitation as o, Span as p, CitationType as r, JournalCitation as s, Citation as t, ShortFormCaseCitation as u };
286
+ //# sourceMappingURL=citation-BcY5zzWb.d.cts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"citation-BcY5zzWb.d.cts","names":[],"sources":["../src/types/span.ts","../src/types/citation.ts"],"mappings":";;AAiBA;;;;;;;;;;AAoBA;;;;;;UApBiB,IAAA;;EAEf,UAAA;EAuBiB;EApBjB,QAAA;;EAGA,aAAA;ECpBF;EDuBE,WAAA;AAAA;;;AClBF;;;;UD2BiB,iBAAA;;EAEf,eAAA,EAAiB,GAAA;;EAGjB,eAAA,EAAiB,GAAA;AAAA;;;AAzBnB;;;AAAA,KCZY,YAAA;;;;UAKK,OAAA;;EAEf,KAAA;EDyBF;ECvBE,OAAA;;EAEA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;;EAE3B,OAAA;AAAA;;;;UAMe,YAAA;EAnBL;EAqBV,IAAA;EArBU;EAwBV,IAAA,EAAM,IAAA;EAnBR;;;;;;;EA4BE,UAAA;;EAGA,WAAA;;EAGA,aAAA;EApBF;EAuBE,eAAA;;EAGA,QAAA,GAAW,OAAA;AAAA;;;;;;;UASI,gBAAA,SAAyB,YAAA;EACxC,IAAA;EACA,MAAA;EACA,QAAA;EACA,IAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;;EAGA,kBAAA;;EAGA,iBAAA,GAAoB,KAAA;IAClB,MAAA;IACA,QAAA;IACA,IAAA;EAAA;;EAIF,MAAA;;EAGA,aAAA;;EAGA,iBAAA;;;;;;EAOA,IAAA;IACE,GAAA;IACA,MAAA;MAAW,IAAA;MAAc,KAAA;MAAgB,GAAA;IAAA;EAAA;;;;;EAO3C,uBAAA,GAA0B,KAAA;IACxB,MAAA;IACA,QAAA;IACA,IAAA;IACA,UAAA;IACA,MAAA;EAAA;AAAA;;;;;;UASa,eAAA,SAAwB,YAAA;EACvC,IAAA;EACA,KAAA;EACA,IAAA;EACA,OAAA;AAAA;;;;;;;;;UAWe,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,MAAA;EAcA;EAZA,KAAA;EAuBe;EArBf,MAAA;EAqBuC;EAnBvC,OAAA;;EAEA,YAAA;;EAEA,IAAA;;EAEA,OAAA;EA+BF;EA7BE,IAAA;AAAA;;;;;;;;;UAWe,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,IAAA;;EAEA,KAAA;;EAEA,cAAA;AAAA;;;AA6CF;;;;;;UAlCiB,iBAAA,SAA0B,YAAA;EACzC,IAAA;EAmCA;EAjCA,QAAA;EA0Ce;EAxCf,SAAA;EAwCqC;EAtCrC,KAAA;AAAA;;;;;AAoDF;;;;UAzCiB,uBAAA,SAAgC,YAAA;EAC/C,IAAA;;EAEA,MAAA;;EAEA,IAAA;;EAEA,IAAA;AAAA;;;;;;;UASe,UAAA,SAAmB,YAAA;EAClC,IAAA;EACA,OAAA;AAAA;;;;;;;UASe,aAAA,SAAsB,YAAA;EACrC,IAAA;;EAEA,SAAA;;EAEA,OAAA;AAAA;;;;;;;UASe,qBAAA,SAA8B,YAAA;EAC7C,IAAA;EACA,MAAA;EACA,QAAA;EACA,IAAA;EACA,OAAA;AAAA;;;;;;;;;;;;;;;;;;KAoBU,QAAA,GACR,gBAAA,GACA,eAAA,GACA,eAAA,GACA,eAAA,GACA,iBAAA,GACA,uBAAA,GACA,UAAA,GACA,aAAA,GACA,qBAAA"}
@@ -0,0 +1,2 @@
1
+ Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});let e=null;async function t(){if(e)return e;let t=await import(`../../data/reporters.json`,{assert:{type:`json`}}),n=new Map,r=[],i=t.default||t;for(let[e,t]of Object.entries(i))for(let e of t){r.push(e);for(let t of Object.keys(e.editions)){let r=t.toLowerCase();n.has(r)||n.set(r,[]),n.get(r).push(e)}for(let[t,r]of Object.entries(e.variations||{})){let r=t.toLowerCase();n.has(r)||n.set(r,[]),n.get(r).push(e)}}return e={byAbbreviation:n,all:r},e}function n(){return e}async function r(e){return(await t()).byAbbreviation.get(e.toLowerCase())??[]}exports.findReportersByAbbreviation=r,exports.getReportersSync=n,exports.loadReporters=t;
2
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.cjs","names":[],"sources":["../../src/data/reporters.ts"],"sourcesContent":["/**\n * Reporter database integration for citation validation\n *\n * This module provides lazy-loadable access to the reporters-db database,\n * containing 1200+ court reporters with variant forms. The library works\n * in degraded mode (pattern-based extraction only) if reporters are not loaded.\n *\n * @example\n * // Degraded mode: extraction works without reporter data\n * const citations = await extract(text)\n *\n * @example\n * // Full mode: load reporters for validation\n * await loadReporters()\n * const citations = await extract(text) // Now with reporter validation\n */\n\n/**\n * Edition entry from reporters-db\n *\n * Represents a specific edition of a reporter with start/end dates.\n */\nexport interface ReporterEdition {\n /** Start date in ISO 8601 format */\n start: string | null\n /** End date in ISO 8601 format (null if ongoing) */\n end: string | null\n}\n\n/**\n * Reporter entry from reporters-db\n *\n * Represents a single court reporter with all metadata needed for\n * citation validation and enrichment.\n *\n * Note: The reporters-db structure has the actual data; this interface\n * represents it flexibly to handle all variations in the JSON.\n */\nexport interface ReporterEntry {\n /** Full reporter name (e.g., \"Federal Reporter\") */\n name: string\n /** Citation type: state, federal, specialty, neutral, state_regional, etc. */\n cite_type: string\n /** Editions keyed by abbreviation (e.g., {\"F.2d\": {...}, \"F.3d\": {...}}) */\n editions: Record<string, ReporterEdition>\n /** Variant forms mapped to canonical form (e.g., {\"F. 2d\": \"F.2d\"}) */\n variations?: Record<string, string | undefined>\n /** MLZ jurisdiction identifiers (optional) */\n mlz_jurisdiction?: string[]\n /** Publisher (optional) */\n publisher?: string\n /** Notes (optional) */\n notes?: string\n}\n\n/**\n * In-memory reporter database with fast O(1) lookup\n *\n * Uses Map-based indexing for case-insensitive abbreviation lookup.\n * All variant forms are indexed to support fuzzy matching.\n */\nexport interface ReportersDatabase {\n /** Fast O(1) lookup by abbreviation (lowercase normalized keys) */\n byAbbreviation: Map<string, ReporterEntry[]>\n /** All reporters (for iteration/filtering) */\n all: ReporterEntry[]\n}\n\n/**\n * Cached database instance (null until loadReporters() called)\n */\nlet cached: ReportersDatabase | null = null\n\n/**\n * Load reporter database asynchronously with lazy loading\n *\n * Dynamic import prevents loading 1200+ reporters until explicitly requested.\n * Result is cached after first load for subsequent calls.\n *\n * @returns Promise resolving to indexed reporter database\n *\n * @example\n * const db = await loadReporters()\n * const reporters = db.byAbbreviation.get('f.2d') // Fast O(1) lookup\n */\nexport async function loadReporters(): Promise<ReportersDatabase> {\n if (cached) return cached\n\n // Dynamic import prevents loading until requested (keeps core bundle small)\n const data = await import(\"../../data/reporters.json\", {\n assert: { type: \"json\" },\n })\n\n const byAbbreviation = new Map<string, ReporterEntry[]>()\n const all: ReporterEntry[] = []\n\n // reporters.json structure: { \"A.\": [...], \"F.2d\": [...], ... }\n const reportersData = (data.default || data) as Record<\n string,\n ReporterEntry[]\n >\n\n // Build fast lookup index with lowercase normalization\n for (const [canonicalAbbr, reporters] of Object.entries(reportersData)) {\n for (const reporter of reporters) {\n all.push(reporter)\n\n // Index by all edition abbreviations\n for (const editionAbbr of Object.keys(reporter.editions)) {\n const key = editionAbbr.toLowerCase()\n if (!byAbbreviation.has(key)) {\n byAbbreviation.set(key, [])\n }\n byAbbreviation.get(key)!.push(reporter)\n }\n\n // Index all variations for fuzzy matching\n for (const [variant, canonical] of Object.entries(\n reporter.variations || {},\n )) {\n const variantKey = variant.toLowerCase()\n if (!byAbbreviation.has(variantKey)) {\n byAbbreviation.set(variantKey, [])\n }\n byAbbreviation.get(variantKey)!.push(reporter)\n }\n }\n }\n\n cached = {\n byAbbreviation,\n all,\n }\n return cached\n}\n\n/**\n * Get cached reporter database synchronously (degraded mode support)\n *\n * Returns null if reporters not loaded yet. This enables the library to\n * work in degraded mode without reporter validation.\n *\n * @returns Cached database or null if not loaded\n *\n * @example\n * const db = getReportersSync()\n * if (db) {\n * // Full mode: validate citations\n * } else {\n * // Degraded mode: extract without validation\n * }\n */\nexport function getReportersSync(): ReportersDatabase | null {\n return cached\n}\n\n/**\n * Find reporters by abbreviation (case-insensitive)\n *\n * Loads reporter database if not already loaded. Returns all reporters\n * matching the abbreviation (including variant forms).\n *\n * @param abbr - Reporter abbreviation to look up\n * @returns Promise resolving to matching reporters (empty array if none)\n *\n * @example\n * const reporters = await findReportersByAbbreviation('F.2d')\n * // [{ abbreviation: 'F.2d', name: 'Federal Reporter, Second Series', ... }]\n *\n * @example\n * const unknown = await findReportersByAbbreviation('NONEXISTENT')\n * // [] (empty array, not error)\n */\nexport async function findReportersByAbbreviation(\n abbr: string,\n): Promise<ReporterEntry[]> {\n const db = await loadReporters()\n return db.byAbbreviation.get(abbr.toLowerCase()) ?? []\n}\n"],"mappings":"mEAuEA,IAAI,EAAmC,KAcvC,eAAsB,GAA4C,CAChE,GAAI,EAAQ,OAAO,EAGnB,IAAM,EAAO,MAAM,OAAO,4BAA6B,CACrD,OAAQ,CAAE,KAAM,OAAQ,CACzB,EAEK,EAAiB,IAAI,IACrB,EAAuB,EAAE,CAGzB,EAAiB,EAAK,SAAW,EAMvC,IAAK,GAAM,CAAC,EAAe,KAAc,OAAO,QAAQ,EAAc,CACpE,IAAK,IAAM,KAAY,EAAW,CAChC,EAAI,KAAK,EAAS,CAGlB,IAAK,IAAM,KAAe,OAAO,KAAK,EAAS,SAAS,CAAE,CACxD,IAAM,EAAM,EAAY,aAAa,CAChC,EAAe,IAAI,EAAI,EAC1B,EAAe,IAAI,EAAK,EAAE,CAAC,CAE7B,EAAe,IAAI,EAAI,CAAE,KAAK,EAAS,CAIzC,IAAK,GAAM,CAAC,EAAS,KAAc,OAAO,QACxC,EAAS,YAAc,EAAE,CAC1B,CAAE,CACD,IAAM,EAAa,EAAQ,aAAa,CACnC,EAAe,IAAI,EAAW,EACjC,EAAe,IAAI,EAAY,EAAE,CAAC,CAEpC,EAAe,IAAI,EAAW,CAAE,KAAK,EAAS,EASpD,MAJA,GAAS,CACP,iBACA,MACD,CACM,EAmBT,SAAgB,GAA6C,CAC3D,OAAO,EAoBT,eAAsB,EACpB,EAC0B,CAE1B,OADW,MAAM,GAAe,EACtB,eAAe,IAAI,EAAK,aAAa,CAAC,EAAI,EAAE"}