eyecite-ts 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,338 @@
1
+ # eyecite-ts
2
+
3
+ TypeScript legal citation extraction library - port of Python [eyecite](https://github.com/freelawproject/eyecite).
4
+
5
+ Extract, validate, annotate, and resolve legal citations from court opinions and legal documents with zero runtime dependencies and a <50KB core bundle (the reporter database is a separate, lazy-loaded entry point; see Bundle Size).
6
+
7
+ ## Features
8
+
9
+ - **Full citation extraction**: Case citations, statutes, journal articles, neutral citations, public laws, federal register
10
+ - **Short-form resolution**: Id./Ibid., supra, and short-form case citations resolved to their full antecedents
11
+ - **Reporter database**: 1235 reporters with variant matching and confidence scoring
12
+ - **Citation annotation**: HTML/Markdown markup with auto-escape and position tracking
13
+ - **Bundle optimization**: Tree-shakeable exports, lazy-loaded data, separate entry points
14
+ - **TypeScript native**: Discriminated unions, strict types, full IntelliSense
15
+
16
+ ## Installation
17
+
18
+ ```bash
19
+ npm install eyecite-ts
20
+ ```
21
+
22
+ ## Quick Start
23
+
24
+ ```typescript
25
+ import { extractCitations } from 'eyecite-ts'
26
+
27
+ const text = 'See Smith v. Jones, 500 F.2d 123 (9th Cir. 2020)'
28
+ const citations = extractCitations(text)
29
+
30
+ console.log(citations[0])
31
+ // {
32
+ // type: 'case',
33
+ // volume: 500,
34
+ // reporter: 'F.2d',
35
+ // page: 123,
36
+ // court: '9th Cir.',
37
+ // year: 2020,
38
+ // confidence: 0.85,
39
+ // span: { originalStart: 4, originalEnd: 48 }
40
+ // }
41
+ ```
42
+
43
+ ## Citation Extraction
44
+
45
+ ### Basic Usage
46
+
47
+ ```typescript
48
+ import { extractCitations } from 'eyecite-ts'
49
+
50
+ const text = `
51
+ See Smith v. Jones, 500 F.2d 123 (9th Cir. 2020).
52
+ Also 42 U.S.C. § 1983.
53
+ Compare 123 Harv. L. Rev. 456.
54
+ `
55
+ const citations = extractCitations(text)
56
+
57
+ citations.forEach(citation => {
58
+ console.log(citation.type) // 'case', 'statute', 'journal', etc.
59
+ })
60
+ ```
61
+
62
+ ### Async API
63
+
64
+ ```typescript
65
+ import { extractCitationsAsync } from 'eyecite-ts'
66
+
67
+ const citations = await extractCitationsAsync(text)
68
+ ```
69
+
70
+ ### Custom Patterns
71
+
72
+ ```typescript
73
+ import { extractCitations, casePatterns } from 'eyecite-ts'
74
+
75
+ // Extract only case citations
76
+ const citations = extractCitations(text, {
77
+ patterns: casePatterns
78
+ })
79
+ ```
80
+
81
+ ### Custom Cleaners
82
+
83
+ ```typescript
84
+ import { extractCitations, stripHtmlTags } from 'eyecite-ts'
85
+
86
+ // Use only HTML stripping, skip Unicode normalization
87
+ const citations = extractCitations(html, {
88
+ cleaners: [stripHtmlTags]
89
+ })
90
+ ```
91
+
92
+ ## Resolving Short-Form Citations
93
+
94
+ Short-form citations (Id., supra, short-form case) refer to earlier citations in the document. The resolution engine automatically links them to their full antecedents.
95
+
96
+ ### Convenience API
97
+
98
+ ```typescript
99
+ import { extractCitations } from 'eyecite-ts'
100
+
101
+ const text = `
102
+ Smith v. Jones, 500 F.2d 123 (2020).
103
+ Id. at 125.
104
+ Smith, supra, at 130.
105
+ 500 F.2d at 140.
106
+ `
107
+
108
+ // Convenience: extract + resolve in one call
109
+ const citations = extractCitations(text, { resolve: true })
110
+
111
+ // citations[1] is Id. citation
112
+ console.log(citations[1].resolution)
113
+ // {
114
+ // resolvedTo: 0, // Points to Smith v. Jones (index 0)
115
+ // confidence: 1.0,
116
+ // warnings: []
117
+ // }
118
+ ```
119
+
120
+ ### Power-User API
121
+
122
+ ```typescript
123
+ import { extractCitations, resolveCitations } from 'eyecite-ts'
124
+
125
+ // Step 1: Extract citations
126
+ const citations = extractCitations(text)
127
+
128
+ // Step 2: Resolve short-form citations
129
+ const resolved = resolveCitations(citations, text, {
130
+ scopeStrategy: 'paragraph', // Only resolve within paragraphs
131
+ fuzzyPartyMatching: true, // Enable fuzzy supra matching
132
+ partyMatchThreshold: 0.8, // Similarity threshold (0-1)
133
+ reportUnresolved: true // Report failure reasons
134
+ })
135
+ ```
136
+
137
+ ### Resolution Options
138
+
139
+ | Option | Type | Default | Description |
140
+ |--------|------|---------|-------------|
141
+ | `scopeStrategy` | `'paragraph'` \| `'section'` \| `'footnote'` \| `'none'` | `'paragraph'` | How far back to search for antecedents |
142
+ | `autoDetectParagraphs` | `boolean` | `true` | Auto-detect paragraph boundaries from text |
143
+ | `paragraphBoundaryPattern` | `RegExp` | `/\n\n+/` | Pattern to detect paragraphs |
144
+ | `fuzzyPartyMatching` | `boolean` | `true` | Enable fuzzy party name matching for supra |
145
+ | `partyMatchThreshold` | `number` | `0.8` | Similarity threshold (0-1) for fuzzy matching |
146
+ | `allowNestedResolution` | `boolean` | `false` | Allow Id. to resolve to other short-form citations |
147
+ | `reportUnresolved` | `boolean` | `true` | Report failure reasons for unresolved citations |
148
+
149
+ ### Resolution Examples
150
+
151
+ **Id. citations:**
152
+
153
+ ```typescript
154
+ const text = 'Smith v. Jones, 500 F.2d 123. Id. at 125.'
155
+ const citations = extractCitations(text, { resolve: true })
156
+
157
+ // citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)
158
+ ```
159
+
160
+ **Supra citations:**
161
+
162
+ ```typescript
163
+ const text = 'Smith v. Jones, 500 F.2d 123. See also Smith, supra, at 130.'
164
+ const citations = extractCitations(text, { resolve: true })
165
+
166
+ // citations[1].resolution.resolvedTo === 0 (party name matches "Smith")
167
+ ```
168
+
169
+ **Short-form case citations:**
170
+
171
+ ```typescript
172
+ const text = 'Brown v. Board, 347 U.S. 483 (1954). See 347 U.S. at 495.'
173
+ const citations = extractCitations(text, { resolve: true })
174
+
175
+ // citations[1].resolution.resolvedTo === 0 (volume/reporter matches)
176
+ ```
177
+
178
+ ### Handling Unresolved Citations
179
+
180
+ ```typescript
181
+ const text = 'Id. at 100.' // Orphan Id. with no preceding citation
182
+
183
+ const citations = extractCitations(text, { resolve: true })
184
+
185
+ console.log(citations[0].resolution)
186
+ // {
187
+ // resolvedTo: undefined,
188
+ // failureReason: 'No preceding full citation found',
189
+ // confidence: 0,
190
+ // warnings: []
191
+ // }
192
+ ```
193
+
194
+ To suppress unresolved warnings:
195
+
196
+ ```typescript
197
+ const citations = extractCitations(text, {
198
+ resolve: true,
199
+ resolutionOptions: {
200
+ reportUnresolved: false // Omits resolution field for unresolved citations
201
+ }
202
+ })
203
+ ```
204
+
205
+ ## Citation Validation
206
+
207
+ Validate case citations against the reporters database:
208
+
209
+ ```typescript
210
+ import { validateCitation } from 'eyecite-ts/data'
211
+
212
+ // Returns citations with adjusted confidence scores
213
+ const validated = await validateCitation(citations)
214
+
215
+ // Confidence adjustments:
216
+ // - +0.2 boost for reporter match
217
+ // - -0.3 penalty for reporter mismatch
218
+ // - -0.1 penalty for ambiguous reporter
219
+ ```
220
+
221
+ ## Citation Annotation
222
+
223
+ Add HTML/Markdown markup to citations:
224
+
225
+ ```typescript
226
+ import { annotate } from 'eyecite-ts/annotate'
227
+
228
+ // Template mode (simple)
229
+ const { text: html } = annotate(
230
+ text,
231
+ citations,
232
+ { template: { before: '<cite>', after: '</cite>' } }
233
+ )
234
+
235
+ // Callback mode (full control)
236
+ const { text: linkedHtml } = annotate(text, citations, { callback: (citation) => {
237
+ const url = `https://example.com/${citation.volume}/${citation.reporter}/${citation.page}`
238
+ return `<a href="${url}">${citation.matchedText}</a>`
239
+ } })
240
+ ```
241
+
242
+ Auto-escape is enabled by default for XSS protection:
243
+
244
+ ```typescript
245
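+ // Template mode: the matched citation text is HTML-escaped before wrapping (callback output is inserted as returned)
246
+ const { text: html } = annotate(text, citations, {
247
+ template: { before: '<a>', after: '</a>' }, autoEscape: true // default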
248
+ })
249
+ ```
250
+
251
+ ## Bundle Size
252
+
253
+ Core library is optimized for tree-shaking:
254
+
255
+ - **Core extraction**: 2.5 KB gzipped
256
+ - **Reporter database**: 88.5 KB gzipped (lazy-loaded)
257
+ - **Annotation**: 0.5 KB gzipped
258
+
259
+ Import only what you need:
260
+
261
+ ```typescript
262
+ // Tree-shakeable imports
263
+ import { extractCitations } from 'eyecite-ts' // Core only
264
+ import { validateCitation } from 'eyecite-ts/data' // Core + data
265
+ import { annotate } from 'eyecite-ts/annotate' // Core + annotate
266
+ ```
267
+
268
+ ## Citation Types
269
+
270
+ All citation types are exported with full TypeScript types:
271
+
272
+ ```typescript
273
+ import type {
274
+ Citation,
275
+ FullCaseCitation,
276
+ StatuteCitation,
277
+ JournalCitation,
278
+ NeutralCitation,
279
+ PublicLawCitation,
280
+ FederalRegisterCitation,
281
+ IdCitation,
282
+ SupraCitation,
283
+ ShortFormCaseCitation
284
+ } from 'eyecite-ts'
285
+
286
+ // Discriminated union - switch on type
287
+ citations.forEach(citation => {
288
+ switch (citation.type) {
289
+ case 'case':
290
+ console.log(citation.reporter) // FullCaseCitation
291
+ break
292
+ case 'statute':
293
+ console.log(citation.title) // StatuteCitation
294
+ break
295
+ case 'id':
296
+ console.log(citation.pincite) // IdCitation
297
+ break
298
+ // etc.
299
+ }
300
+ })
301
+ ```
302
+
303
+ ## Architecture
304
+
305
+ Citation extraction follows a 4-stage pipeline:
306
+
307
+ 1. **Clean**: Remove HTML, normalize Unicode, fix smart quotes
308
+ 2. **Tokenize**: Apply regex patterns to find citation candidates
309
+ 3. **Extract**: Parse metadata (volume, reporter, page, etc.)
310
+ 4. **Translate**: Map positions from cleaned text → original text
311
+
312
+ All positions (spans) track both cleaned and original text offsets.
313
+
314
+ See [ARCHITECTURE.md](ARCHITECTURE.md) for details.
315
+
316
+ ## Development
317
+
318
+ ```bash
319
+ # Install dependencies
320
+ npm install
321
+
322
+ # Run tests
323
+ npm test
324
+
325
+ # Type checking
326
+ npm run typecheck
327
+
328
+ # Build
329
+ npm run build
330
+ ```
331
+
332
+ ## License
333
+
334
+ MIT
335
+
336
+ ## Credits
337
+
338
+ Ported from [eyecite](https://github.com/freelawproject/eyecite) (Python) by Free Law Project.
@@ -0,0 +1,2 @@
1
+ Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});function e(e,n,r={}){let{useCleanText:i=!1,autoEscape:a=!0,template:o,callback:s}=r,c=[...n].sort((e,t)=>{let n=i?e.span.cleanStart:e.span.originalStart;return(i?t.span.cleanStart:t.span.originalStart)-n}),l=e,u=new Map;for(let n of c){let r=i?n.span.cleanStart:n.span.originalStart,c=i?n.span.cleanEnd:n.span.originalEnd,d=``;if(s)d=s(n,e.substring(Math.max(0,r-30),Math.min(e.length,c+30)));else if(o){let e=l.substring(r,c),n=a?t(e):e;d=o.before+n+o.after}else continue;l=l.slice(0,r)+d+l.slice(c),u.set(r,r)}return{text:l,positionMap:u,skipped:[]}}function t(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'\/]/g,e=>t[e])}exports.annotate=e;
2
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate(\n text: string,\n citations: Citation[],\n options: AnnotationOptions = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid 
position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n\n for (const citation of sorted) {\n const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped: [] }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return 
text.replace(/[&<>\"'\\/]/g, (char) => map[char])\n}\n"],"mappings":"mEA+CA,SAAgB,EACd,EACA,EACA,EAA6B,EAAE,CACb,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAExB,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAM,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAE9D,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,QAAS,EAAE,CAAE,CAiBnD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,aAAe,GAAS,EAAI,GAAM"}
@@ -0,0 +1,163 @@
1
+ import { t as Citation } from "../citation-BcY5zzWb.cjs";
2
+
3
+ //#region src/annotate/types.d.ts
4
+ /**
5
+ * Options for annotating citations in text.
6
+ *
7
+ * Supports two modes:
8
+ * - **Template mode**: Simple before/after string wrapping (e.g., `<cite>...</cite>`)
9
+ * - **Callback mode**: Full custom annotation logic with access to citation and surrounding context
10
+ *
11
+ * @example Template mode
12
+ * ```typescript
13
+ * annotate(text, citations, {
14
+ * template: { before: '<mark data-type="case">', after: '</mark>' }
15
+ * })
16
+ * ```
17
+ *
18
+ * @example Callback mode
19
+ * ```typescript
20
+ * annotate(text, citations, {
21
+ * callback: (citation, surrounding) => {
22
+ * if (citation.type === 'case') {
23
+ * return `<a href="/cases/${citation.volume}-${citation.page}">${citation.matchedText}</a>`
24
+ * }
25
+ * return `<span>${citation.matchedText}</span>`
26
+ * }
27
+ * })
28
+ * ```
29
+ */
30
+ interface AnnotationOptions {
31
+ /**
32
+ * Apply annotations to cleaned text (true) or original text (false).
33
+ *
34
+ * - `true`: Use citation.span.cleanStart/End positions
35
+ * - `false`: Use citation.span.originalStart/End positions
36
+ *
37
+ * @default false
38
+ */
39
+ useCleanText?: boolean;
40
+ /**
41
+ * Auto-escape HTML entities to prevent XSS injection (template mode only; callback output is inserted as returned).
42
+ *
43
+ * When enabled, special HTML characters are escaped:
44
+ * - `<` → `&lt;`
45
+ * - `>` → `&gt;`
46
+ * - `&` → `&amp;`
47
+ * - `"` → `&quot;`
48
+ * - `'` → `&#39;`
49
+ * - `/` → `&#x2F;`
50
+ *
51
+ * **SECURITY WARNING:** Disabling this option introduces XSS vulnerability
52
+ * if the text contains untrusted user input. Only disable if you are certain
53
+ * the text comes from a trusted source.
54
+ *
55
+ * @default true (secure by default)
56
+ */
57
+ autoEscape?: boolean;
58
+ /**
59
+ * Callback for custom annotation logic.
60
+ *
61
+ * Receives each citation and surrounding context (±30 characters),
62
+ * returns the complete markup string to replace the citation text. Takes precedence over `template` when both are set.
63
+ *
64
+ * @param citation - The citation to annotate
65
+ * @param surrounding - Input text from 30 characters before the citation to 30 characters after it (includes the citation itself)
66
+ * @returns Complete markup string (replaces citation.matchedText)
67
+ */
68
+ callback?: (citation: Citation, surrounding: string) => string;
69
+ /**
70
+ * Template mode: simple before/after markup strings.
71
+ *
72
+ * The citation text (with auto-escaping applied if enabled) is wrapped
73
+ * with these strings: `template.before + citationText + template.after`
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * template: {
78
+ * before: '<cite data-type="case">',
79
+ * after: '</cite>'
80
+ * }
81
+ * // Result: <cite data-type="case">500 F.2d 123</cite>
82
+ * ```
83
+ */
84
+ template?: {
85
+ /** Markup inserted before citation text */before: string; /** Markup inserted after citation text */
86
+ after: string;
87
+ };
88
+ }
89
+ /**
90
+ * Result of annotation operation.
91
+ */
92
+ interface AnnotationResult {
93
+ /**
94
+ * Annotated text with markup inserted at citation positions.
95
+ */
96
+ text: string;
97
+ /**
98
+ * Position mapping from original positions to annotated positions.
99
+ *
100
+ * Tracks how citation positions shift after markup insertion. Keys are the start offsets of annotated citations only; no other positions are stored.
101
+ * Useful for updating external indices (search, highlighting, etc.)
102
+ *
103
+ * Maps: original position → new position after annotation (NOTE(review): verify the stored values against the bundled implementation)
104
+ */
105
+ positionMap: Map<number, number>;
106
+ /**
107
+ * Citations that couldn't be annotated.
108
+ *
109
+ * Currently empty (all citations are annotated if callback/template provided).
110
+ * Future versions may skip overlapping citations or invalid positions.
111
+ */
112
+ skipped: Citation[];
113
+ }
114
+ //#endregion
115
+ //#region src/annotate/annotate.d.ts
116
+ /**
117
+ * Annotate citations in text with custom markup.
118
+ *
119
+ * Supports two modes:
120
+ * - **Template mode**: Simple before/after wrapping (set `options.template`)
121
+ * - **Callback mode**: Custom logic with full citation context (set `options.callback`; takes precedence if both are set)
122
+ *
123
+ * Citations are processed in reverse order to avoid position shifts invalidating
124
+ * subsequent annotations. Position tracking maps original positions to new positions
125
+ * after markup insertion.
126
+ *
127
+ * @param text - Original or cleaned text to annotate (must be the text the chosen span offsets refer to; see `options.useCleanText`)
128
+ * @param citations - Citations to mark up (from extraction pipeline)
129
+ * @param options - Annotation configuration; if neither `template` nor `callback` is set, the text is returned unchanged
130
+ * @returns Annotated text with position mapping
131
+ *
132
+ * @example Template mode
133
+ * ```typescript
134
+ * const result = annotate(text, citations, {
135
+ * template: { before: '<cite>', after: '</cite>' }
136
+ * })
137
+ * // result.text: "See <cite>500 F.2d 123</cite>"
138
+ * ```
139
+ *
140
+ * @example Callback mode
141
+ * ```typescript
142
+ * const result = annotate(text, citations, {
143
+ * callback: (citation) => {
144
+ * if (citation.type === 'case') {
145
+ * return `<a href="/cases/${citation.volume}">${citation.matchedText}</a>`
146
+ * }
147
+ * return citation.matchedText
148
+ * }
149
+ * })
150
+ * ```
151
+ *
152
+ * @example Position tracking
153
+ * ```typescript
154
+ * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })
155
+ * // result.positionMap is keyed by citation start offsets
156
+ * const originalPos = citations[0].span.originalStart
157
+ * const newPos = result.positionMap.get(originalPos)
158
+ * ```
159
+ */
160
+ declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
161
+ //#endregion
162
+ export { AnnotationOptions, AnnotationResult, annotate };
163
+ //# sourceMappingURL=index.d.cts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
@@ -0,0 +1,163 @@
1
+ import { t as Citation } from "../citation-8_GvfEuj.mjs";
2
+
3
+ //#region src/annotate/types.d.ts
4
+ /**
5
+ * Options for annotating citations in text.
6
+ *
7
+ * Supports two modes:
8
+ * - **Template mode**: Simple before/after string wrapping (e.g., `<cite>...</cite>`)
9
+ * - **Callback mode**: Full custom annotation logic with access to citation and surrounding context
10
+ *
11
+ * @example Template mode
12
+ * ```typescript
13
+ * annotate(text, citations, {
14
+ * template: { before: '<mark data-type="case">', after: '</mark>' }
15
+ * })
16
+ * ```
17
+ *
18
+ * @example Callback mode
19
+ * ```typescript
20
+ * annotate(text, citations, {
21
+ * callback: (citation, surrounding) => {
22
+ * if (citation.type === 'case') {
23
+ * return `<a href="/cases/${citation.volume}-${citation.page}">${citation.matchedText}</a>`
24
+ * }
25
+ * return `<span>${citation.matchedText}</span>`
26
+ * }
27
+ * })
28
+ * ```
29
+ */
30
+ interface AnnotationOptions {
31
+ /**
32
+ * Apply annotations to cleaned text (true) or original text (false).
33
+ *
34
+ * - `true`: Use citation.span.cleanStart/End positions
35
+ * - `false`: Use citation.span.originalStart/End positions
36
+ *
37
+ * @default false
38
+ */
39
+ useCleanText?: boolean;
40
+ /**
41
+ * Auto-escape HTML entities to prevent XSS injection (template mode only; callback output is inserted as returned).
42
+ *
43
+ * When enabled, special HTML characters are escaped:
44
+ * - `<` → `&lt;`
45
+ * - `>` → `&gt;`
46
+ * - `&` → `&amp;`
47
+ * - `"` → `&quot;`
48
+ * - `'` → `&#39;`
49
+ * - `/` → `&#x2F;`
50
+ *
51
+ * **SECURITY WARNING:** Disabling this option introduces XSS vulnerability
52
+ * if the text contains untrusted user input. Only disable if you are certain
53
+ * the text comes from a trusted source.
54
+ *
55
+ * @default true (secure by default)
56
+ */
57
+ autoEscape?: boolean;
58
+ /**
59
+ * Callback for custom annotation logic.
60
+ *
61
+ * Receives each citation and surrounding context (±30 characters),
62
+ * returns the complete markup string to replace the citation text. Takes precedence over `template` when both are set.
63
+ *
64
+ * @param citation - The citation to annotate
65
+ * @param surrounding - Input text from 30 characters before the citation to 30 characters after it (includes the citation itself)
66
+ * @returns Complete markup string (replaces citation.matchedText)
67
+ */
68
+ callback?: (citation: Citation, surrounding: string) => string;
69
+ /**
70
+ * Template mode: simple before/after markup strings.
71
+ *
72
+ * The citation text (with auto-escaping applied if enabled) is wrapped
73
+ * with these strings: `template.before + citationText + template.after`
74
+ *
75
+ * @example
76
+ * ```typescript
77
+ * template: {
78
+ * before: '<cite data-type="case">',
79
+ * after: '</cite>'
80
+ * }
81
+ * // Result: <cite data-type="case">500 F.2d 123</cite>
82
+ * ```
83
+ */
84
+ template?: {
85
+ /** Markup inserted before citation text */before: string; /** Markup inserted after citation text */
86
+ after: string;
87
+ };
88
+ }
89
+ /**
90
+ * Result of annotation operation.
91
+ */
92
+ interface AnnotationResult {
93
+ /**
94
+ * Annotated text with markup inserted at citation positions.
95
+ */
96
+ text: string;
97
+ /**
98
+ * Position mapping from original positions to annotated positions.
99
+ *
100
+ * Tracks how citation positions shift after markup insertion. Keys are the start offsets of annotated citations only; no other positions are stored.
101
+ * Useful for updating external indices (search, highlighting, etc.)
102
+ *
103
+ * Maps: original position → new position after annotation (NOTE(review): verify the stored values against the bundled implementation)
104
+ */
105
+ positionMap: Map<number, number>;
106
+ /**
107
+ * Citations that couldn't be annotated.
108
+ *
109
+ * Currently empty (all citations are annotated if callback/template provided).
110
+ * Future versions may skip overlapping citations or invalid positions.
111
+ */
112
+ skipped: Citation[];
113
+ }
114
+ //#endregion
115
+ //#region src/annotate/annotate.d.ts
116
+ /**
117
+ * Annotate citations in text with custom markup.
118
+ *
119
+ * Supports two modes:
120
+ * - **Template mode**: Simple before/after wrapping (set `options.template`)
121
+ * - **Callback mode**: Custom logic with full citation context (set `options.callback`; takes precedence if both are set)
122
+ *
123
+ * Citations are processed in reverse order to avoid position shifts invalidating
124
+ * subsequent annotations. Position tracking maps original positions to new positions
125
+ * after markup insertion.
126
+ *
127
+ * @param text - Original or cleaned text to annotate (must be the text the chosen span offsets refer to; see `options.useCleanText`)
128
+ * @param citations - Citations to mark up (from extraction pipeline)
129
+ * @param options - Annotation configuration; if neither `template` nor `callback` is set, the text is returned unchanged
130
+ * @returns Annotated text with position mapping
131
+ *
132
+ * @example Template mode
133
+ * ```typescript
134
+ * const result = annotate(text, citations, {
135
+ * template: { before: '<cite>', after: '</cite>' }
136
+ * })
137
+ * // result.text: "See <cite>500 F.2d 123</cite>"
138
+ * ```
139
+ *
140
+ * @example Callback mode
141
+ * ```typescript
142
+ * const result = annotate(text, citations, {
143
+ * callback: (citation) => {
144
+ * if (citation.type === 'case') {
145
+ * return `<a href="/cases/${citation.volume}">${citation.matchedText}</a>`
146
+ * }
147
+ * return citation.matchedText
148
+ * }
149
+ * })
150
+ * ```
151
+ *
152
+ * @example Position tracking
153
+ * ```typescript
154
+ * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })
155
+ * // result.positionMap is keyed by citation start offsets
156
+ * const originalPos = citations[0].span.originalStart
157
+ * const newPos = result.positionMap.get(originalPos)
158
+ * ```
159
+ */
160
+ declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
161
+ //#endregion
162
+ export { AnnotationOptions, AnnotationResult, annotate };
163
+ //# sourceMappingURL=index.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
@@ -0,0 +1,2 @@
1
+ function e(e,n,r={}){let{useCleanText:i=!1,autoEscape:a=!0,template:o,callback:s}=r,c=[...n].sort((e,t)=>{let n=i?e.span.cleanStart:e.span.originalStart;return(i?t.span.cleanStart:t.span.originalStart)-n}),l=e,u=new Map;for(let n of c){let r=i?n.span.cleanStart:n.span.originalStart,c=i?n.span.cleanEnd:n.span.originalEnd,d=``;if(s)d=s(n,e.substring(Math.max(0,r-30),Math.min(e.length,c+30)));else if(o){let e=l.substring(r,c),n=a?t(e):e;d=o.before+n+o.after}else continue;l=l.slice(0,r)+d+l.slice(c),u.set(r,r)}return{text:l,positionMap:u,skipped:[]}}function t(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'\/]/g,e=>t[e])}export{e as annotate};
2
+ //# sourceMappingURL=index.mjs.map