eyecite-ts 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,17 +1,27 @@
1
1
  # eyecite-ts
2
2
 
3
- TypeScript legal citation extraction library - port of Python [eyecite](https://github.com/freelawproject/eyecite).
3
+ [![CI](https://github.com/medelman17/eyecite-ts/actions/workflows/ci.yml/badge.svg)](https://github.com/medelman17/eyecite-ts/actions/workflows/ci.yml)
4
+ [![codecov](https://codecov.io/gh/medelman17/eyecite-ts/branch/main/graph/badge.svg)](https://codecov.io/gh/medelman17/eyecite-ts)
5
+ [![npm version](https://img.shields.io/npm/v/eyecite-ts.svg)](https://www.npmjs.com/package/eyecite-ts)
6
+ [![npm bundle size](https://img.shields.io/bundlephobia/minzip/eyecite-ts)](https://bundlephobia.com/package/eyecite-ts)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
8
+ [![Node.js](https://img.shields.io/node/v/eyecite-ts.svg)](https://nodejs.org)
9
+ [![TypeScript](https://img.shields.io/badge/TypeScript-5.9-blue.svg)](https://www.typescriptlang.org/)
10
+ [![Zero Dependencies](https://img.shields.io/badge/dependencies-0-brightgreen.svg)](https://www.npmjs.com/package/eyecite-ts)
4
11
 
5
- Extract, validate, annotate, and resolve legal citations from court opinions and legal documents with zero runtime dependencies and a <50KB bundle size.
12
+ TypeScript legal citation extraction library — a port of Python [eyecite](https://github.com/freelawproject/eyecite).
13
+
14
+ Extract, resolve, and annotate legal citations from court opinions and legal documents with zero runtime dependencies.
6
15
 
7
16
  ## Features
8
17
 
9
18
  - **Full citation extraction**: Case citations, statutes, journal articles, neutral citations, public laws, federal register
10
19
  - **Short-form resolution**: Id./Ibid., supra, and short-form case citations resolved to their full antecedents
11
- - **Reporter database**: 1235 reporters with variant matching and confidence scoring
12
- - **Citation annotation**: HTML/Markdown markup with auto-escape and position tracking
13
- - **Bundle optimization**: Tree-shakeable exports, lazy-loaded data, separate entry points
14
- - **TypeScript native**: Discriminated unions, strict types, full IntelliSense
20
+ - **Reporter database**: 1,200+ reporters with variant matching and confidence scoring
21
+ - **Citation annotation**: HTML markup with auto-escape XSS protection and position tracking
22
+ - **Bundle optimization**: Tree-shakeable exports, lazy-loaded reporter data, separate entry points
23
+ - **TypeScript native**: Discriminated unions, conditional types, type guards, full IntelliSense
24
+ - **Zero dependencies**: No runtime dependencies, 4.4 KB gzipped core bundle
15
25
 
16
26
  ## Installation
17
27
 
@@ -36,13 +46,13 @@ console.log(citations[0])
36
46
  // court: '9th Cir.',
37
47
  // year: 2020,
38
48
  // confidence: 0.85,
39
- // span: { originalStart: 4, originalEnd: 48 }
49
+ // span: { originalStart: 4, originalEnd: 48, cleanStart: 4, cleanEnd: 48 }
40
50
  // }
41
51
  ```
42
52
 
43
53
  ## Citation Extraction
44
54
 
45
- ### Basic Usage
55
+ ### Multiple Citation Types
46
56
 
47
57
  ```typescript
48
58
  import { extractCitations } from 'eyecite-ts'
@@ -70,7 +80,8 @@ const citations = await extractCitationsAsync(text)
70
80
  ### Custom Patterns
71
81
 
72
82
  ```typescript
73
- import { extractCitations, casePatterns } from 'eyecite-ts'
83
+ import { extractCitations } from 'eyecite-ts'
84
+ import { casePatterns } from 'eyecite-ts'
74
85
 
75
86
  // Extract only case citations
76
87
  const citations = extractCitations(text, {
@@ -81,17 +92,17 @@ const citations = extractCitations(text, {
81
92
  ### Custom Cleaners
82
93
 
83
94
  ```typescript
84
- import { extractCitations, stripHtmlTags } from 'eyecite-ts'
95
+ import { extractCitations } from 'eyecite-ts'
85
96
 
86
- // Use only HTML stripping, skip Unicode normalization
97
+ // Use only HTML stripping
87
98
  const citations = extractCitations(html, {
88
- cleaners: [stripHtmlTags]
99
+ cleaners: [(text) => text.replace(/<[^>]+>/g, '')]
89
100
  })
90
101
  ```
91
102
 
92
103
  ## Resolving Short-Form Citations
93
104
 
94
- Short-form citations (Id., supra, short-form case) refer to earlier citations in the document. The resolution engine automatically links them to their full antecedents.
105
+ Short-form citations (Id., supra, short-form case) refer to earlier citations in the document. The resolution engine links them to their full antecedents.
95
106
 
96
107
  ### Convenience API
97
108
 
@@ -105,16 +116,11 @@ const text = `
105
116
  500 F.2d at 140.
106
117
  `
107
118
 
108
- // Convenience: extract + resolve in one call
109
119
  const citations = extractCitations(text, { resolve: true })
110
120
 
111
121
  // citations[1] is Id. citation
112
122
  console.log(citations[1].resolution)
113
- // {
114
- // resolvedTo: 0, // Points to Smith v. Jones (index 0)
115
- // confidence: 1.0,
116
- // warnings: []
117
- // }
123
+ // { resolvedTo: 0, confidence: 1.0 }
118
124
  ```
119
125
 
120
126
  ### Power-User API
@@ -122,15 +128,13 @@ console.log(citations[1].resolution)
122
128
  ```typescript
123
129
  import { extractCitations, resolveCitations } from 'eyecite-ts'
124
130
 
125
- // Step 1: Extract citations
126
131
  const citations = extractCitations(text)
127
132
 
128
- // Step 2: Resolve short-form citations
129
133
  const resolved = resolveCitations(citations, text, {
130
- scopeStrategy: 'paragraph', // Only resolve within paragraphs
131
- fuzzyPartyMatching: true, // Enable fuzzy supra matching
132
- partyMatchThreshold: 0.8, // Similarity threshold (0-1)
133
- reportUnresolved: true // Report failure reasons
134
+ scopeStrategy: 'paragraph',
135
+ fuzzyPartyMatching: true,
136
+ partyMatchThreshold: 0.8,
137
+ reportUnresolved: true
134
138
  })
135
139
  ```
136
140
 
@@ -153,125 +157,91 @@ const resolved = resolveCitations(citations, text, {
153
157
  ```typescript
154
158
  const text = 'Smith v. Jones, 500 F.2d 123. Id. at 125.'
155
159
  const citations = extractCitations(text, { resolve: true })
156
-
157
- // citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)
160
+ // citations[1].resolution.resolvedTo === 0
158
161
  ```
159
162
 
160
163
  **Supra citations:**
161
164
 
162
165
  ```typescript
163
- const text = 'Smith v. Jones, 500 F.2d 123. See also Smith, supra, at 130.'
166
+ const text = 'Smith v. Jones, 500 F.2d 123. Smith, supra, at 130.'
164
167
  const citations = extractCitations(text, { resolve: true })
165
-
166
168
  // citations[1].resolution.resolvedTo === 0 (party name matches "Smith")
167
169
  ```
168
170
 
169
171
  **Short-form case citations:**
170
172
 
171
173
  ```typescript
172
- const text = 'Brown v. Board, 347 U.S. 483 (1954). See 347 U.S. at 495.'
174
+ const text = 'Brown v. Board, 347 U.S. 483. See 347 U.S. at 495.'
173
175
  const citations = extractCitations(text, { resolve: true })
174
-
175
176
  // citations[1].resolution.resolvedTo === 0 (volume/reporter matches)
176
177
  ```
177
178
 
178
- ### Handling Unresolved Citations
179
+ **Unresolved citations:**
179
180
 
180
181
  ```typescript
181
182
  const text = 'Id. at 100.' // Orphan Id. with no preceding citation
182
-
183
183
  const citations = extractCitations(text, { resolve: true })
184
-
185
- console.log(citations[0].resolution)
186
- // {
187
- // resolvedTo: undefined,
188
- // failureReason: 'No preceding full citation found',
189
- // confidence: 0,
190
- // warnings: []
191
- // }
192
- ```
193
-
194
- To suppress unresolved warnings:
195
-
196
- ```typescript
197
- const citations = extractCitations(text, {
198
- resolve: true,
199
- resolutionOptions: {
200
- reportUnresolved: false // Omits resolution field for unresolved citations
201
- }
202
- })
203
- ```
204
-
205
- ## Citation Validation
206
-
207
- Validate case citations against the reporters database:
208
-
209
- ```typescript
210
- import { validateCitation } from 'eyecite-ts/data'
211
-
212
- // Returns citations with adjusted confidence scores
213
- const validated = await validateCitation(citations)
214
-
215
- // Confidence adjustments:
216
- // - +0.2 boost for reporter match
217
- // - -0.3 penalty for reporter mismatch
218
- // - -0.1 penalty for ambiguous reporter
184
+ // citations[0].resolution.failureReason === 'No preceding full case citation found'
219
185
  ```
220
186
 
221
187
  ## Citation Annotation
222
188
 
223
- Add HTML/Markdown markup to citations:
189
+ Add HTML markup to citations in text:
224
190
 
225
191
  ```typescript
226
192
  import { annotate } from 'eyecite-ts/annotate'
193
+ import { extractCitations } from 'eyecite-ts'
194
+
195
+ const text = 'See Smith v. Jones, 500 F.2d 123 (2020).'
196
+ const citations = extractCitations(text)
227
197
 
228
- // Template mode (simple)
229
- const html = annotate(
230
- text,
231
- citations,
232
- '<a href="{{url}}">{{text}}</a>'
233
- )
198
+ // Template mode
199
+ const result = annotate(text, citations, {
200
+ template: { before: '<cite>', after: '</cite>' }
201
+ })
202
+ // result.text === 'See Smith v. Jones, <cite>500 F.2d 123</cite> (2020).'
234
203
 
235
204
  // Callback mode (full control)
236
- const html = annotate(text, citations, (citation, text) => {
237
- const url = `https://example.com/${citation.volume}/${citation.reporter}/${citation.page}`
238
- return `<a href="${url}">${text}</a>`
205
+ const result2 = annotate(text, citations, {
206
+ callback: (citation, surrounding) => {
207
+ if (citation.type === 'case') {
208
+ return `<a href="/cases/${citation.volume}">${citation.matchedText}</a>`
209
+ }
210
+ return citation.matchedText
211
+ }
239
212
  })
240
213
  ```
241
214
 
242
215
  Auto-escape is enabled by default for XSS protection:
243
216
 
244
217
  ```typescript
245
- // User input is automatically escaped
246
- const html = annotate(text, citations, '<a>{{text}}</a>', {
247
- autoEscape: true // default
218
+ const result = annotate(text, citations, {
219
+ template: { before: '<cite>', after: '</cite>' },
220
+ autoEscape: true // default — escapes &, <, >, ", ', /
248
221
  })
249
222
  ```
250
223
 
251
- ## Bundle Size
224
+ ## Reporter Validation
252
225
 
253
- Core library is optimized for tree-shaking:
254
-
255
- - **Core extraction**: 2.5 KB gzipped
256
- - **Reporter database**: 88.5 KB gzipped (lazy-loaded)
257
- - **Annotation**: 0.5 KB gzipped
258
-
259
- Import only what you need:
226
+ Validate case citations against the reporters database:
260
227
 
261
228
  ```typescript
262
- // Tree-shakeable imports
263
- import { extractCitations } from 'eyecite-ts' // Core only
264
- import { validateCitation } from 'eyecite-ts/data' // Core + data
265
- import { annotate } from 'eyecite-ts/annotate' // Core + annotate
229
+ import { extractWithValidation } from 'eyecite-ts'
230
+
231
+ const validated = await extractWithValidation(text, { validate: true })
232
+ // Confidence adjustments:
233
+ // +0.2 boost for reporter match
234
+ // -0.3 penalty for unknown reporter
235
+ // -0.1 per extra match for ambiguous reporter
266
236
  ```
267
237
 
268
- ## Citation Types
238
+ ## Type System
269
239
 
270
- All citation types are exported with full TypeScript types:
240
+ All citation types use a discriminated union on the `type` field:
271
241
 
272
242
  ```typescript
273
243
  import type {
274
- Citation,
244
+ Citation, // Union of all 9 types
275
245
  FullCaseCitation,
276
246
  StatuteCitation,
277
247
  JournalCitation,
@@ -280,24 +250,69 @@ import type {
280
250
  FederalRegisterCitation,
281
251
  IdCitation,
282
252
  SupraCitation,
283
- ShortFormCaseCitation
253
+ ShortFormCaseCitation,
254
+ CitationOfType, // Extract subtype: CitationOfType<'case'> = FullCaseCitation
255
+ ExtractorMap, // Maps FullCitationType keys to citation subtypes
256
+ FullCitation, // Union of full citation types
257
+ ShortFormCitation, // Union of short-form types
284
258
  } from 'eyecite-ts'
259
+ ```
285
260
 
286
- // Discriminated union - switch on type
287
- citations.forEach(citation => {
288
- switch (citation.type) {
289
- case 'case':
290
- console.log(citation.reporter) // FullCaseCitation
291
- break
292
- case 'statute':
293
- console.log(citation.title) // StatuteCitation
294
- break
295
- case 'id':
296
- console.log(citation.pincite) // IdCitation
297
- break
298
- // etc.
299
- }
300
- })
261
+ ### Type Guards
262
+
263
+ ```typescript
264
+ import {
265
+ isFullCitation,
266
+ isShortFormCitation,
267
+ isCaseCitation,
268
+ isCitationType,
269
+ assertUnreachable
270
+ } from 'eyecite-ts'
271
+
272
+ // Specific guards
273
+ if (isFullCitation(citation)) {
274
+ // citation: FullCitation
275
+ }
276
+
277
+ // Generic guard — narrows to any specific type
278
+ if (isCitationType(citation, 'statute')) {
279
+ // citation: StatuteCitation
280
+ }
281
+
282
+ // Exhaustiveness check in switch statements
283
+ switch (citation.type) {
284
+ case 'case': /* ... */ break
285
+ case 'statute': /* ... */ break
286
+ // ... all 9 types ...
287
+ default: assertUnreachable(citation.type)
288
+ }
289
+ ```
290
+
291
+ ### Resolved Citation Types
292
+
293
+ `ResolvedCitation` uses a conditional type — `resolution` can hold a result only on short-form citations; on full citations it is always `undefined`:
294
+
295
+ ```typescript
296
+ import type { ResolvedCitation } from 'eyecite-ts'
297
+
298
+ // On short-form citations: resolution: ResolutionResult | undefined
299
+ // On full citations: resolution?: undefined
300
+ ```
301
+
302
+ ## Bundle Size
303
+
304
+ Three entry points for optimal tree-shaking:
305
+
306
+ | Entry Point | Import | Gzipped |
307
+ |------------|--------|---------|
308
+ | Core extraction | `eyecite-ts` | 4.4 KB |
309
+ | Annotation | `eyecite-ts/annotate` | 0.5 KB |
310
+ | Reporter data | `eyecite-ts/data` | 88.5 KB (lazy-loaded) |
311
+
312
+ ```typescript
313
+ import { extractCitations } from 'eyecite-ts' // Core only
314
+ import { annotate } from 'eyecite-ts/annotate' // Annotation
315
+ import { loadReporters } from 'eyecite-ts/data' // Reporter database
301
316
  ```
302
317
 
303
318
  ## Architecture
@@ -307,28 +322,26 @@ Citation extraction follows a 4-stage pipeline:
307
322
  1. **Clean**: Remove HTML, normalize Unicode, fix smart quotes
308
323
  2. **Tokenize**: Apply regex patterns to find citation candidates
309
324
  3. **Extract**: Parse metadata (volume, reporter, page, etc.)
310
- 4. **Translate**: Map positions from cleaned text → original text
325
+ 4. **Resolve** (optional): Link short-form citations to antecedents
311
326
 
312
- All positions (spans) track both cleaned and original text offsets.
327
+ All positions (spans) track both cleaned and original text offsets via `TransformationMap`.
313
328
 
314
329
  See [ARCHITECTURE.md](ARCHITECTURE.md) for details.
315
330
 
316
331
  ## Development
317
332
 
318
333
  ```bash
319
- # Install dependencies
320
- npm install
321
-
322
- # Run tests
323
- npm test
324
-
325
- # Type checking
326
- npm run typecheck
327
-
328
- # Build
329
- npm run build
334
+ pnpm install # Install dependencies
335
+ pnpm test # Run tests (vitest, watch mode)
336
+ pnpm exec vitest run # Run tests once
337
+ pnpm typecheck # Type-check with tsc
338
+ pnpm build # Build (ESM + CJS + DTS)
339
+ pnpm lint # Lint with Biome
340
+ pnpm format # Format with Biome
330
341
  ```
331
342
 
343
+ 304 tests, 97% statement coverage, 91% branch coverage.
344
+
332
345
  ## License
333
346
 
334
347
  MIT
@@ -1,2 +1,2 @@
1
- Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});function e(e,n,r={}){let{useCleanText:i=!1,autoEscape:a=!0,template:o,callback:s}=r,c=[...n].sort((e,t)=>{let n=i?e.span.cleanStart:e.span.originalStart;return(i?t.span.cleanStart:t.span.originalStart)-n}),l=e,u=new Map;for(let n of c){let r=i?n.span.cleanStart:n.span.originalStart,c=i?n.span.cleanEnd:n.span.originalEnd,d=``;if(s)d=s(n,e.substring(Math.max(0,r-30),Math.min(e.length,c+30)));else if(o){let e=l.substring(r,c),n=a?t(e):e;d=o.before+n+o.after}else continue;l=l.slice(0,r)+d+l.slice(c),u.set(r,r)}return{text:l,positionMap:u,skipped:[]}}function t(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'\/]/g,e=>t[e])}exports.annotate=e;
1
+ Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});function e(e,t,i={}){let{useCleanText:a=!1,autoEscape:o=!0,template:s,callback:c}=i,l=[...t].sort((e,t)=>{let n=a?e.span.cleanStart:e.span.originalStart;return(a?t.span.cleanStart:t.span.originalStart)-n}),u=e,d=new Map,f=[];for(let t of l){let i=a?t.span.cleanStart:t.span.originalStart,l=a?t.span.cleanEnd:t.span.originalEnd;if(!a){let e=n(u,i,l);if(e===null){f.push(t);continue}i=e.start,l=e.end}let p=``;if(c)p=c(t,e.substring(Math.max(0,i-30),Math.min(e.length,l+30)));else if(s){let e=u.substring(i,l),t=o?r(e):e;p=s.before+t+s.after}else continue;u=u.slice(0,i)+p+u.slice(l),d.set(i,i)}return{text:u,positionMap:d,skipped:f}}function t(e,t){let n=t-1;for(;n>=0;){if(e[n]===`>`)return null;if(e[n]===`<`){let r=t;for(;r<e.length;){if(e[r]===`>`)return{tagStart:n,tagEnd:r+1};r++}return{tagStart:n,tagEnd:e.length}}n--}return null}function n(e,n,r){let i=n,a=r,o=t(e,n);o&&(i=o.tagStart);let s=t(e,r);return s&&(a=s.tagEnd),i>=a?null:{start:i,end:a}}function r(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'/]/g,e=>t[e])}exports.annotate=e;
2
2
  //# sourceMappingURL=index.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate(\n text: string,\n citations: Citation[],\n options: AnnotationOptions = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid 
position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n\n for (const citation of sorted) {\n const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped: [] }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return 
text.replace(/[&<>\"'\\/]/g, (char) => map[char])\n}\n"],"mappings":"mEA+CA,SAAgB,EACd,EACA,EACA,EAA6B,EAAE,CACb,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAExB,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAM,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAE9D,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,QAAS,EAAE,CAAE,CAiBnD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,aAAe,GAAS,EAAI,GAAM"}
1
+ {"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // 
Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n let start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n let end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n // Snap positions out of HTML tags when annotating original text\n if (!useCleanText) {\n const snapped = snapOutOfHtmlTags(result, start, end)\n if (snapped === null) {\n // Could not safely snap — skip this citation\n skipped.push(citation)\n continue\n }\n start = snapped.start\n end = snapped.end\n }\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? 
escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped }\n}\n\n/**\n * Check if a position falls inside an HTML tag (between `<` and `>`).\n * Returns the index of the opening `<` if inside a tag, otherwise -1.\n */\nfunction findContainingTag(text: string, pos: number): { tagStart: number; tagEnd: number } | null {\n // Search backwards from pos for '<' without encountering '>' first\n let i = pos - 1\n while (i >= 0) {\n if (text[i] === '>') return null // Hit a tag close — we're outside\n if (text[i] === '<') {\n // Found opening '<' — now find the closing '>'\n let j = pos\n while (j < text.length) {\n if (text[j] === '>') return { tagStart: i, tagEnd: j + 1 }\n j++\n }\n // Unclosed tag — treat as inside\n return { tagStart: i, tagEnd: text.length }\n }\n i--\n }\n return null\n}\n\n/**\n * Snap annotation start/end positions to avoid landing inside HTML tags.\n *\n * If a position falls inside an HTML tag, it is moved:\n * - Start position: snapped to before the tag's `<`\n * - End position: snapped to after the tag's `>`\n *\n * Returns null if the positions can't be safely adjusted (e.g., entirely\n * within a single tag).\n */\nfunction snapOutOfHtmlTags(\n text: string,\n start: number,\n end: number,\n): { start: number; end: number } | null {\n let snappedStart = start\n let snappedEnd = end\n\n const startTag = findContainingTag(text, start)\n if (startTag) {\n snappedStart = startTag.tagStart\n }\n\n const endTag = findContainingTag(text, end)\n if (endTag) {\n snappedEnd = endTag.tagEnd\n }\n\n // Sanity check: start must come before end\n if 
(snappedStart >= snappedEnd) return null\n\n return { start: snappedStart, end: snappedEnd }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return text.replace(/[&<>\"'/]/g, (char) => map[char])\n}\n"],"mappings":"mEA+CA,SAAgB,EACd,EACA,EACA,EAAgC,EAAE,CAChB,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAClB,EAAsB,EAAE,CAE9B,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAI,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAGhE,GAAI,CAAC,EAAc,CACjB,IAAM,EAAU,EAAkB,EAAQ,EAAO,EAAI,CACrD,GAAI,IAAY,KAAM,CAEpB,EAAQ,KAAK,EAAS,CACtB,SAEF,EAAQ,EAAQ,MAChB,EAAM,EAAQ,IAGhB,IAAI,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,UAAS,CAO/C,SAAS,EAAkB,EAAc,EAA0D,CAEjG,IAAI,EAAI,EAAM,EACd,KAAO,GAAK,GAAG,CACb,GAAI,EAAK,KAAO,IAAK,OAAO,KAC5B,GAAI,EAAK,KAAO,IAAK,CAEnB,IAAI,EAAI,EACR,KAAO,EAAI,EAAK,QAAQ,CACtB,GAAI,EAAK,KAAO,IAAK,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAI,EAAG,CAC1D,IAGF,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAK,OAAQ,CAE7C,IAEF,OAAO,KAaT,SAAS,EACP,EACA,EACA,EACuC,CACvC,IAAI,EAAe,EACf,EAAa,EAEX,EAAW,EAAkB,EAAM,EAAM,CAC3C,IACF,EAAe,EAAS,UAG1B,IAA
M,EAAS,EAAkB,EAAM,EAAI,CAQ3C,OAPI,IACF,EAAa,EAAO,QAIlB,GAAgB,EAAmB,KAEhC,CAAE,MAAO,EAAc,IAAK,EAAY,CAiBjD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,YAAc,GAAS,EAAI,GAAM"}
@@ -1,4 +1,4 @@
1
- import { t as Citation } from "../citation-BcY5zzWb.cjs";
1
+ import { t as Citation } from "../citation-qKSc_Myj.cjs";
2
2
 
3
3
  //#region src/annotate/types.d.ts
4
4
  /**
@@ -27,7 +27,7 @@ import { t as Citation } from "../citation-BcY5zzWb.cjs";
27
27
  * })
28
28
  * ```
29
29
  */
30
- interface AnnotationOptions {
30
+ interface AnnotationOptions<C extends Citation = Citation> {
31
31
  /**
32
32
  * Apply annotations to cleaned text (true) or original text (false).
33
33
  *
@@ -65,7 +65,7 @@ interface AnnotationOptions {
65
65
  * @param surrounding - Text around the citation (for context-aware markup)
66
66
  * @returns Complete markup string (replaces citation.matchedText)
67
67
  */
68
- callback?: (citation: Citation, surrounding: string) => string;
68
+ callback?: (citation: C, surrounding: string) => string;
69
69
  /**
70
70
  * Template mode: simple before/after markup strings.
71
71
  *
@@ -157,7 +157,7 @@ interface AnnotationResult {
157
157
  * const newPos = result.positionMap.get(originalPos)
158
158
  * ```
159
159
  */
160
- declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
160
+ declare function annotate<C extends Citation = Citation>(text: string, citations: C[], options?: AnnotationOptions<C>): AnnotationResult;
161
161
  //#endregion
162
162
  export { AnnotationOptions, AnnotationResult, annotate };
163
163
  //# sourceMappingURL=index.d.cts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
1
+ {"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;;;;;;;;AAoEA;;UApEiB,iBAAA,WAA4B,QAAA,GAAW,QAAA;EA0F7C;;;;;;;;EAjFT,YAAA;;;ACUF;;;;;;;;;;;;;;;EDSE,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,CAAA,EAAG,WAAA;;;;;;;;;;;;;;;;EAiBzB,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;iBAAgB,QAAA,WAAmB,QAAA,GAAW,QAAA,CAAA,CAC5C,IAAA,UACA,SAAA,EAAW,CAAA,IACX,OAAA,GAAS,iBAAA,CAAkB,CAAA,IAC1B,gBAAA"}
@@ -1,4 +1,4 @@
1
- import { t as Citation } from "../citation-8_GvfEuj.mjs";
1
+ import { t as Citation } from "../citation-DAyM8kNA.mjs";
2
2
 
3
3
  //#region src/annotate/types.d.ts
4
4
  /**
@@ -27,7 +27,7 @@ import { t as Citation } from "../citation-8_GvfEuj.mjs";
27
27
  * })
28
28
  * ```
29
29
  */
30
- interface AnnotationOptions {
30
+ interface AnnotationOptions<C extends Citation = Citation> {
31
31
  /**
32
32
  * Apply annotations to cleaned text (true) or original text (false).
33
33
  *
@@ -65,7 +65,7 @@ interface AnnotationOptions {
65
65
  * @param surrounding - Text around the citation (for context-aware markup)
66
66
  * @returns Complete markup string (replaces citation.matchedText)
67
67
  */
68
- callback?: (citation: Citation, surrounding: string) => string;
68
+ callback?: (citation: C, surrounding: string) => string;
69
69
  /**
70
70
  * Template mode: simple before/after markup strings.
71
71
  *
@@ -157,7 +157,7 @@ interface AnnotationResult {
157
157
  * const newPos = result.positionMap.get(originalPos)
158
158
  * ```
159
159
  */
160
- declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
160
+ declare function annotate<C extends Citation = Citation>(text: string, citations: C[], options?: AnnotationOptions<C>): AnnotationResult;
161
161
  //#endregion
162
162
  export { AnnotationOptions, AnnotationResult, annotate };
163
163
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;;;;;;;;AAoEA;;UApEiB,iBAAA,WAA4B,QAAA,GAAW,QAAA;EA0F7C;;;;;;;;EAjFT,YAAA;;;ACUF;;;;;;;;;;;;;;;EDSE,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,CAAA,EAAG,WAAA;;;;;;;;;;;;;;;;EAiBzB,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;iBAAgB,QAAA,WAAmB,QAAA,GAAW,QAAA,CAAA,CAC5C,IAAA,UACA,SAAA,EAAW,CAAA,IACX,OAAA,GAAS,iBAAA,CAAkB,CAAA,IAC1B,gBAAA"}
@@ -1,2 +1,2 @@
1
- function e(e,n,r={}){let{useCleanText:i=!1,autoEscape:a=!0,template:o,callback:s}=r,c=[...n].sort((e,t)=>{let n=i?e.span.cleanStart:e.span.originalStart;return(i?t.span.cleanStart:t.span.originalStart)-n}),l=e,u=new Map;for(let n of c){let r=i?n.span.cleanStart:n.span.originalStart,c=i?n.span.cleanEnd:n.span.originalEnd,d=``;if(s)d=s(n,e.substring(Math.max(0,r-30),Math.min(e.length,c+30)));else if(o){let e=l.substring(r,c),n=a?t(e):e;d=o.before+n+o.after}else continue;l=l.slice(0,r)+d+l.slice(c),u.set(r,r)}return{text:l,positionMap:u,skipped:[]}}function t(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'\/]/g,e=>t[e])}export{e as annotate};
1
+ function e(e,t,i={}){let{useCleanText:a=!1,autoEscape:o=!0,template:s,callback:c}=i,l=[...t].sort((e,t)=>{let n=a?e.span.cleanStart:e.span.originalStart;return(a?t.span.cleanStart:t.span.originalStart)-n}),u=e,d=new Map,f=[];for(let t of l){let i=a?t.span.cleanStart:t.span.originalStart,l=a?t.span.cleanEnd:t.span.originalEnd;if(!a){let e=n(u,i,l);if(e===null){f.push(t);continue}i=e.start,l=e.end}let p=``;if(c)p=c(t,e.substring(Math.max(0,i-30),Math.min(e.length,l+30)));else if(s){let e=u.substring(i,l),t=o?r(e):e;p=s.before+t+s.after}else continue;u=u.slice(0,i)+p+u.slice(l),d.set(i,i)}return{text:u,positionMap:d,skipped:f}}function t(e,t){let n=t-1;for(;n>=0;){if(e[n]===`>`)return null;if(e[n]===`<`){let r=t;for(;r<e.length;){if(e[r]===`>`)return{tagStart:n,tagEnd:r+1};r++}return{tagStart:n,tagEnd:e.length}}n--}return null}function n(e,n,r){let i=n,a=r,o=t(e,n);o&&(i=o.tagStart);let s=t(e,r);return s&&(a=s.tagEnd),i>=a?null:{start:i,end:a}}function r(e){let t={"&":`&amp;`,"<":`&lt;`,">":`&gt;`,'"':`&quot;`,"'":`&#39;`,"/":`&#x2F;`};return e.replace(/[&<>"'/]/g,e=>t[e])}export{e as annotate};
2
2
  //# sourceMappingURL=index.mjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.mjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate(\n text: string,\n citations: Citation[],\n options: AnnotationOptions = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid 
position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n\n for (const citation of sorted) {\n const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped: [] }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return 
text.replace(/[&<>\"'\\/]/g, (char) => map[char])\n}\n"],"mappings":"AA+CA,SAAgB,EACd,EACA,EACA,EAA6B,EAAE,CACb,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAExB,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAM,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAE9D,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,QAAS,EAAE,CAAE,CAiBnD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,aAAe,GAAS,EAAI,GAAM"}
1
+ {"version":3,"file":"index.mjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // 
Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n let start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n let end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n // Snap positions out of HTML tags when annotating original text\n if (!useCleanText) {\n const snapped = snapOutOfHtmlTags(result, start, end)\n if (snapped === null) {\n // Could not safely snap — skip this citation\n skipped.push(citation)\n continue\n }\n start = snapped.start\n end = snapped.end\n }\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? 
escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped }\n}\n\n/**\n * Check if a position falls inside an HTML tag (between `<` and `>`).\n * Returns the index of the opening `<` if inside a tag, otherwise -1.\n */\nfunction findContainingTag(text: string, pos: number): { tagStart: number; tagEnd: number } | null {\n // Search backwards from pos for '<' without encountering '>' first\n let i = pos - 1\n while (i >= 0) {\n if (text[i] === '>') return null // Hit a tag close — we're outside\n if (text[i] === '<') {\n // Found opening '<' — now find the closing '>'\n let j = pos\n while (j < text.length) {\n if (text[j] === '>') return { tagStart: i, tagEnd: j + 1 }\n j++\n }\n // Unclosed tag — treat as inside\n return { tagStart: i, tagEnd: text.length }\n }\n i--\n }\n return null\n}\n\n/**\n * Snap annotation start/end positions to avoid landing inside HTML tags.\n *\n * If a position falls inside an HTML tag, it is moved:\n * - Start position: snapped to before the tag's `<`\n * - End position: snapped to after the tag's `>`\n *\n * Returns null if the positions can't be safely adjusted (e.g., entirely\n * within a single tag).\n */\nfunction snapOutOfHtmlTags(\n text: string,\n start: number,\n end: number,\n): { start: number; end: number } | null {\n let snappedStart = start\n let snappedEnd = end\n\n const startTag = findContainingTag(text, start)\n if (startTag) {\n snappedStart = startTag.tagStart\n }\n\n const endTag = findContainingTag(text, end)\n if (endTag) {\n snappedEnd = endTag.tagEnd\n }\n\n // Sanity check: start must come before end\n if 
(snappedStart >= snappedEnd) return null\n\n return { start: snappedStart, end: snappedEnd }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&amp;`\n * - `<` → `&lt;`\n * - `>` → `&gt;`\n * - `\"` → `&quot;`\n * - `'` → `&#39;`\n * - `/` → `&#x2F;`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&amp;',\n '<': '&lt;',\n '>': '&gt;',\n '\"': '&quot;',\n \"'\": '&#39;',\n '/': '&#x2F;',\n }\n return text.replace(/[&<>\"'/]/g, (char) => map[char])\n}\n"],"mappings":"AA+CA,SAAgB,EACd,EACA,EACA,EAAgC,EAAE,CAChB,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAClB,EAAsB,EAAE,CAE9B,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAI,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAGhE,GAAI,CAAC,EAAc,CACjB,IAAM,EAAU,EAAkB,EAAQ,EAAO,EAAI,CACrD,GAAI,IAAY,KAAM,CAEpB,EAAQ,KAAK,EAAS,CACtB,SAEF,EAAQ,EAAQ,MAChB,EAAM,EAAQ,IAGhB,IAAI,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,UAAS,CAO/C,SAAS,EAAkB,EAAc,EAA0D,CAEjG,IAAI,EAAI,EAAM,EACd,KAAO,GAAK,GAAG,CACb,GAAI,EAAK,KAAO,IAAK,OAAO,KAC5B,GAAI,EAAK,KAAO,IAAK,CAEnB,IAAI,EAAI,EACR,KAAO,EAAI,EAAK,QAAQ,CACtB,GAAI,EAAK,KAAO,IAAK,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAI,EAAG,CAC1D,IAGF,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAK,OAAQ,CAE7C,IAEF,OAAO,KAaT,SAAS,EACP,EACA,EACA,EACuC,CACvC,IAAI,EAAe,EACf,EAAa,EAEX,EAAW,EAAkB,EAAM,EAAM,CAC3C,IACF,EAAe,EAAS,UAG1B,IAAM
,EAAS,EAAkB,EAAM,EAAI,CAQ3C,OAPI,IACF,EAAa,EAAO,QAIlB,GAAgB,EAAmB,KAEhC,CAAE,MAAO,EAAc,IAAK,EAAY,CAiBjD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,YAAc,GAAS,EAAI,GAAM"}