eyecite-ts 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +338 -0
- package/dist/annotate/index.cjs +2 -0
- package/dist/annotate/index.cjs.map +1 -0
- package/dist/annotate/index.d.cts +163 -0
- package/dist/annotate/index.d.cts.map +1 -0
- package/dist/annotate/index.d.mts +163 -0
- package/dist/annotate/index.d.mts.map +1 -0
- package/dist/annotate/index.mjs +2 -0
- package/dist/annotate/index.mjs.map +1 -0
- package/dist/citation-8_GvfEuj.d.mts +286 -0
- package/dist/citation-8_GvfEuj.d.mts.map +1 -0
- package/dist/citation-BcY5zzWb.d.cts +286 -0
- package/dist/citation-BcY5zzWb.d.cts.map +1 -0
- package/dist/data/index.cjs +2 -0
- package/dist/data/index.cjs.map +1 -0
- package/dist/data/index.d.cts +116 -0
- package/dist/data/index.d.cts.map +1 -0
- package/dist/data/index.d.mts +116 -0
- package/dist/data/index.d.mts.map +1 -0
- package/dist/data/index.mjs +2 -0
- package/dist/data/index.mjs.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +682 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +682 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +2 -0
- package/dist/index.mjs.map +1 -0
- package/dist/reporters-BclWimmk.cjs +2 -0
- package/dist/reporters-BclWimmk.cjs.map +1 -0
- package/dist/reporters-DYNnh4O0.mjs +2 -0
- package/dist/reporters-DYNnh4O0.mjs.map +1 -0
- package/package.json +69 -0
package/README.md
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
# eyecite-ts
|
|
2
|
+
|
|
3
|
+
TypeScript legal citation extraction library - port of Python [eyecite](https://github.com/freelawproject/eyecite).
|
|
4
|
+
|
|
5
|
+
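Extract, validate, annotate, and resolve legal citations from court opinions and legal documents with zero runtime dependencies. The core bundle is under 50 KB; the reporter database is a separate, lazy-loaded entry point (see [Bundle Size](#bundle-size)).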
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Full citation extraction**: Case citations, statutes, journal articles, neutral citations, public laws, federal register
|
|
10
|
+
- **Short-form resolution**: Id./Ibid., supra, and short-form case citations resolved to their full antecedents
|
|
11
|
+
- **Reporter database**: 1235 reporters with variant matching and confidence scoring
|
|
12
|
+
- **Citation annotation**: HTML/Markdown markup with auto-escape and position tracking
|
|
13
|
+
- **Bundle optimization**: Tree-shakeable exports, lazy-loaded data, separate entry points
|
|
14
|
+
- **TypeScript native**: Discriminated unions, strict types, full IntelliSense
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install eyecite-ts
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```typescript
|
|
25
|
+
import { extractCitations } from 'eyecite-ts'
|
|
26
|
+
|
|
27
|
+
const text = 'See Smith v. Jones, 500 F.2d 123 (9th Cir. 2020)'
|
|
28
|
+
const citations = extractCitations(text)
|
|
29
|
+
|
|
30
|
+
console.log(citations[0])
|
|
31
|
+
// {
|
|
32
|
+
// type: 'case',
|
|
33
|
+
// volume: 500,
|
|
34
|
+
// reporter: 'F.2d',
|
|
35
|
+
// page: 123,
|
|
36
|
+
// court: '9th Cir.',
|
|
37
|
+
// year: 2020,
|
|
38
|
+
// confidence: 0.85,
|
|
39
|
+
// span: { originalStart: 4, originalEnd: 48 }
|
|
40
|
+
// }
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Citation Extraction
|
|
44
|
+
|
|
45
|
+
### Basic Usage
|
|
46
|
+
|
|
47
|
+
```typescript
|
|
48
|
+
import { extractCitations } from 'eyecite-ts'
|
|
49
|
+
|
|
50
|
+
const text = `
|
|
51
|
+
See Smith v. Jones, 500 F.2d 123 (9th Cir. 2020).
|
|
52
|
+
Also 42 U.S.C. § 1983.
|
|
53
|
+
Compare 123 Harv. L. Rev. 456.
|
|
54
|
+
`
|
|
55
|
+
const citations = extractCitations(text)
|
|
56
|
+
|
|
57
|
+
citations.forEach(citation => {
|
|
58
|
+
console.log(citation.type) // 'case', 'statute', 'journal', etc.
|
|
59
|
+
})
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Async API
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
import { extractCitationsAsync } from 'eyecite-ts'
|
|
66
|
+
|
|
67
|
+
const citations = await extractCitationsAsync(text)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Custom Patterns
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
import { extractCitations, casePatterns } from 'eyecite-ts'
|
|
74
|
+
|
|
75
|
+
// Extract only case citations
|
|
76
|
+
const citations = extractCitations(text, {
|
|
77
|
+
patterns: casePatterns
|
|
78
|
+
})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Custom Cleaners
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
import { extractCitations, stripHtmlTags } from 'eyecite-ts'
|
|
85
|
+
|
|
86
|
+
// Use only HTML stripping, skip Unicode normalization
|
|
87
|
+
const citations = extractCitations(html, {
|
|
88
|
+
cleaners: [stripHtmlTags]
|
|
89
|
+
})
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## Resolving Short-Form Citations
|
|
93
|
+
|
|
94
|
+
Short-form citations (Id., supra, short-form case) refer to earlier citations in the document. The resolution engine automatically links them to their full antecedents.
|
|
95
|
+
|
|
96
|
+
### Convenience API
|
|
97
|
+
|
|
98
|
+
```typescript
|
|
99
|
+
import { extractCitations } from 'eyecite-ts'
|
|
100
|
+
|
|
101
|
+
const text = `
|
|
102
|
+
Smith v. Jones, 500 F.2d 123 (2020).
|
|
103
|
+
Id. at 125.
|
|
104
|
+
Smith, supra, at 130.
|
|
105
|
+
500 F.2d at 140.
|
|
106
|
+
`
|
|
107
|
+
|
|
108
|
+
// Convenience: extract + resolve in one call
|
|
109
|
+
const citations = extractCitations(text, { resolve: true })
|
|
110
|
+
|
|
111
|
+
// citations[1] is Id. citation
|
|
112
|
+
console.log(citations[1].resolution)
|
|
113
|
+
// {
|
|
114
|
+
// resolvedTo: 0, // Points to Smith v. Jones (index 0)
|
|
115
|
+
// confidence: 1.0,
|
|
116
|
+
// warnings: []
|
|
117
|
+
// }
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Power-User API
|
|
121
|
+
|
|
122
|
+
```typescript
|
|
123
|
+
import { extractCitations, resolveCitations } from 'eyecite-ts'
|
|
124
|
+
|
|
125
|
+
// Step 1: Extract citations
|
|
126
|
+
const citations = extractCitations(text)
|
|
127
|
+
|
|
128
|
+
// Step 2: Resolve short-form citations
|
|
129
|
+
const resolved = resolveCitations(citations, text, {
|
|
130
|
+
scopeStrategy: 'paragraph', // Only resolve within paragraphs
|
|
131
|
+
fuzzyPartyMatching: true, // Enable fuzzy supra matching
|
|
132
|
+
partyMatchThreshold: 0.8, // Similarity threshold (0-1)
|
|
133
|
+
reportUnresolved: true // Report failure reasons
|
|
134
|
+
})
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Resolution Options
|
|
138
|
+
|
|
139
|
+
| Option | Type | Default | Description |
|
|
140
|
+
|--------|------|---------|-------------|
|
|
141
|
+
| `scopeStrategy` | `'paragraph'` \| `'section'` \| `'footnote'` \| `'none'` | `'paragraph'` | How far back to search for antecedents |
|
|
142
|
+
| `autoDetectParagraphs` | `boolean` | `true` | Auto-detect paragraph boundaries from text |
|
|
143
|
+
| `paragraphBoundaryPattern` | `RegExp` | `/\n\n+/` | Pattern to detect paragraphs |
|
|
144
|
+
| `fuzzyPartyMatching` | `boolean` | `true` | Enable fuzzy party name matching for supra |
|
|
145
|
+
| `partyMatchThreshold` | `number` | `0.8` | Similarity threshold (0-1) for fuzzy matching |
|
|
146
|
+
| `allowNestedResolution` | `boolean` | `false` | Allow Id. to resolve to other short-form citations |
|
|
147
|
+
| `reportUnresolved` | `boolean` | `true` | Report failure reasons for unresolved citations |
|
|
148
|
+
|
|
149
|
+
### Resolution Examples
|
|
150
|
+
|
|
151
|
+
**Id. citations:**
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
const text = 'Smith v. Jones, 500 F.2d 123. Id. at 125.'
|
|
155
|
+
const citations = extractCitations(text, { resolve: true })
|
|
156
|
+
|
|
157
|
+
// citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
**Supra citations:**
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
const text = 'Smith v. Jones, 500 F.2d 123. See also Smith, supra, at 130.'
|
|
164
|
+
const citations = extractCitations(text, { resolve: true })
|
|
165
|
+
|
|
166
|
+
// citations[1].resolution.resolvedTo === 0 (party name matches "Smith")
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Short-form case citations:**
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
const text = 'Brown v. Board, 347 U.S. 483 (1954). See 347 U.S. at 495.'
|
|
173
|
+
const citations = extractCitations(text, { resolve: true })
|
|
174
|
+
|
|
175
|
+
// citations[1].resolution.resolvedTo === 0 (volume/reporter matches)
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Handling Unresolved Citations
|
|
179
|
+
|
|
180
|
+
```typescript
|
|
181
|
+
const text = 'Id. at 100.' // Orphan Id. with no preceding citation
|
|
182
|
+
|
|
183
|
+
const citations = extractCitations(text, { resolve: true })
|
|
184
|
+
|
|
185
|
+
console.log(citations[0].resolution)
|
|
186
|
+
// {
|
|
187
|
+
// resolvedTo: undefined,
|
|
188
|
+
// failureReason: 'No preceding full citation found',
|
|
189
|
+
// confidence: 0,
|
|
190
|
+
// warnings: []
|
|
191
|
+
// }
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
To omit the `resolution` field (and its failure reason) for citations that could not be resolved:
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
const citations = extractCitations(text, {
|
|
198
|
+
resolve: true,
|
|
199
|
+
resolutionOptions: {
|
|
200
|
+
reportUnresolved: false // Omits resolution field for unresolved citations
|
|
201
|
+
}
|
|
202
|
+
})
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
## Citation Validation
|
|
206
|
+
|
|
207
|
+
Validate case citations against the reporters database:
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
import { validateCitation } from 'eyecite-ts/data'
|
|
211
|
+
|
|
212
|
+
// Returns citations with adjusted confidence scores
|
|
213
|
+
const validated = await validateCitation(citations)
|
|
214
|
+
|
|
215
|
+
// Confidence adjustments:
|
|
216
|
+
// - +0.2 boost for reporter match
|
|
217
|
+
// - -0.3 penalty for reporter mismatch
|
|
218
|
+
// - -0.1 penalty for ambiguous reporter
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Citation Annotation
|
|
222
|
+
|
|
223
|
+
Add HTML/Markdown markup to citations:
|
|
224
|
+
|
|
225
|
+
```typescript
|
|
226
|
+
import { annotate } from 'eyecite-ts/annotate'
|
|
227
|
+
|
|
228
|
+
// Template mode (simple)
|
|
229
|
+
const { text: html } = annotate(
|
|
230
|
+
text,
|
|
231
|
+
citations,
|
|
232
|
+
{ template: { before: '<cite>', after: '</cite>' } }
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
// Callback mode (full control)
|
|
236
|
+
const { text: linked } = annotate(text, citations, { callback: (citation) => {
|
|
237
|
+
const url = `https://example.com/${citation.volume}/${citation.reporter}/${citation.page}`
|
|
238
|
+
return `<a href="${url}">${citation.matchedText}</a>`
|
|
239
|
+
} })
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
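In template mode, auto-escape is enabled by default for XSS protection (markup returned by a callback is inserted exactly as returned, so escape it yourself):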
|
|
243
|
+
|
|
244
|
+
```typescript
|
|
245
|
+
// User input is automatically escaped
|
|
246
|
+
const { text: html } = annotate(text, citations, { template: { before: '<cite>', after: '</cite>' },
|
|
247
|
+
autoEscape: true // default
|
|
248
|
+
})
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Bundle Size
|
|
252
|
+
|
|
253
|
+
Core library is optimized for tree-shaking:
|
|
254
|
+
|
|
255
|
+
- **Core extraction**: 2.5 KB gzipped
|
|
256
|
+
- **Reporter database**: 88.5 KB gzipped (lazy-loaded)
|
|
257
|
+
- **Annotation**: 0.5 KB gzipped
|
|
258
|
+
|
|
259
|
+
Import only what you need:
|
|
260
|
+
|
|
261
|
+
```typescript
|
|
262
|
+
// Tree-shakeable imports
|
|
263
|
+
import { extractCitations } from 'eyecite-ts' // Core only
|
|
264
|
+
import { validateCitation } from 'eyecite-ts/data' // Core + data
|
|
265
|
+
import { annotate } from 'eyecite-ts/annotate' // Core + annotate
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Citation Types
|
|
269
|
+
|
|
270
|
+
All citation types are exported with full TypeScript types:
|
|
271
|
+
|
|
272
|
+
```typescript
|
|
273
|
+
import type {
|
|
274
|
+
Citation,
|
|
275
|
+
FullCaseCitation,
|
|
276
|
+
StatuteCitation,
|
|
277
|
+
JournalCitation,
|
|
278
|
+
NeutralCitation,
|
|
279
|
+
PublicLawCitation,
|
|
280
|
+
FederalRegisterCitation,
|
|
281
|
+
IdCitation,
|
|
282
|
+
SupraCitation,
|
|
283
|
+
ShortFormCaseCitation
|
|
284
|
+
} from 'eyecite-ts'
|
|
285
|
+
|
|
286
|
+
// Discriminated union - switch on type
|
|
287
|
+
citations.forEach(citation => {
|
|
288
|
+
switch (citation.type) {
|
|
289
|
+
case 'case':
|
|
290
|
+
console.log(citation.reporter) // FullCaseCitation
|
|
291
|
+
break
|
|
292
|
+
case 'statute':
|
|
293
|
+
console.log(citation.title) // StatuteCitation
|
|
294
|
+
break
|
|
295
|
+
case 'id':
|
|
296
|
+
console.log(citation.pincite) // IdCitation
|
|
297
|
+
break
|
|
298
|
+
// etc.
|
|
299
|
+
}
|
|
300
|
+
})
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## Architecture
|
|
304
|
+
|
|
305
|
+
Citation extraction follows a 4-stage pipeline:
|
|
306
|
+
|
|
307
|
+
1. **Clean**: Remove HTML, normalize Unicode, fix smart quotes
|
|
308
|
+
2. **Tokenize**: Apply regex patterns to find citation candidates
|
|
309
|
+
3. **Extract**: Parse metadata (volume, reporter, page, etc.)
|
|
310
|
+
4. **Translate**: Map positions from cleaned text → original text
|
|
311
|
+
|
|
312
|
+
All positions (spans) track both cleaned and original text offsets.
|
|
313
|
+
|
|
314
|
+
See [ARCHITECTURE.md](ARCHITECTURE.md) for details.
|
|
315
|
+
|
|
316
|
+
## Development
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
# Install dependencies
|
|
320
|
+
npm install
|
|
321
|
+
|
|
322
|
+
# Run tests
|
|
323
|
+
npm test
|
|
324
|
+
|
|
325
|
+
# Type checking
|
|
326
|
+
npm run typecheck
|
|
327
|
+
|
|
328
|
+
# Build
|
|
329
|
+
npm run build
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
## License
|
|
333
|
+
|
|
334
|
+
MIT
|
|
335
|
+
|
|
336
|
+
## Credits
|
|
337
|
+
|
|
338
|
+
Ported from [eyecite](https://github.com/freelawproject/eyecite) (Python) by Free Law Project.
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});function e(e,n,r={}){let{useCleanText:i=!1,autoEscape:a=!0,template:o,callback:s}=r,c=[...n].sort((e,t)=>{let n=i?e.span.cleanStart:e.span.originalStart;return(i?t.span.cleanStart:t.span.originalStart)-n}),l=e,u=new Map;for(let n of c){let r=i?n.span.cleanStart:n.span.originalStart,c=i?n.span.cleanEnd:n.span.originalEnd,d=``;if(s)d=s(n,e.substring(Math.max(0,r-30),Math.min(e.length,c+30)));else if(o){let e=l.substring(r,c),n=a?t(e):e;d=o.before+n+o.after}else continue;l=l.slice(0,r)+d+l.slice(c),u.set(r,r)}return{text:l,positionMap:u,skipped:[]}}function t(e){let t={"&":`&`,"<":`<`,">":`>`,'"':`"`,"'":`'`,"/":`/`};return e.replace(/[&<>"'\/]/g,e=>t[e])}exports.annotate=e;
|
|
2
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate(\n text: string,\n citations: Citation[],\n options: AnnotationOptions = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid position 
shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n\n for (const citation of sorted) {\n const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped: [] }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&`\n * - `<` → `<`\n * - `>` → `>`\n * - `\"` → `"`\n * - `'` → `'`\n * - `/` → `/`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&',\n '<': '<',\n '>': '>',\n '\"': '"',\n \"'\": ''',\n '/': '/',\n }\n return text.replace(/[&<>\"'\\/]/g, (char) => 
map[char])\n}\n"],"mappings":"mEA+CA,SAAgB,EACd,EACA,EACA,EAA6B,EAAE,CACb,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAExB,IAAK,IAAM,KAAY,EAAQ,CAC7B,IAAM,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,YAE9D,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,QAAS,EAAE,CAAE,CAiBnD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,aAAe,GAAS,EAAI,GAAM"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { t as Citation } from "../citation-BcY5zzWb.cjs";
|
|
2
|
+
|
|
3
|
+
//#region src/annotate/types.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Options for annotating citations in text.
|
|
6
|
+
*
|
|
7
|
+
* Supports two modes:
|
|
8
|
+
* - **Template mode**: Simple before/after string wrapping (e.g., `<cite>...</cite>`)
|
|
9
|
+
* - **Callback mode**: Full custom annotation logic with access to citation and surrounding context
|
|
10
|
+
*
|
|
11
|
+
* @example Template mode
|
|
12
|
+
* ```typescript
|
|
13
|
+
* annotate(text, citations, {
|
|
14
|
+
* template: { before: '<mark data-type="case">', after: '</mark>' }
|
|
15
|
+
* })
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* @example Callback mode
|
|
19
|
+
* ```typescript
|
|
20
|
+
* annotate(text, citations, {
|
|
21
|
+
* callback: (citation, surrounding) => {
|
|
22
|
+
* if (citation.type === 'case') {
|
|
23
|
+
* return `<a href="/cases/${citation.volume}-${citation.page}">${citation.matchedText}</a>`
|
|
24
|
+
* }
|
|
25
|
+
* return `<span>${citation.matchedText}</span>`
|
|
26
|
+
* }
|
|
27
|
+
* })
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
interface AnnotationOptions {
|
|
31
|
+
/**
|
|
32
|
+
* Apply annotations to cleaned text (true) or original text (false).
|
|
33
|
+
*
|
|
34
|
+
* - `true`: Use citation.span.cleanStart/End positions
|
|
35
|
+
* - `false`: Use citation.span.originalStart/End positions
|
|
36
|
+
*
|
|
37
|
+
* @default false
|
|
38
|
+
*/
|
|
39
|
+
useCleanText?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Auto-escape HTML entities to prevent XSS injection.
|
|
42
|
+
*
|
|
43
|
+
* When enabled, special HTML characters are escaped:
|
|
44
|
+
* - `<` → `&lt;`
|
|
45
|
+
* - `>` → `&gt;`
|
|
46
|
+
* - `&` → `&amp;`
|
|
47
|
+
* - `"` → `&quot;`
|
|
48
|
+
* - `'` → `&#39;`
|
|
49
|
+
* - `/` → `&#x2F;`
|
|
50
|
+
*
|
|
51
|
+
* **SECURITY WARNING:** Disabling this option introduces XSS vulnerability
|
|
52
|
+
* if the text contains untrusted user input. Only disable if you are certain
|
|
53
|
+
* the text comes from a trusted source.
|
|
54
|
+
*
|
|
55
|
+
* @default true (secure by default)
|
|
56
|
+
*/
|
|
57
|
+
autoEscape?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Callback for custom annotation logic.
|
|
60
|
+
*
|
|
61
|
+
* Receives each citation and surrounding context (±30 characters),
|
|
62
|
+
* returns the complete markup string to replace the citation text.
|
|
63
|
+
*
|
|
64
|
+
* @param citation - The citation to annotate
|
|
65
|
+
* @param surrounding - Text around the citation (for context-aware markup)
|
|
66
|
+
* @returns Complete markup string (replaces citation.matchedText)
|
|
67
|
+
*/
|
|
68
|
+
callback?: (citation: Citation, surrounding: string) => string;
|
|
69
|
+
/**
|
|
70
|
+
* Template mode: simple before/after markup strings.
|
|
71
|
+
*
|
|
72
|
+
* The citation text (with auto-escaping applied if enabled) is wrapped
|
|
73
|
+
* with these strings: `template.before + citationText + template.after`
|
|
74
|
+
*
|
|
75
|
+
* @example
|
|
76
|
+
* ```typescript
|
|
77
|
+
* template: {
|
|
78
|
+
* before: '<cite data-type="case">',
|
|
79
|
+
* after: '</cite>'
|
|
80
|
+
* }
|
|
81
|
+
* // Result: <cite data-type="case">500 F.2d 123</cite>
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
template?: {
|
|
85
|
+
/** Markup inserted before citation text */before: string; /** Markup inserted after citation text */
|
|
86
|
+
after: string;
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Result of annotation operation.
|
|
91
|
+
*/
|
|
92
|
+
interface AnnotationResult {
|
|
93
|
+
/**
|
|
94
|
+
* Annotated text with markup inserted at citation positions.
|
|
95
|
+
*/
|
|
96
|
+
text: string;
|
|
97
|
+
/**
|
|
98
|
+
* Position mapping from original positions to annotated positions.
|
|
99
|
+
*
|
|
100
|
+
* Tracks how citation positions shift after markup insertion.
|
|
101
|
+
* Useful for updating external indices (search, highlighting, etc.)
|
|
102
|
+
*
|
|
103
|
+
* Maps: original position → new position after annotation
|
|
104
|
+
*/
|
|
105
|
+
positionMap: Map<number, number>;
|
|
106
|
+
/**
|
|
107
|
+
* Citations that couldn't be annotated.
|
|
108
|
+
*
|
|
109
|
+
* Currently empty (all citations are annotated if callback/template provided).
|
|
110
|
+
* Future versions may skip overlapping citations or invalid positions.
|
|
111
|
+
*/
|
|
112
|
+
skipped: Citation[];
|
|
113
|
+
}
|
|
114
|
+
//#endregion
|
|
115
|
+
//#region src/annotate/annotate.d.ts
|
|
116
|
+
/**
|
|
117
|
+
* Annotate citations in text with custom markup.
|
|
118
|
+
*
|
|
119
|
+
* Supports two modes:
|
|
120
|
+
* - **Template mode**: Simple before/after wrapping (set `options.template`)
|
|
121
|
+
* - **Callback mode**: Custom logic with full citation context (set `options.callback`)
|
|
122
|
+
*
|
|
123
|
+
* Citations are processed in reverse order to avoid position shifts invalidating
|
|
124
|
+
* subsequent annotations. Position tracking maps original positions to new positions
|
|
125
|
+
* after markup insertion.
|
|
126
|
+
*
|
|
127
|
+
* @param text - Original or cleaned text to annotate
|
|
128
|
+
* @param citations - Citations to mark up (from extraction pipeline)
|
|
129
|
+
* @param options - Annotation configuration
|
|
130
|
+
* @returns Annotated text with position mapping
|
|
131
|
+
*
|
|
132
|
+
* @example Template mode
|
|
133
|
+
* ```typescript
|
|
134
|
+
* const result = annotate(text, citations, {
|
|
135
|
+
* template: { before: '<cite>', after: '</cite>' }
|
|
136
|
+
* })
|
|
137
|
+
* // Result: "See <cite>500 F.2d 123</cite>"
|
|
138
|
+
* ```
|
|
139
|
+
*
|
|
140
|
+
* @example Callback mode
|
|
141
|
+
* ```typescript
|
|
142
|
+
* const result = annotate(text, citations, {
|
|
143
|
+
* callback: (citation) => {
|
|
144
|
+
* if (citation.type === 'case') {
|
|
145
|
+
* return `<a href="/cases/${citation.volume}">${citation.matchedText}</a>`
|
|
146
|
+
* }
|
|
147
|
+
* return citation.matchedText
|
|
148
|
+
* }
|
|
149
|
+
* })
|
|
150
|
+
* ```
|
|
151
|
+
*
|
|
152
|
+
* @example Position tracking
|
|
153
|
+
* ```typescript
|
|
154
|
+
* const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })
|
|
155
|
+
* // result.positionMap tracks how positions shifted
|
|
156
|
+
* const originalPos = 10
|
|
157
|
+
* const newPos = result.positionMap.get(originalPos)
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
|
|
161
|
+
//#endregion
|
|
162
|
+
export { AnnotationOptions, AnnotationResult, annotate };
|
|
163
|
+
//# sourceMappingURL=index.d.cts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
import { t as Citation } from "../citation-8_GvfEuj.mjs";
|
|
2
|
+
|
|
3
|
+
//#region src/annotate/types.d.ts
|
|
4
|
+
/**
|
|
5
|
+
* Options for annotating citations in text.
|
|
6
|
+
*
|
|
7
|
+
* Supports two modes:
|
|
8
|
+
* - **Template mode**: Simple before/after string wrapping (e.g., `<cite>...</cite>`)
|
|
9
|
+
* - **Callback mode**: Full custom annotation logic with access to citation and surrounding context
|
|
10
|
+
*
|
|
11
|
+
* @example Template mode
|
|
12
|
+
* ```typescript
|
|
13
|
+
* annotate(text, citations, {
|
|
14
|
+
* template: { before: '<mark data-type="case">', after: '</mark>' }
|
|
15
|
+
* })
|
|
16
|
+
* ```
|
|
17
|
+
*
|
|
18
|
+
* @example Callback mode
|
|
19
|
+
* ```typescript
|
|
20
|
+
* annotate(text, citations, {
|
|
21
|
+
* callback: (citation, surrounding) => {
|
|
22
|
+
* if (citation.type === 'case') {
|
|
23
|
+
* return `<a href="/cases/${citation.volume}-${citation.page}">${citation.matchedText}</a>`
|
|
24
|
+
* }
|
|
25
|
+
* return `<span>${citation.matchedText}</span>`
|
|
26
|
+
* }
|
|
27
|
+
* })
|
|
28
|
+
* ```
|
|
29
|
+
*/
|
|
30
|
+
interface AnnotationOptions {
|
|
31
|
+
/**
|
|
32
|
+
* Apply annotations to cleaned text (true) or original text (false).
|
|
33
|
+
*
|
|
34
|
+
* - `true`: Use citation.span.cleanStart/End positions
|
|
35
|
+
* - `false`: Use citation.span.originalStart/End positions
|
|
36
|
+
*
|
|
37
|
+
* @default false
|
|
38
|
+
*/
|
|
39
|
+
useCleanText?: boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Auto-escape HTML entities to prevent XSS injection.
|
|
42
|
+
*
|
|
43
|
+
* When enabled, special HTML characters are escaped:
|
|
44
|
+
* - `<` → `&lt;`
|
|
45
|
+
* - `>` → `&gt;`
|
|
46
|
+
* - `&` → `&amp;`
|
|
47
|
+
* - `"` → `&quot;`
|
|
48
|
+
* - `'` → `&#39;`
|
|
49
|
+
* - `/` → `&#x2F;`
|
|
50
|
+
*
|
|
51
|
+
* **SECURITY WARNING:** Disabling this option introduces XSS vulnerability
|
|
52
|
+
* if the text contains untrusted user input. Only disable if you are certain
|
|
53
|
+
* the text comes from a trusted source.
|
|
54
|
+
*
|
|
55
|
+
* @default true (secure by default)
|
|
56
|
+
*/
|
|
57
|
+
autoEscape?: boolean;
|
|
58
|
+
/**
|
|
59
|
+
* Callback for custom annotation logic.
|
|
60
|
+
*
|
|
61
|
+
* Receives each citation and surrounding context (±30 characters),
|
|
62
|
+
* returns the complete markup string to replace the citation text.
|
|
63
|
+
*
|
|
64
|
+
* @param citation - The citation to annotate
|
|
65
|
+
* @param surrounding - Text around the citation (for context-aware markup)
|
|
66
|
+
* @returns Complete markup string (replaces citation.matchedText)
|
|
67
|
+
*/
|
|
68
|
+
callback?: (citation: Citation, surrounding: string) => string;
|
|
69
|
+
/**
|
|
70
|
+
* Template mode: simple before/after markup strings.
|
|
71
|
+
*
|
|
72
|
+
* The citation text (with auto-escaping applied if enabled) is wrapped
|
|
73
|
+
* with these strings: `template.before + citationText + template.after`
|
|
74
|
+
*
|
|
75
|
+
* @example
|
|
76
|
+
* ```typescript
|
|
77
|
+
* template: {
|
|
78
|
+
* before: '<cite data-type="case">',
|
|
79
|
+
* after: '</cite>'
|
|
80
|
+
* }
|
|
81
|
+
* // Result: <cite data-type="case">500 F.2d 123</cite>
|
|
82
|
+
* ```
|
|
83
|
+
*/
|
|
84
|
+
template?: {
|
|
85
|
+
/** Markup inserted before citation text */before: string; /** Markup inserted after citation text */
|
|
86
|
+
after: string;
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Result of annotation operation.
|
|
91
|
+
*/
|
|
92
|
+
interface AnnotationResult {
|
|
93
|
+
/**
|
|
94
|
+
* Annotated text with markup inserted at citation positions.
|
|
95
|
+
*/
|
|
96
|
+
text: string;
|
|
97
|
+
/**
|
|
98
|
+
* Position mapping from original positions to annotated positions.
|
|
99
|
+
*
|
|
100
|
+
* Tracks how citation positions shift after markup insertion.
|
|
101
|
+
* Useful for updating external indices (search, highlighting, etc.)
|
|
102
|
+
*
|
|
103
|
+
* Maps: original position → new position after annotation
|
|
104
|
+
*/
|
|
105
|
+
positionMap: Map<number, number>;
|
|
106
|
+
/**
|
|
107
|
+
* Citations that couldn't be annotated.
|
|
108
|
+
*
|
|
109
|
+
* Currently empty (all citations are annotated if callback/template provided).
|
|
110
|
+
* Future versions may skip overlapping citations or invalid positions.
|
|
111
|
+
*/
|
|
112
|
+
skipped: Citation[];
|
|
113
|
+
}
|
|
114
|
+
//#endregion
|
|
115
|
+
//#region src/annotate/annotate.d.ts
|
|
116
|
+
/**
|
|
117
|
+
* Annotate citations in text with custom markup.
|
|
118
|
+
*
|
|
119
|
+
* Supports two modes:
|
|
120
|
+
* - **Template mode**: Simple before/after wrapping (set `options.template`)
|
|
121
|
+
* - **Callback mode**: Custom logic with full citation context (set `options.callback`)
|
|
122
|
+
*
|
|
123
|
+
* Citations are processed in reverse order to avoid position shifts invalidating
|
|
124
|
+
* subsequent annotations. Position tracking maps original positions to new positions
|
|
125
|
+
* after markup insertion.
|
|
126
|
+
*
|
|
127
|
+
* @param text - Original or cleaned text to annotate
|
|
128
|
+
* @param citations - Citations to mark up (from extraction pipeline)
|
|
129
|
+
* @param options - Annotation configuration
|
|
130
|
+
* @returns Annotated text with position mapping
|
|
131
|
+
*
|
|
132
|
+
* @example Template mode
|
|
133
|
+
* ```typescript
|
|
134
|
+
* const result = annotate(text, citations, {
|
|
135
|
+
* template: { before: '<cite>', after: '</cite>' }
|
|
136
|
+
* })
|
|
137
|
+
* // Result: "See <cite>500 F.2d 123</cite>"
|
|
138
|
+
* ```
|
|
139
|
+
*
|
|
140
|
+
* @example Callback mode
|
|
141
|
+
* ```typescript
|
|
142
|
+
* const result = annotate(text, citations, {
|
|
143
|
+
* callback: (citation) => {
|
|
144
|
+
* if (citation.type === 'case') {
|
|
145
|
+
* return `<a href="/cases/${citation.volume}">${citation.matchedText}</a>`
|
|
146
|
+
* }
|
|
147
|
+
* return citation.matchedText
|
|
148
|
+
* }
|
|
149
|
+
* })
|
|
150
|
+
* ```
|
|
151
|
+
*
|
|
152
|
+
* @example Position tracking
|
|
153
|
+
* ```typescript
|
|
154
|
+
* const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })
|
|
155
|
+
* // result.positionMap tracks how positions shifted
|
|
156
|
+
* const originalPos = 10
|
|
157
|
+
* const newPos = result.positionMap.get(originalPos)
|
|
158
|
+
* ```
|
|
159
|
+
*/
|
|
160
|
+
declare function annotate(text: string, citations: Citation[], options?: AnnotationOptions): AnnotationResult;
|
|
161
|
+
//#endregion
|
|
162
|
+
export { AnnotationOptions, AnnotationResult, annotate };
|
|
163
|
+
//# sourceMappingURL=index.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;AAoEA;;;;;;;;;UApEiB,iBAAA;EA0FN;;;;ACvEX;;;;EDVE,YAAA;;;;;;;;;;;;;;;;;;EAmBA,UAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,QAAA,EAAU,WAAA;;;;;;;;;;;;;;;;EAiBhC,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AA1FX;;;;;;;;;;;;;;;AAoEA;;;;;;;;;;;;;;ACjDA;;;;;;;;;;;;;;iBAAgB,QAAA,CACd,IAAA,UACA,SAAA,EAAW,QAAA,IACX,OAAA,GAAS,iBAAA,GACR,gBAAA"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
/**
 * Replacement entity for each character escaped in template mode.
 * The keys must stay in sync with the character class used by
 * `escapeHtmlEntities`.
 */
const HTML_ENTITIES = {
  "&": `&amp;`,
  "<": `&lt;`,
  ">": `&gt;`,
  '"': `&quot;`,
  "'": `&#39;`,
  "/": `&#x2F;`,
};

/**
 * Escape the HTML-significant characters `& < > " ' /` in `text`.
 *
 * @param text - Raw text taken from the document being annotated.
 * @returns The text with each of those characters replaced by its entity.
 */
function escapeHtmlEntities(text) {
  return text.replace(/[&<>"'\/]/g, (char) => HTML_ENTITIES[char]);
}

/**
 * Annotate citations in `text` with custom markup.
 *
 * Two modes are supported; `callback` wins when both are supplied:
 * - callback mode: `options.callback(citation, surrounding)` returns the
 *   complete replacement markup. `surrounding` is the citation plus up to
 *   30 characters of the un-annotated text on each side. The returned markup
 *   is inserted as-is (no escaping is applied to it).
 * - template mode: the citation text is wrapped as
 *   `template.before + citationText + template.after`; the citation text is
 *   HTML-escaped unless `options.autoEscape` is `false`.
 *
 * When neither mode is configured the text is returned unchanged.
 *
 * @param text - Text the citation spans refer to.
 * @param citations - Citations carrying a `span` with `originalStart`/`originalEnd`
 *   and `cleanStart`/`cleanEnd` offsets.
 * @param options - `useCleanText` (default `false`) selects the clean offsets;
 *   `autoEscape` (default `true`); `template`; `callback`.
 * @returns `text`: the annotated text. `positionMap`: for every annotated
 *   citation, its start offset in the input text mapped to the offset where its
 *   markup starts in the annotated text. `skipped`: citations that were not
 *   annotated because their span is not a valid range inside `text` or because
 *   it overlaps a citation that was annotated.
 */
function annotate(text, citations, options = {}) {
  const { useCleanText = false, autoEscape = true, template, callback } = options;
  const positionMap = new Map();
  const skipped = [];

  // Nothing to insert: hand the text back untouched.
  if (!callback && !template) {
    return { text, positionMap, skipped };
  }

  // Keep only spans that describe a real range of `text`. Filtering before the
  // sort also keeps NaN/undefined offsets out of the comparator.
  const candidates = [];
  for (const citation of citations) {
    const start = useCleanText ? citation.span.cleanStart : citation.span.originalStart;
    const end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd;
    const isValidRange =
      Number.isInteger(start) &&
      Number.isInteger(end) &&
      start >= 0 &&
      start <= end &&
      end <= text.length;
    if (isValidRange) {
      candidates.push({ citation, start, end });
    } else {
      skipped.push(citation);
    }
  }

  // Work from the end of the text towards the beginning so that an insertion
  // never moves the offsets of the citations still waiting to be processed.
  candidates.sort((a, b) => b.start - a.start);

  let result = text;
  // Start offset of the nearest citation already annotated to the right.
  let rightBoundary = text.length;
  // Applied edits, in descending start order: { start, delta }.
  const applied = [];

  for (const { citation, start, end } of candidates) {
    // A span reaching into an already annotated citation would splice into
    // that citation's markup; skip it instead of corrupting the output.
    if (end > rightBoundary) {
      skipped.push(citation);
      continue;
    }

    let markup;
    if (callback) {
      const surrounding = text.substring(
        Math.max(0, start - 30),
        Math.min(text.length, end + 30)
      );
      markup = String(callback(citation, surrounding));
    } else {
      const citationText = text.substring(start, end);
      const safeText = autoEscape ? escapeHtmlEntities(citationText) : citationText;
      markup = template.before + safeText + template.after;
    }

    result = result.slice(0, start) + markup + result.slice(end);
    applied.push({ start, delta: markup.length - (end - start) });
    rightBoundary = start;
  }

  // Walk the applied edits left to right; each citation is shifted by the
  // total growth of all annotations that precede it.
  let shift = 0;
  for (let i = applied.length - 1; i >= 0; i -= 1) {
    const { start, delta } = applied[i];
    positionMap.set(start, start + shift);
    shift += delta;
  }

  return { text: result, positionMap, skipped };
}

export { annotate };
|
|
2
|
+
//# sourceMappingURL=index.mjs.map
|