eyecite-ts 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +179 -10
- package/dist/annotate/index.cjs +1 -1
- package/dist/annotate/index.cjs.map +1 -1
- package/dist/annotate/index.d.cts +14 -1
- package/dist/annotate/index.d.cts.map +1 -1
- package/dist/annotate/index.d.mts +14 -1
- package/dist/annotate/index.d.mts.map +1 -1
- package/dist/annotate/index.mjs +1 -1
- package/dist/annotate/index.mjs.map +1 -1
- package/dist/{citation-DAyM8kNA.d.mts → citation-25ZydLsu.d.mts} +78 -2
- package/dist/citation-25ZydLsu.d.mts.map +1 -0
- package/dist/{citation-qKSc_Myj.d.cts → citation-Cymq3pJ-.d.cts} +78 -2
- package/dist/citation-Cymq3pJ-.d.cts.map +1 -0
- package/dist/data/index.cjs +1 -1
- package/dist/data/index.cjs.map +1 -1
- package/dist/data/index.d.cts +91 -1
- package/dist/data/index.d.cts.map +1 -1
- package/dist/data/index.d.mts +91 -1
- package/dist/data/index.d.mts.map +1 -1
- package/dist/data/index.mjs +1 -1
- package/dist/data/index.mjs.map +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +4 -35
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +4 -35
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/dist/knownCodes-CI-vnoBO.cjs +2 -0
- package/dist/knownCodes-CI-vnoBO.cjs.map +1 -0
- package/dist/knownCodes-MkDSiR1j.mjs +2 -0
- package/dist/knownCodes-MkDSiR1j.mjs.map +1 -0
- package/dist/{reporters-DYNnh4O0.mjs → reporters-CZoC98-L.mjs} +1 -1
- package/dist/reporters-CZoC98-L.mjs.map +1 -0
- package/dist/reporters-Wob0oyD9.cjs +2 -0
- package/dist/reporters-Wob0oyD9.cjs.map +1 -0
- package/package.json +1 -1
- package/dist/citation-DAyM8kNA.d.mts.map +0 -1
- package/dist/citation-qKSc_Myj.d.cts.map +0 -1
- package/dist/reporters-BclWimmk.cjs +0 -2
- package/dist/reporters-BclWimmk.cjs.map +0 -1
- package/dist/reporters-DYNnh4O0.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -9,19 +9,22 @@
|
|
|
9
9
|
[TypeScript](https://www.typescriptlang.org/)
|
|
10
10
|
[npm package](https://www.npmjs.com/package/eyecite-ts)
|
|
11
11
|
|
|
12
|
-
TypeScript legal citation extraction library —
|
|
12
|
+
TypeScript legal citation extraction library — inspired by and extending Python [eyecite](https://github.com/freelawproject/eyecite).
|
|
13
13
|
|
|
14
14
|
Extract, resolve, and annotate legal citations from court opinions and legal documents with zero runtime dependencies.
|
|
15
15
|
|
|
16
16
|
## Features
|
|
17
17
|
|
|
18
|
-
- **Full citation extraction**: Case citations, statutes, journal articles, neutral citations, public laws, federal register
|
|
18
|
+
- **Full citation extraction**: Case citations, statutes (20 jurisdictions), journal articles, neutral citations, public laws, federal register
|
|
19
|
+
- **Case name & full span**: Backward search extracts case names ("Smith v. Jones", "In re Smith"), `fullSpan` covers case name through closing parenthetical
|
|
20
|
+
- **Parallel citation linking**: Automatic detection and grouping of comma-separated citations sharing a parenthetical (e.g., "410 U.S. 113, 93 S. Ct. 705 (1973)")
|
|
21
|
+
- **Complex parentheticals**: Unified parser handles court+year, full dates (Jan. 15, 2020 / January 15, 2020 / 1/15/2020), disposition (en banc, per curiam), and chained parentheticals
|
|
19
22
|
- **Short-form resolution**: Id./Ibid., supra, and short-form case citations resolved to their full antecedents
|
|
20
23
|
- **Reporter database**: 1,200+ reporters with variant matching and confidence scoring
|
|
21
24
|
- **Citation annotation**: HTML markup with auto-escape XSS protection and position tracking
|
|
22
25
|
- **Bundle optimization**: Tree-shakeable exports, lazy-loaded reporter data, separate entry points
|
|
23
26
|
- **TypeScript native**: Discriminated unions, conditional types, type guards, full IntelliSense
|
|
24
|
-
- **Zero dependencies**: No runtime dependencies,
|
|
27
|
+
- **Zero dependencies**: No runtime dependencies, ~10KB gzipped core bundle
|
|
25
28
|
|
|
26
29
|
## Installation
|
|
27
30
|
|
|
@@ -34,7 +37,7 @@ npm install eyecite-ts
|
|
|
34
37
|
```typescript
|
|
35
38
|
import { extractCitations } from 'eyecite-ts'
|
|
36
39
|
|
|
37
|
-
const text = 'See Smith v. Jones, 500 F.2d 123 (9th Cir. 2020)'
|
|
40
|
+
const text = 'See Smith v. Jones, 500 F.2d 123 (9th Cir. Jan. 15, 2020)'
|
|
38
41
|
const citations = extractCitations(text)
|
|
39
42
|
|
|
40
43
|
console.log(citations[0])
|
|
@@ -45,8 +48,11 @@ console.log(citations[0])
|
|
|
45
48
|
// page: 123,
|
|
46
49
|
// court: '9th Cir.',
|
|
47
50
|
// year: 2020,
|
|
51
|
+
// caseName: 'Smith v. Jones',
|
|
52
|
+
// date: { iso: '2020-01-15', parsed: { year: 2020, month: 1, day: 15 } },
|
|
48
53
|
// confidence: 0.85,
|
|
49
|
-
// span: { originalStart:
|
|
54
|
+
// span: { originalStart: 20, originalEnd: 32, ... },
|
|
55
|
+
// fullSpan: { originalStart: 4, originalEnd: 57, ... }
|
|
50
56
|
// }
|
|
51
57
|
```
|
|
52
58
|
|
|
@@ -69,6 +75,46 @@ citations.forEach(citation => {
|
|
|
69
75
|
})
|
|
70
76
|
```
|
|
71
77
|
|
|
78
|
+
### Statute Citations
|
|
79
|
+
|
|
80
|
+
Extract statute citations from federal law and 20 state jurisdictions, with subsection, et seq., and jurisdiction identification:
|
|
81
|
+
|
|
82
|
+
```typescript
|
|
83
|
+
import { extractCitations } from 'eyecite-ts'
|
|
84
|
+
|
|
85
|
+
const text = `
|
|
86
|
+
See 42 U.S.C. § 1983(a)(1) et seq.
|
|
87
|
+
Also Cal. Penal Code § 187.
|
|
88
|
+
And N.Y. Penal Law § 125.25(1)(a).
|
|
89
|
+
Compare 735 ILCS 5/2-1001.
|
|
90
|
+
`
|
|
91
|
+
const citations = extractCitations(text)
|
|
92
|
+
|
|
93
|
+
// Federal with subsections + et seq.
|
|
94
|
+
// { type: 'statute', title: 42, code: 'U.S.C.', section: '1983',
|
|
95
|
+
// subsection: '(a)(1)', jurisdiction: 'US', hasEtSeq: true, confidence: 1.0 }
|
|
96
|
+
|
|
97
|
+
// California named-code
|
|
98
|
+
// { type: 'statute', code: 'Penal', section: '187', jurisdiction: 'CA', confidence: 0.95 }
|
|
99
|
+
|
|
100
|
+
// New York named-code with subsections
|
|
101
|
+
// { type: 'statute', code: 'Penal Law', section: '125.25',
|
|
102
|
+
// subsection: '(1)(a)', jurisdiction: 'NY', confidence: 1.0 }
|
|
103
|
+
|
|
104
|
+
// Illinois chapter-act format
|
|
105
|
+
// { type: 'statute', title: 735, code: '5', section: '2-1001',
|
|
106
|
+
// jurisdiction: 'IL', confidence: 0.95 }
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
**Supported jurisdictions:**
|
|
110
|
+
|
|
111
|
+
| Family | Jurisdictions |
|
|
112
|
+
|--------|--------------|
|
|
113
|
+
| Federal | USC, CFR, prose ("section X of title Y") |
|
|
114
|
+
| Named-code | NY (21 laws), CA (29 codes), TX (29 codes), MD (36 articles), VA, AL, MA |
|
|
115
|
+
| Abbreviated-code | FL, OH, MI, UT, CO, WA, NC, GA, PA, IN, NJ, DE |
|
|
116
|
+
| Chapter-act | IL (ILCS) |
|
|
117
|
+
|
|
72
118
|
### Async API
|
|
73
119
|
|
|
74
120
|
```typescript
|
|
@@ -100,6 +146,97 @@ const citations = extractCitations(html, {
|
|
|
100
146
|
})
|
|
101
147
|
```
|
|
102
148
|
|
|
149
|
+
## Case Names & Full Spans
|
|
150
|
+
|
|
151
|
+
Case citations can include the case name and full citation boundaries:
|
|
152
|
+
|
|
153
|
+
```typescript
|
|
154
|
+
const text = 'In Smith v. Jones, 500 F.2d 123 (9th Cir. 2020) (en banc), the court held...'
|
|
155
|
+
const citations = extractCitations(text)
|
|
156
|
+
|
|
157
|
+
if (citations[0].type === 'case') {
|
|
158
|
+
console.log(citations[0].caseName) // 'Smith v. Jones'
|
|
159
|
+
console.log(citations[0].disposition) // 'en banc'
|
|
160
|
+
console.log(citations[0].fullSpan) // covers "Smith v. Jones, 500 F.2d 123 (9th Cir. 2020) (en banc)"
|
|
161
|
+
console.log(citations[0].span) // covers "500 F.2d 123" only (citation core)
|
|
162
|
+
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Procedural prefixes are recognized automatically:
|
|
166
|
+
|
|
167
|
+
```typescript
|
|
168
|
+
const text = 'In re Smith, 410 U.S. 113 (1973)'
|
|
169
|
+
// caseName: 'In re Smith'
|
|
170
|
+
|
|
171
|
+
const text2 = 'Ex parte Young, 209 U.S. 123 (1908)'
|
|
172
|
+
// caseName: 'Ex parte Young'
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
### Structured Dates
|
|
176
|
+
|
|
177
|
+
Parentheticals with full dates return structured date objects:
|
|
178
|
+
|
|
179
|
+
```typescript
|
|
180
|
+
const text = '500 F.3d 100 (2d Cir. Jan. 15, 2020)'
|
|
181
|
+
// date: { iso: '2020-01-15', parsed: { year: 2020, month: 1, day: 15 } }
|
|
182
|
+
|
|
183
|
+
const text2 = '410 U.S. 113 (1973)'
|
|
184
|
+
// date: { iso: '1973', parsed: { year: 1973 } }
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Three date formats are supported: `Jan. 15, 2020`, `January 15, 2020`, and `1/15/2020`.
|
|
188
|
+
|
|
189
|
+
### Blank Page Citations
|
|
190
|
+
|
|
191
|
+
Citations can reference blank pages using placeholder notation:
|
|
192
|
+
|
|
193
|
+
```typescript
|
|
194
|
+
const text = '500 F.2d ___ (2020)'
|
|
195
|
+
const citations = extractCitations(text)
|
|
196
|
+
|
|
197
|
+
if (citations[0].type === 'case') {
|
|
198
|
+
console.log(citations[0].hasBlankPage) // true
|
|
199
|
+
console.log(citations[0].page) // undefined
|
|
200
|
+
}
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
Both `___` (triple underscore) and `---` (triple dash) are recognized as blank page placeholders. These appear in slip opinions or unpublished decisions where the final reporter page number is not yet available.
|
|
204
|
+
|
|
205
|
+
## Parallel Citations
|
|
206
|
+
|
|
207
|
+
When multiple case citations share the same parenthetical, they represent parallel citations for the same case in different reporters. The library automatically detects and groups them:
|
|
208
|
+
|
|
209
|
+
```typescript
|
|
210
|
+
const text = 'See 410 U.S. 113, 93 S. Ct. 705, 35 L. Ed. 2d 147 (1973).'
|
|
211
|
+
const citations = extractCitations(text)
|
|
212
|
+
|
|
213
|
+
// Returns 3 citations, all linked by groupId
|
|
214
|
+
console.log(citations[0].groupId) // "410-U.S.-113"
|
|
215
|
+
console.log(citations[1].groupId) // "410-U.S.-113" (same group)
|
|
216
|
+
console.log(citations[2].groupId) // "410-U.S.-113" (same group)
|
|
217
|
+
|
|
218
|
+
// Primary citation (first in group) has parallelCitations array
|
|
219
|
+
if (citations[0].type === 'case') {
|
|
220
|
+
console.log(citations[0].parallelCitations)
|
|
221
|
+
// [
|
|
222
|
+
// { volume: 93, reporter: 'S. Ct.', page: 705 },
|
|
223
|
+
// { volume: 35, reporter: 'L. Ed. 2d', page: 147 }
|
|
224
|
+
// ]
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
// Secondary citations don't duplicate the array
|
|
228
|
+
console.log(citations[1].parallelCitations) // undefined
|
|
229
|
+
console.log(citations[2].parallelCitations) // undefined
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
**Key points:**
|
|
233
|
+
- All citations in a parallel group share the same `groupId`
|
|
234
|
+
- Only the **first citation** (primary) has the `parallelCitations` array
|
|
235
|
+
- Secondary citations remain in the results array for individual processing
|
|
236
|
+
- Group ID format: `${volume}-${reporter}-${page}` (e.g., "410-U.S.-113")
|
|
237
|
+
|
|
238
|
+
Use `groupId` to identify which citations refer to the same case, or access `parallelCitations` on the primary to get all reporters at once.
|
|
239
|
+
|
|
103
240
|
## Resolving Short-Form Citations
|
|
104
241
|
|
|
105
242
|
Short-form citations (Id., supra, short-form case) refer to earlier citations in the document. The resolution engine links them to their full antecedents.
|
|
@@ -221,6 +358,38 @@ const result = annotate(text, citations, {
|
|
|
221
358
|
})
|
|
222
359
|
```
|
|
223
360
|
|
|
361
|
+
### Annotating Full Spans
|
|
362
|
+
|
|
363
|
+
By default, annotation wraps only the citation core (volume-reporter-page). Use `useFullSpan` to annotate from the case name through the closing parenthetical:
|
|
364
|
+
|
|
365
|
+
```typescript
|
|
366
|
+
const text = 'In Smith v. Jones, 500 F.2d 123 (9th Cir. 2020) (en banc), the court held...'
|
|
367
|
+
const citations = extractCitations(text)
|
|
368
|
+
|
|
369
|
+
// Default: annotates only "500 F.2d 123"
|
|
370
|
+
const coreOnly = annotate(text, citations, {
|
|
371
|
+
template: { before: '<cite>', after: '</cite>' }
|
|
372
|
+
})
|
|
373
|
+
// Result: "In Smith v. Jones, <cite>500 F.2d 123</cite> (9th Cir. 2020) (en banc), the court held..."
|
|
374
|
+
|
|
375
|
+
// With useFullSpan: annotates "Smith v. Jones, 500 F.2d 123 (9th Cir. 2020) (en banc)"
|
|
376
|
+
const fullSpan = annotate(text, citations, {
|
|
377
|
+
template: { before: '<cite>', after: '</cite>' },
|
|
378
|
+
useFullSpan: true
|
|
379
|
+
})
|
|
380
|
+
// Result: "In <cite>Smith v. Jones, 500 F.2d 123 (9th Cir. 2020) (en banc)</cite>, the court held..."
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
Full span annotation covers:
|
|
384
|
+
- Case name (if present)
|
|
385
|
+
- Volume-reporter-page
|
|
386
|
+
- Court and date parenthetical
|
|
387
|
+
- Disposition parenthetical (en banc, per curiam)
|
|
388
|
+
- Chained parentheticals
|
|
389
|
+
- Subsequent history
|
|
390
|
+
|
|
391
|
+
Use `useFullSpan: true` when you want to highlight the entire citation as a unit, or `useFullSpan: false` (default) to annotate only the citation core for minimal markup.
|
|
392
|
+
|
|
224
393
|
## Reporter Validation
|
|
225
394
|
|
|
226
395
|
Validate case citations against the reporters database:
|
|
@@ -305,9 +474,9 @@ Three entry points for optimal tree-shaking:
|
|
|
305
474
|
|
|
306
475
|
| Entry Point | Import | Gzipped |
|
|
307
476
|
|------------|--------|---------|
|
|
308
|
-
| Core extraction | `eyecite-ts` |
|
|
309
|
-
| Annotation | `eyecite-ts/annotate` | 0.
|
|
310
|
-
| Reporter data | `eyecite-ts/data` |
|
|
477
|
+
| Core extraction | `eyecite-ts` | ~10 KB |
|
|
478
|
+
| Annotation | `eyecite-ts/annotate` | 0.7 KB |
|
|
479
|
+
| Reporter data | `eyecite-ts/data` | 86.5 KB (lazy-loaded) |
|
|
311
480
|
|
|
312
481
|
```typescript
|
|
313
482
|
import { extractCitations } from 'eyecite-ts' // Core only
|
|
@@ -340,7 +509,7 @@ pnpm lint # Lint with Biome
|
|
|
340
509
|
pnpm format # Format with Biome
|
|
341
510
|
```
|
|
342
511
|
|
|
343
|
-
|
|
512
|
+
985+ tests across 32 test files.
|
|
344
513
|
|
|
345
514
|
## License
|
|
346
515
|
|
|
@@ -348,4 +517,4 @@ MIT
|
|
|
348
517
|
|
|
349
518
|
## Credits
|
|
350
519
|
|
|
351
|
-
|
|
520
|
+
Inspired by [eyecite](https://github.com/freelawproject/eyecite) (Python) by Free Law Project. This TypeScript implementation adds parallel citation linking, party name extraction, full span tracking, and performance optimizations while maintaining compatibility with the original API design.
|
package/dist/annotate/index.cjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
Object.defineProperty(exports,Symbol.toStringTag,{value:`Module`});function e(e,t,i={}){let{useCleanText:a=!1,autoEscape:o=!0,
|
|
1
|
+
Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });

/**
 * Annotate citations in text with custom markup.
 *
 * Two modes are supported:
 * - Template mode (`options.template`): wraps the citation text in
 *   `template.before` / `template.after`.
 * - Callback mode (`options.callback`): the callback's return value replaces
 *   the citation text. It takes precedence over `template` when both are set.
 *
 * Citations are processed from the end of the text backwards so that an
 * insertion never shifts the offsets of citations still waiting to be handled.
 *
 * @param {string} text - Original or cleaned text to annotate.
 * @param {Array<object>} citations - Citations carrying a `span` (and optionally a `fullSpan`).
 * @param {object} [options] - Annotation configuration.
 * @param {boolean} [options.useCleanText=false] - Use `cleanStart`/`cleanEnd` offsets instead of `originalStart`/`originalEnd`.
 * @param {boolean} [options.autoEscape=true] - HTML-escape the citation text in template mode.
 * @param {boolean} [options.useFullSpan=false] - Prefer `citation.fullSpan` over `citation.span` when present.
 * @param {{before: string, after: string}} [options.template] - Wrapper markup.
 * @param {(citation: object, surrounding: string) => string} [options.callback] - Custom markup producer.
 * @returns {{text: string, positionMap: Map<number, number>, skipped: Array<object>}}
 *   The annotated text, the recorded annotation start offsets, and the
 *   citations whose span could not be snapped out of an HTML tag.
 */
function annotate(text, citations, options = {}) {
  const {
    useCleanText = false,
    autoEscape = true,
    useFullSpan = false,
    template,
    callback,
  } = options;

  // Descending start offset: later citations are rewritten first.
  const sorted = [...citations].sort((a, b) => {
    const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart;
    const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart;
    return bPos - aPos;
  });

  let result = text;
  const positionMap = new Map();
  const skipped = [];

  for (const citation of sorted) {
    // Full-span mode falls back to the core span when the citation has no fullSpan.
    const activeSpan =
      useFullSpan && 'fullSpan' in citation && citation.fullSpan
        ? citation.fullSpan
        : citation.span;
    let start = useCleanText ? activeSpan.cleanStart : activeSpan.originalStart;
    let end = useCleanText ? activeSpan.cleanEnd : activeSpan.originalEnd;

    // Original text may contain HTML; never start or end a replacement inside a tag.
    if (!useCleanText) {
      const snapped = snapOutOfHtmlTags(result, start, end);
      if (snapped === null) {
        skipped.push(citation);
        continue;
      }
      ({ start, end } = snapped);
    }

    let markup;
    if (callback) {
      // The context window (±30 characters) is taken from the untouched input text.
      const surrounding = text.substring(
        Math.max(0, start - 30),
        Math.min(text.length, end + 30),
      );
      markup = callback(citation, surrounding);
    } else if (template) {
      const citationText = result.substring(start, end);
      const body = autoEscape ? escapeHtmlEntities(citationText) : citationText;
      markup = template.before + body + template.after;
    } else {
      // Neither mode configured: nothing to insert for this citation.
      continue;
    }

    result = result.slice(0, start) + markup + result.slice(end);

    // NOTE(review): this records each annotation's start offset mapped to itself;
    // it does not account for shifts caused by annotations inserted earlier in
    // the text. Confirm the intended positionMap contract before relying on it.
    positionMap.set(start, start);
  }

  return { text: result, positionMap, skipped };
}

/**
 * Determine whether `pos` lies inside an HTML tag (after a `<` with no `>` in between).
 *
 * @param {string} text - Text to inspect.
 * @param {number} pos - Offset to test.
 * @returns {{tagStart: number, tagEnd: number} | null} Bounds of the enclosing
 *   tag (`tagEnd` is exclusive; an unclosed tag runs to the end of the text),
 *   or null when `pos` is not inside a tag.
 */
function findContainingTag(text, pos) {
  for (let i = pos - 1; i >= 0; i--) {
    if (text[i] === '>') return null; // A tag closed before pos: we are outside.
    if (text[i] === '<') {
      for (let j = pos; j < text.length; j++) {
        if (text[j] === '>') return { tagStart: i, tagEnd: j + 1 };
      }
      return { tagStart: i, tagEnd: text.length };
    }
  }
  return null;
}

/**
 * Move annotation bounds so neither falls inside an HTML tag: the start snaps
 * to the enclosing tag's `<`, the end snaps to just past its `>`.
 *
 * @param {string} text - Text being annotated.
 * @param {number} start - Proposed start offset.
 * @param {number} end - Proposed end offset (exclusive).
 * @returns {{start: number, end: number} | null} Adjusted bounds, or null when
 *   the adjusted range is empty or inverted.
 */
function snapOutOfHtmlTags(text, start, end) {
  const startTag = findContainingTag(text, start);
  const endTag = findContainingTag(text, end);
  const snappedStart = startTag ? startTag.tagStart : start;
  const snappedEnd = endTag ? endTag.tagEnd : end;
  return snappedStart >= snappedEnd ? null : { start: snappedStart, end: snappedEnd };
}

/**
 * Escape HTML-significant characters so citation text can be inserted into markup safely.
 *
 * @param {string} text - Text to escape.
 * @returns {string} Text with `&`, `<`, `>`, `"`, `'` and `/` replaced by entities.
 */
function escapeHtmlEntities(text) {
  // Each value must be the entity, not the raw character, or no escaping occurs.
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
    '/': '&#x2F;',
  };
  return text.replace(/[&<>"'/]/g, (char) => entities[char]);
}

exports.annotate = annotate;
|
|
2
2
|
//# sourceMappingURL=index.cjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // 
Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n let start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n
|
|
1
|
+
{"version":3,"file":"index.cjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n useFullSpan = false, // Backward compatible 
default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n // Determine which span to use\n let start: number\n let end: number\n\n if (useFullSpan && 'fullSpan' in citation && citation.fullSpan) {\n // Full span mode: case name through parenthetical\n start = useCleanText ? citation.fullSpan.cleanStart : citation.fullSpan.originalStart\n end = useCleanText ? citation.fullSpan.cleanEnd : citation.fullSpan.originalEnd\n } else {\n // Default mode: core citation only\n start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n }\n\n // Snap positions out of HTML tags when annotating original text\n if (!useCleanText) {\n const snapped = snapOutOfHtmlTags(result, start, end)\n if (snapped === null) {\n // Could not safely snap — skip this citation\n skipped.push(citation)\n continue\n }\n start = snapped.start\n end = snapped.end\n }\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? 
escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped }\n}\n\n/**\n * Check if a position falls inside an HTML tag (between `<` and `>`).\n * Returns the index of the opening `<` if inside a tag, otherwise -1.\n */\nfunction findContainingTag(text: string, pos: number): { tagStart: number; tagEnd: number } | null {\n // Search backwards from pos for '<' without encountering '>' first\n let i = pos - 1\n while (i >= 0) {\n if (text[i] === '>') return null // Hit a tag close — we're outside\n if (text[i] === '<') {\n // Found opening '<' — now find the closing '>'\n let j = pos\n while (j < text.length) {\n if (text[j] === '>') return { tagStart: i, tagEnd: j + 1 }\n j++\n }\n // Unclosed tag — treat as inside\n return { tagStart: i, tagEnd: text.length }\n }\n i--\n }\n return null\n}\n\n/**\n * Snap annotation start/end positions to avoid landing inside HTML tags.\n *\n * If a position falls inside an HTML tag, it is moved:\n * - Start position: snapped to before the tag's `<`\n * - End position: snapped to after the tag's `>`\n *\n * Returns null if the positions can't be safely adjusted (e.g., entirely\n * within a single tag).\n */\nfunction snapOutOfHtmlTags(\n text: string,\n start: number,\n end: number,\n): { start: number; end: number } | null {\n let snappedStart = start\n let snappedEnd = end\n\n const startTag = findContainingTag(text, start)\n if (startTag) {\n snappedStart = startTag.tagStart\n }\n\n const endTag = findContainingTag(text, end)\n if (endTag) {\n snappedEnd = endTag.tagEnd\n }\n\n // Sanity check: start must come before end\n if 
(snappedStart >= snappedEnd) return null\n\n return { start: snappedStart, end: snappedEnd }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&`\n * - `<` → `<`\n * - `>` → `>`\n * - `\"` → `"`\n * - `'` → `'`\n * - `/` → `/`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&',\n '<': '<',\n '>': '>',\n '\"': '"',\n \"'\": ''',\n '/': '/',\n }\n return text.replace(/[&<>\"'/]/g, (char) => map[char])\n}\n"],"mappings":"mEA+CA,SAAgB,EACd,EACA,EACA,EAAgC,EAAE,CAChB,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,cAAc,GACd,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAClB,EAAsB,EAAE,CAE9B,IAAK,IAAM,KAAY,EAAQ,CAE7B,IAAI,EACA,EAaJ,GAXI,GAAe,aAAc,GAAY,EAAS,UAEpD,EAAQ,EAAe,EAAS,SAAS,WAAa,EAAS,SAAS,cACxE,EAAM,EAAe,EAAS,SAAS,SAAW,EAAS,SAAS,cAGpE,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,aAI1D,CAAC,EAAc,CACjB,IAAM,EAAU,EAAkB,EAAQ,EAAO,EAAI,CACrD,GAAI,IAAY,KAAM,CAEpB,EAAQ,KAAK,EAAS,CACtB,SAEF,EAAQ,EAAQ,MAChB,EAAM,EAAQ,IAGhB,IAAI,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,UAAS,CAO/C,SAAS,EAAkB,EAAc,EAA0D,CAEjG,IAAI,EAAI,EAAM,EACd,KAAO,GAAK,GAAG,CACb,GAAI,EAAK,KAAO,IAAK,OAAO,KAC5B,GAAI,EAAK,KAAO,IAAK,CAEnB,IAAI,EAAI,EACR,KAAO,EAAI,EAAK,QAAQ,CACtB,GAAI,EAAK,KAAO,IAAK,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAI,EAAG,CAC1D,IAGF,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAK,OAAQ,CAE7C,IAEF,OAAO,KAaT,SAAS,EACP,EACA,EACA,EACuC,CA
CvC,IAAI,EAAe,EACf,EAAa,EAEX,EAAW,EAAkB,EAAM,EAAM,CAC3C,IACF,EAAe,EAAS,UAG1B,IAAM,EAAS,EAAkB,EAAM,EAAI,CAQ3C,OAPI,IACF,EAAa,EAAO,QAIlB,GAAgB,EAAmB,KAEhC,CAAE,MAAO,EAAc,IAAK,EAAY,CAiBjD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,YAAc,GAAS,EAAI,GAAM"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { t as Citation } from "../citation-
|
|
1
|
+
import { t as Citation } from "../citation-Cymq3pJ-.cjs";
|
|
2
2
|
|
|
3
3
|
//#region src/annotate/types.d.ts
|
|
4
4
|
/**
|
|
@@ -56,6 +56,19 @@ interface AnnotationOptions<C extends Citation = Citation> {
|
|
|
56
56
|
*/
|
|
57
57
|
autoEscape?: boolean;
|
|
58
58
|
/**
|
|
59
|
+
* Use full citation span from case name through parenthetical (true) or core citation only (false).
|
|
60
|
+
*
|
|
61
|
+
* When enabled and citation has a fullSpan field (from Phase 6+), annotation will span:
|
|
62
|
+
* - Case name: "Smith v. Jones"
|
|
63
|
+
* - Reporter: "500 F.2d 123"
|
|
64
|
+
* - Parenthetical: "(9th Cir. 1974)"
|
|
65
|
+
*
|
|
66
|
+
* When disabled or fullSpan unavailable, falls back to core citation span (volume-reporter-page).
|
|
67
|
+
*
|
|
68
|
+
* @default false (backward compatible)
|
|
69
|
+
*/
|
|
70
|
+
useFullSpan?: boolean;
|
|
71
|
+
/**
|
|
59
72
|
* Callback for custom annotation logic.
|
|
60
73
|
*
|
|
61
74
|
* Receives each citation and surrounding context (±30 characters),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA
|
|
1
|
+
{"version":3,"file":"index.d.cts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;;;;;;;;;AAkFA;UAlFiB,iBAAA,WAA4B,QAAA,GAAW,QAAA;;;;;;;;;EAStD,YAAA;;;;ACUF;;;;;;;;;;;;;;EDSE,UAAA;;;;;;;;;;;;;EAcA,WAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,CAAA,EAAG,WAAA;;;;;;;;;;;;;;;;EAiBzB,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AAxGX;;;;;;;;;;;;;;;;;;;;;;;AAkFA;;;;;;;;;;;;;;AC/DA;;;;;;iBAAgB,QAAA,WAAmB,QAAA,GAAW,QAAA,CAAA,CAC5C,IAAA,UACA,SAAA,EAAW,CAAA,IACX,OAAA,GAAS,iBAAA,CAAkB,CAAA,IAC1B,gBAAA"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { t as Citation } from "../citation-
|
|
1
|
+
import { t as Citation } from "../citation-25ZydLsu.mjs";
|
|
2
2
|
|
|
3
3
|
//#region src/annotate/types.d.ts
|
|
4
4
|
/**
|
|
@@ -56,6 +56,19 @@ interface AnnotationOptions<C extends Citation = Citation> {
|
|
|
56
56
|
*/
|
|
57
57
|
autoEscape?: boolean;
|
|
58
58
|
/**
|
|
59
|
+
* Use full citation span from case name through parenthetical (true) or core citation only (false).
|
|
60
|
+
*
|
|
61
|
+
* When enabled and citation has a fullSpan field (from Phase 6+), annotation will span:
|
|
62
|
+
* - Case name: "Smith v. Jones"
|
|
63
|
+
* - Reporter: "500 F.2d 123"
|
|
64
|
+
* - Parenthetical: "(9th Cir. 1974)"
|
|
65
|
+
*
|
|
66
|
+
* When disabled or fullSpan unavailable, falls back to core citation span (volume-reporter-page).
|
|
67
|
+
*
|
|
68
|
+
* @default false (backward compatible)
|
|
69
|
+
*/
|
|
70
|
+
useFullSpan?: boolean;
|
|
71
|
+
/**
|
|
59
72
|
* Callback for custom annotation logic.
|
|
60
73
|
*
|
|
61
74
|
* Receives each citation and surrounding context (±30 characters),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../../src/annotate/types.ts","../../src/annotate/annotate.ts"],"mappings":";;;;;AA4BA;;;;;;;;;;;;;;;;;;;;;;;AAkFA;UAlFiB,iBAAA,WAA4B,QAAA,GAAW,QAAA;;;;;;;;;EAStD,YAAA;;;;ACUF;;;;;;;;;;;;;;EDSE,UAAA;;;;;;;;;;;;;EAcA,WAAA;;;;;;;;;;;EAYA,QAAA,IAAY,QAAA,EAAU,CAAA,EAAG,WAAA;;;;;;;;;;;;;;;;EAiBzB,QAAA;+CAEE,MAAA;IAEA,KAAA;EAAA;AAAA;;;;UAOa,gBAAA;;;;EAIf,IAAA;;;;;;;;;EAUA,WAAA,EAAa,GAAA;;;;;;;EAQb,OAAA,EAAS,QAAA;AAAA;;;;AAxGX;;;;;;;;;;;;;;;;;;;;;;;AAkFA;;;;;;;;;;;;;;AC/DA;;;;;;iBAAgB,QAAA,WAAmB,QAAA,GAAW,QAAA,CAAA,CAC5C,IAAA,UACA,SAAA,EAAW,CAAA,IACX,OAAA,GAAS,iBAAA,CAAkB,CAAA,IAC1B,gBAAA"}
|
package/dist/annotate/index.mjs
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
function e(e,t,i={}){let{useCleanText:a=!1,autoEscape:o=!0,
|
|
1
|
+
/**
 * Replacement entities used by {@link escapeHtmlEntities}.
 *
 * Each HTML-significant character maps to its entity form so that citation
 * text copied from the input can be embedded in markup without being
 * interpreted as HTML.
 */
const HTML_ENTITY_MAP = Object.freeze({
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
  '/': '&#x2F;',
});

/**
 * Annotate citations in text with custom markup.
 *
 * Two modes are supported; `callback` takes precedence when both are given:
 * - Callback mode (`options.callback`): the callback receives the citation and
 *   up to 30 characters of context on either side (taken from the ORIGINAL
 *   `text`) and returns the replacement markup, which is inserted verbatim.
 * - Template mode (`options.template`): the citation text is wrapped in
 *   `template.before` / `template.after`, HTML-escaped unless `autoEscape`
 *   is false.
 *
 * When neither is supplied the text is returned unchanged.
 *
 * Citations are processed from the end of the text towards the beginning so
 * that inserting markup never invalidates the offsets of citations that are
 * still to be processed.
 *
 * @param {string} text - Original or cleaned text to annotate.
 * @param {Array<object>} citations - Citations carrying a `span` (and
 *   optionally a `fullSpan`) with `cleanStart`/`cleanEnd`/`originalStart`/
 *   `originalEnd` offsets.
 * @param {object} [options]
 * @param {boolean} [options.useCleanText=false] - Use the `clean*` offsets
 *   instead of the `original*` offsets. HTML-tag snapping is only applied when
 *   this is false.
 * @param {boolean} [options.autoEscape=true] - Escape citation text in
 *   template mode.
 * @param {boolean} [options.useFullSpan=false] - Prefer `citation.fullSpan`
 *   over `citation.span` when the citation has one.
 * @param {{before: string, after: string}} [options.template]
 * @param {(citation: object, surrounding: string) => string} [options.callback]
 * @returns {{text: string, positionMap: Map<number, number>, skipped: object[]}}
 *   `text` is the annotated text; `positionMap` maps the start offset of each
 *   applied annotation (after tag snapping) to the offset where its markup
 *   begins in the annotated text; `skipped` lists citations whose offsets
 *   could not be snapped to a non-empty range.
 */
function annotate(text, citations, options = {}) {
  const {
    useCleanText = false,
    autoEscape = true,
    useFullSpan = false,
    template,
    callback,
  } = options;

  const startOf = (span) => (useCleanText ? span.cleanStart : span.originalStart);
  const endOf = (span) => (useCleanText ? span.cleanEnd : span.originalEnd);

  // Highest start first: edits at later offsets cannot disturb earlier ones.
  const ordered = [...citations].sort((a, b) => startOf(b.span) - startOf(a.span));

  let result = text;
  const skipped = [];
  // One entry per applied annotation: where it started and how much it
  // changed the text length. Used afterwards to resolve final positions.
  const replacements = [];

  for (const citation of ordered) {
    const span =
      useFullSpan && 'fullSpan' in citation && citation.fullSpan
        ? citation.fullSpan
        : citation.span;
    let start = startOf(span);
    let end = endOf(span);

    // Original text may contain HTML; never cut a tag in half.
    if (!useCleanText) {
      const snapped = snapOutOfHtmlTags(result, start, end);
      if (snapped === null) {
        skipped.push(citation);
        continue;
      }
      start = snapped.start;
      end = snapped.end;
    }

    let markup;
    if (callback) {
      // Context comes from the untouched input so that markup inserted for
      // later citations never leaks into it.
      const surrounding = text.substring(
        Math.max(0, start - 30),
        Math.min(text.length, end + 30),
      );
      markup = callback(citation, surrounding);
    } else if (template) {
      const citationText = result.substring(start, end);
      const body = autoEscape ? escapeHtmlEntities(citationText) : citationText;
      markup = template.before + body + template.after;
    } else {
      // No annotation mode configured: nothing to insert.
      continue;
    }

    const lengthBefore = result.length;
    result = result.slice(0, start) + markup + result.slice(end);
    replacements.push({ start, delta: result.length - lengthBefore, finalStart: start });
  }

  // An annotation's final position is its start plus the net growth caused by
  // every annotation that begins before it. Those are applied AFTER it in the
  // back-to-front loop, so the shift can only be resolved once the loop ends.
  let shift = 0;
  for (const entry of [...replacements].sort((a, b) => a.start - b.start)) {
    entry.finalStart = entry.start + shift;
    shift += entry.delta;
  }

  const positionMap = new Map();
  for (const { start, finalStart } of replacements) {
    positionMap.set(start, finalStart);
  }

  return { text: result, positionMap, skipped };
}

/**
 * Determine whether `pos` lies inside an HTML tag (between `<` and `>`).
 *
 * Scans backwards from `pos - 1`: reaching `>` first means the position is
 * outside any tag; reaching `<` first means it is inside one. Any bare `<` is
 * treated as a tag opener — no attempt is made to validate the tag.
 *
 * @param {string} text
 * @param {number} pos
 * @returns {{tagStart: number, tagEnd: number} | null} Offsets of the tag's
 *   `<` and of the character after its `>` (or `text.length` when the tag is
 *   never closed), or null when `pos` is not inside a tag.
 */
function findContainingTag(text, pos) {
  for (let i = pos - 1; i >= 0; i--) {
    if (text[i] === '>') {
      return null;
    }
    if (text[i] === '<') {
      const close = text.indexOf('>', pos);
      return { tagStart: i, tagEnd: close === -1 ? text.length : close + 1 };
    }
  }
  return null;
}

/**
 * Move annotation boundaries so that neither falls inside an HTML tag.
 *
 * A start inside a tag moves to that tag's `<`; an end inside a tag moves to
 * just after that tag's `>`.
 *
 * @param {string} text
 * @param {number} start
 * @param {number} end
 * @returns {{start: number, end: number} | null} Adjusted range, or null when
 *   the adjusted start is not strictly before the adjusted end.
 */
function snapOutOfHtmlTags(text, start, end) {
  const startTag = findContainingTag(text, start);
  const endTag = findContainingTag(text, end);
  const snappedStart = startTag ? startTag.tagStart : start;
  const snappedEnd = endTag ? endTag.tagEnd : end;
  return snappedStart >= snappedEnd ? null : { start: snappedStart, end: snappedEnd };
}

/**
 * Escape HTML-significant characters (`& < > " ' /`) as entities.
 *
 * @param {string} text - Text to escape.
 * @returns {string} Text that can be embedded in HTML element content or
 *   quoted attribute values.
 */
function escapeHtmlEntities(text) {
  return text.replace(/[&<>"'/]/g, (char) => HTML_ENTITY_MAP[char]);
}

export { annotate };
|
|
2
2
|
//# sourceMappingURL=index.mjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.mjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n template,\n callback,\n } = options\n\n // 
Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n let start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../../src/annotate/annotate.ts"],"sourcesContent":["import type { Citation } from '../types/citation'\nimport type { AnnotationOptions, AnnotationResult } from './types'\n\n/**\n * Annotate citations in text with custom markup.\n *\n * Supports two modes:\n * - **Template mode**: Simple before/after wrapping (set `options.template`)\n * - **Callback mode**: Custom logic with full citation context (set `options.callback`)\n *\n * Citations are processed in reverse order to avoid position shifts invalidating\n * subsequent annotations. Position tracking maps original positions to new positions\n * after markup insertion.\n *\n * @param text - Original or cleaned text to annotate\n * @param citations - Citations to mark up (from extraction pipeline)\n * @param options - Annotation configuration\n * @returns Annotated text with position mapping\n *\n * @example Template mode\n * ```typescript\n * const result = annotate(text, citations, {\n * template: { before: '<cite>', after: '</cite>' }\n * })\n * // Result: \"See <cite>500 F.2d 123</cite>\"\n * ```\n *\n * @example Callback mode\n * ```typescript\n * const result = annotate(text, citations, {\n * callback: (citation) => {\n * if (citation.type === 'case') {\n * return `<a href=\"/cases/${citation.volume}\">${citation.matchedText}</a>`\n * }\n * return citation.matchedText\n * }\n * })\n * ```\n *\n * @example Position tracking\n * ```typescript\n * const result = annotate(text, citations, { template: { before: '<mark>', after: '</mark>' } })\n * // result.positionMap tracks how positions shifted\n * const originalPos = 10\n * const newPos = result.positionMap.get(originalPos)\n * ```\n */\nexport function annotate<C extends Citation = Citation>(\n text: string,\n citations: C[],\n options: AnnotationOptions<C> = {}\n): AnnotationResult {\n const {\n useCleanText = false,\n autoEscape = true, // Secure by default\n useFullSpan = false, // Backward compatible 
default\n template,\n callback,\n } = options\n\n // Sort reverse to avoid position shifts invalidating subsequent annotations\n const sorted = [...citations].sort((a, b) => {\n const aPos = useCleanText ? a.span.cleanStart : a.span.originalStart\n const bPos = useCleanText ? b.span.cleanStart : b.span.originalStart\n return bPos - aPos // Reverse for backward iteration\n })\n\n let result = text\n const positionMap = new Map<number, number>()\n const skipped: Citation[] = []\n\n for (const citation of sorted) {\n // Determine which span to use\n let start: number\n let end: number\n\n if (useFullSpan && 'fullSpan' in citation && citation.fullSpan) {\n // Full span mode: case name through parenthetical\n start = useCleanText ? citation.fullSpan.cleanStart : citation.fullSpan.originalStart\n end = useCleanText ? citation.fullSpan.cleanEnd : citation.fullSpan.originalEnd\n } else {\n // Default mode: core citation only\n start = useCleanText ? citation.span.cleanStart : citation.span.originalStart\n end = useCleanText ? citation.span.cleanEnd : citation.span.originalEnd\n }\n\n // Snap positions out of HTML tags when annotating original text\n if (!useCleanText) {\n const snapped = snapOutOfHtmlTags(result, start, end)\n if (snapped === null) {\n // Could not safely snap — skip this citation\n skipped.push(citation)\n continue\n }\n start = snapped.start\n end = snapped.end\n }\n\n let markup = ''\n\n if (callback) {\n // Callback mode: developer provides full logic\n const surrounding = text.substring(\n Math.max(0, start - 30),\n Math.min(text.length, end + 30)\n )\n markup = callback(citation, surrounding)\n } else if (template) {\n // Template mode: simple before/after wrapping\n const citationText = result.substring(start, end)\n const escaped = autoEscape ? 
escapeHtmlEntities(citationText) : citationText\n markup = template.before + escaped + template.after\n } else {\n // No annotation specified\n continue\n }\n\n // Insert annotation (working backwards preserves positions for later citations)\n result = result.slice(0, start) + markup + result.slice(end)\n\n // Track original position to new position (before this annotation was added)\n positionMap.set(start, start)\n }\n\n return { text: result, positionMap, skipped }\n}\n\n/**\n * Check if a position falls inside an HTML tag (between `<` and `>`).\n * Returns the index of the opening `<` if inside a tag, otherwise -1.\n */\nfunction findContainingTag(text: string, pos: number): { tagStart: number; tagEnd: number } | null {\n // Search backwards from pos for '<' without encountering '>' first\n let i = pos - 1\n while (i >= 0) {\n if (text[i] === '>') return null // Hit a tag close — we're outside\n if (text[i] === '<') {\n // Found opening '<' — now find the closing '>'\n let j = pos\n while (j < text.length) {\n if (text[j] === '>') return { tagStart: i, tagEnd: j + 1 }\n j++\n }\n // Unclosed tag — treat as inside\n return { tagStart: i, tagEnd: text.length }\n }\n i--\n }\n return null\n}\n\n/**\n * Snap annotation start/end positions to avoid landing inside HTML tags.\n *\n * If a position falls inside an HTML tag, it is moved:\n * - Start position: snapped to before the tag's `<`\n * - End position: snapped to after the tag's `>`\n *\n * Returns null if the positions can't be safely adjusted (e.g., entirely\n * within a single tag).\n */\nfunction snapOutOfHtmlTags(\n text: string,\n start: number,\n end: number,\n): { start: number; end: number } | null {\n let snappedStart = start\n let snappedEnd = end\n\n const startTag = findContainingTag(text, start)\n if (startTag) {\n snappedStart = startTag.tagStart\n }\n\n const endTag = findContainingTag(text, end)\n if (endTag) {\n snappedEnd = endTag.tagEnd\n }\n\n // Sanity check: start must come before end\n if 
(snappedStart >= snappedEnd) return null\n\n return { start: snappedStart, end: snappedEnd }\n}\n\n/**\n * Escape HTML entities to prevent XSS injection.\n *\n * Converts special HTML characters to their entity equivalents:\n * - `&` → `&`\n * - `<` → `<`\n * - `>` → `>`\n * - `\"` → `"`\n * - `'` → `'`\n * - `/` → `/`\n *\n * @param text - Text to escape\n * @returns Escaped text safe for HTML insertion\n */\nfunction escapeHtmlEntities(text: string): string {\n const map: Record<string, string> = {\n '&': '&',\n '<': '<',\n '>': '>',\n '\"': '"',\n \"'\": ''',\n '/': '/',\n }\n return text.replace(/[&<>\"'/]/g, (char) => map[char])\n}\n"],"mappings":"AA+CA,SAAgB,EACd,EACA,EACA,EAAgC,EAAE,CAChB,CAClB,GAAM,CACJ,eAAe,GACf,aAAa,GACb,cAAc,GACd,WACA,YACE,EAGE,EAAS,CAAC,GAAG,EAAU,CAAC,MAAM,EAAG,IAAM,CAC3C,IAAM,EAAO,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,cAEvD,OADa,EAAe,EAAE,KAAK,WAAa,EAAE,KAAK,eACzC,GACd,CAEE,EAAS,EACP,EAAc,IAAI,IAClB,EAAsB,EAAE,CAE9B,IAAK,IAAM,KAAY,EAAQ,CAE7B,IAAI,EACA,EAaJ,GAXI,GAAe,aAAc,GAAY,EAAS,UAEpD,EAAQ,EAAe,EAAS,SAAS,WAAa,EAAS,SAAS,cACxE,EAAM,EAAe,EAAS,SAAS,SAAW,EAAS,SAAS,cAGpE,EAAQ,EAAe,EAAS,KAAK,WAAa,EAAS,KAAK,cAChE,EAAM,EAAe,EAAS,KAAK,SAAW,EAAS,KAAK,aAI1D,CAAC,EAAc,CACjB,IAAM,EAAU,EAAkB,EAAQ,EAAO,EAAI,CACrD,GAAI,IAAY,KAAM,CAEpB,EAAQ,KAAK,EAAS,CACtB,SAEF,EAAQ,EAAQ,MAChB,EAAM,EAAQ,IAGhB,IAAI,EAAS,GAEb,GAAI,EAMF,EAAS,EAAS,EAJE,EAAK,UACvB,KAAK,IAAI,EAAG,EAAQ,GAAG,CACvB,KAAK,IAAI,EAAK,OAAQ,EAAM,GAAG,CAChC,CACuC,SAC/B,EAAU,CAEnB,IAAM,EAAe,EAAO,UAAU,EAAO,EAAI,CAC3C,EAAU,EAAa,EAAmB,EAAa,CAAG,EAChE,EAAS,EAAS,OAAS,EAAU,EAAS,WAG9C,SAIF,EAAS,EAAO,MAAM,EAAG,EAAM,CAAG,EAAS,EAAO,MAAM,EAAI,CAG5D,EAAY,IAAI,EAAO,EAAM,CAG/B,MAAO,CAAE,KAAM,EAAQ,cAAa,UAAS,CAO/C,SAAS,EAAkB,EAAc,EAA0D,CAEjG,IAAI,EAAI,EAAM,EACd,KAAO,GAAK,GAAG,CACb,GAAI,EAAK,KAAO,IAAK,OAAO,KAC5B,GAAI,EAAK,KAAO,IAAK,CAEnB,IAAI,EAAI,EACR,KAAO,EAAI,EAAK,QAAQ,CACtB,GAAI,EAAK,KAAO,IAAK,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAI,EAAG,CAC1D,IAGF,MAAO,CAAE,SAAU,EAAG,OAAQ,EAAK,OAAQ,CAE7C,IAEF,OAAO,KAaT,SAAS,EACP,EACA,EACA,EACuC,CAC
vC,IAAI,EAAe,EACf,EAAa,EAEX,EAAW,EAAkB,EAAM,EAAM,CAC3C,IACF,EAAe,EAAS,UAG1B,IAAM,EAAS,EAAkB,EAAM,EAAI,CAQ3C,OAPI,IACF,EAAa,EAAO,QAIlB,GAAgB,EAAmB,KAEhC,CAAE,MAAO,EAAc,IAAK,EAAY,CAiBjD,SAAS,EAAmB,EAAsB,CAChD,IAAM,EAA8B,CAClC,IAAK,QACL,IAAK,OACL,IAAK,OACL,IAAK,SACL,IAAK,QACL,IAAK,SACN,CACD,OAAO,EAAK,QAAQ,YAAc,GAAS,EAAI,GAAM"}
|
|
@@ -95,12 +95,21 @@ interface FullCaseCitation extends CitationBase {
|
|
|
95
95
|
type: "case";
|
|
96
96
|
volume: number | string;
|
|
97
97
|
reporter: string;
|
|
98
|
-
page
|
|
98
|
+
/** Page number — optional for blank page placeholder citations (e.g., "___" or "---") */
|
|
99
|
+
page?: number;
|
|
99
100
|
pincite?: number;
|
|
100
101
|
court?: string;
|
|
101
102
|
year?: number;
|
|
102
103
|
/** Normalized reporter abbreviation from reporters-db (e.g., "F.2d" vs "F. 2d") */
|
|
103
104
|
normalizedReporter?: string;
|
|
105
|
+
/**
|
|
106
|
+
* Group identifier for parallel citations (same case in multiple reporters).
|
|
107
|
+
* Populated by Phase 8 (Parallel Linking).
|
|
108
|
+
* Format: ${volume}-${reporter}-${page} (e.g., "410-U.S.-113")
|
|
109
|
+
* All citations in the same parallel group share the same groupId.
|
|
110
|
+
* @example "410-U.S.-113" for parallel group [410 U.S. 113, 93 S. Ct. 705]
|
|
111
|
+
*/
|
|
112
|
+
groupId?: string;
|
|
104
113
|
/** Parallel citations for same case in different reporters */
|
|
105
114
|
parallelCitations?: Array<{
|
|
106
115
|
volume: number | string;
|
|
@@ -137,17 +146,84 @@ interface FullCaseCitation extends CitationBase {
|
|
|
137
146
|
confidence: number;
|
|
138
147
|
reason: string;
|
|
139
148
|
}>;
|
|
149
|
+
/**
|
|
150
|
+
* Full span covering citation from case name through closing parenthetical.
|
|
151
|
+
* Populated by Phase 6 (Full Span extraction).
|
|
152
|
+
* @example For "Smith v. Doe, 500 F.2d 123 (2020)", fullSpan covers entire text.
|
|
153
|
+
*/
|
|
154
|
+
fullSpan?: Span;
|
|
155
|
+
/**
|
|
156
|
+
* Extracted case name (party names around "v.").
|
|
157
|
+
* Populated by Phase 6 (Full Span extraction).
|
|
158
|
+
* @example "Smith v. Doe" or "United States v. Jones"
|
|
159
|
+
*/
|
|
160
|
+
caseName?: string;
|
|
161
|
+
/**
|
|
162
|
+
* Plaintiff party name (text before "v." or procedural prefix).
|
|
163
|
+
* Populated by Phase 7 (Party Name extraction).
|
|
164
|
+
* @example "Smith" from "Smith v. Doe" or "Jones" from "In re Jones"
|
|
165
|
+
*/
|
|
166
|
+
plaintiff?: string;
|
|
167
|
+
/**
|
|
168
|
+
* Defendant party name (text after "v.").
|
|
169
|
+
* Populated by Phase 7 (Party Name extraction).
|
|
170
|
+
* @example "Doe" from "Smith v. Doe"
|
|
171
|
+
*/
|
|
172
|
+
defendant?: string;
|
|
173
|
+
/**
|
|
174
|
+
* Normalized plaintiff name for matching (lowercase, stripped of noise).
|
|
175
|
+
* Populated by Phase 7 (Party Name extraction).
|
|
176
|
+
* @example "smith" from "The Smith Corp., Inc."
|
|
177
|
+
*/
|
|
178
|
+
plaintiffNormalized?: string;
|
|
179
|
+
/**
|
|
180
|
+
* Normalized defendant name for matching (lowercase, stripped of noise).
|
|
181
|
+
* Populated by Phase 7 (Party Name extraction).
|
|
182
|
+
* @example "doe" from "Doe et al."
|
|
183
|
+
*/
|
|
184
|
+
defendantNormalized?: string;
|
|
185
|
+
/**
|
|
186
|
+
* Procedural prefix for non-adversarial cases.
|
|
187
|
+
* Populated by Phase 7 (Party Name extraction).
|
|
188
|
+
* @example "In re" from "In re Smith"
|
|
189
|
+
*/
|
|
190
|
+
proceduralPrefix?: string;
|
|
191
|
+
/**
|
|
192
|
+
* True when page position contains a blank placeholder ("___" or "---").
|
|
193
|
+
* Populated by Phase 5 (Blank Page support).
|
|
194
|
+
* When true, page field will be undefined and confidence reduced to 0.8.
|
|
195
|
+
*/
|
|
196
|
+
hasBlankPage?: boolean;
|
|
197
|
+
/**
|
|
198
|
+
* Disposition or procedural status from parenthetical.
|
|
199
|
+
* Populated by Phase 6 (Complex Parentheticals).
|
|
200
|
+
* @example "en banc", "per curiam"
|
|
201
|
+
*/
|
|
202
|
+
disposition?: string;
|
|
140
203
|
}
|
|
141
204
|
/**
|
|
142
205
|
* Statute citation (U.S. Code, state codes, etc.).
|
|
143
206
|
*
|
|
144
207
|
* @example "42 U.S.C. § 1983"
|
|
208
|
+
* @example "42 U.S.C. § 1983(a)(1) et seq."
|
|
145
209
|
*/
|
|
146
210
|
interface StatuteCitation extends CitationBase {
|
|
147
211
|
type: "statute";
|
|
148
212
|
title?: number;
|
|
149
213
|
code: string;
|
|
150
214
|
section: string;
|
|
215
|
+
/** Subsection/pincite chain, e.g. "(a)(1)(A)" */
|
|
216
|
+
subsection?: string;
|
|
217
|
+
/** 2-letter state code or "US" when unambiguously identified */
|
|
218
|
+
jurisdiction?: string;
|
|
219
|
+
/**
|
|
220
|
+
* Alias for subsection (eyecite-ts convention).
|
|
221
|
+
* Note: this is string (subsection chain), unlike FullCaseCitation.pincite which is number (page offset).
|
|
222
|
+
* The discriminated union on `type` ensures type safety at call sites.
|
|
223
|
+
*/
|
|
224
|
+
pincite?: string;
|
|
225
|
+
/** True when "et seq." follows the citation */
|
|
226
|
+
hasEtSeq?: boolean;
|
|
151
227
|
}
|
|
152
228
|
/**
|
|
153
229
|
* Journal citation (law review, legal periodical).
|
|
@@ -323,4 +399,4 @@ type CitationOfType<T extends CitationType> = Extract<Citation, {
|
|
|
323
399
|
type ExtractorMap = { [K in FullCitationType]: CitationOfType<K> };
|
|
324
400
|
//#endregion
|
|
325
401
|
export { TransformationMap as S, StatuteCitation as _, ExtractorMap as a, Warning as b, FullCitation as c, JournalCitation as d, NeutralCitation as f, ShortFormCitationType as g, ShortFormCitation as h, CitationType as i, FullCitationType as l, ShortFormCaseCitation as m, CitationBase as n, FederalRegisterCitation as o, PublicLawCitation as p, CitationOfType as r, FullCaseCitation as s, Citation as t, IdCitation as u, StatutesAtLargeCitation as v, Span as x, SupraCitation as y };
|
|
326
|
-
//# sourceMappingURL=citation-
|
|
402
|
+
//# sourceMappingURL=citation-25ZydLsu.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"citation-25ZydLsu.d.mts","names":[],"sources":["../src/types/span.ts","../src/types/citation.ts"],"mappings":";;AAiBA;;;;;;;;;;AAoBA;;;;;;UApBiB,IAAA;;EAEf,UAAA;EAuBiB;EApBjB,QAAA;;EAGA,aAAA;ECpBF;EDuBE,WAAA;AAAA;;;AClBF;;;;UD2BiB,iBAAA;;EAEf,eAAA,EAAiB,GAAA;;EAGjB,eAAA,EAAiB,GAAA;AAAA;;;AAzBnB;;;AAAA,KCZY,YAAA;;;;UAKK,OAAA;;EAEf,KAAA;EDyBF;ECvBE,OAAA;;EAEA,QAAA;IAAY,KAAA;IAAe,GAAA;EAAA;;EAE3B,OAAA;AAAA;;;;UAMe,YAAA;EAnBL;EAqBV,IAAA;EArBU;EAwBV,IAAA,EAAM,IAAA;EAnBR;;;;;;;EA4BE,UAAA;;EAGA,WAAA;;EAGA,aAAA;EApBF;EAuBE,eAAA;;EAGA,QAAA,GAAW,OAAA;AAAA;;;;;;;UASI,gBAAA,SAAyB,YAAA;EACxC,IAAA;EACA,MAAA;EACA,QAAA;EAHF;EAKE,IAAA;EACA,OAAA;EACA,KAAA;EACA,IAAA;;EAGA,kBAAA;;;;;;;;EASA,OAAA;;EAGA,iBAAA,GAAoB,KAAA;IAClB,MAAA;IACA,QAAA;IACA,IAAA;EAAA;;EAIF,MAAA;;EAGA,aAAA;;EAGA,iBAAA;;;;;;EAOA,IAAA;IACE,GAAA;IACA,MAAA;MAAW,IAAA;MAAc,KAAA;MAAgB,GAAA;IAAA;EAAA;;;;;EAO3C,uBAAA,GAA0B,KAAA;IACxB,MAAA;IACA,QAAA;IACA,IAAA;IACA,UAAA;IACA,MAAA;EAAA;EAgEF;AASF;;;;EAjEE,QAAA,GAAW,IAAA;;;;;;EAOX,QAAA;;;;;AAqFF;EA9EE,SAAA;;;;;;EAOA,SAAA;;;;;;EAOA,mBAAA;;;AA4FF;;;EArFE,mBAAA;;;;;;EAOA,gBAAA;EAqFA;AAWF;;;;EAzFE,YAAA;;;;;;EAOA,WAAA;AAAA;;;;;;;UASe,eAAA,SAAwB,YAAA;EACvC,IAAA;EACA,KAAA;EACA,IAAA;EACA,OAAA;EAkGe;EAhGf,UAAA;EAgG+C;EA9F/C,YAAA;;;;;;EAMA,OAAA;EAwGF;EAtGE,QAAA;AAAA;;;;;;;AAiHF;;UAtGiB,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,MAAA;;EAEA,KAAA;;EAEA,MAAA;EA6GF;EA3GE,OAAA;;EAEA,YAAA;;EAEA,IAAA;;EAEA,OAAA;;EAEA,IAAA;AAAA;;AA4HF;;;;;;;UAjHiB,eAAA,SAAwB,YAAA;EACvC,IAAA;;EAEA,IAAA;;EAEA,KAAA;;EAEA,cAAA;AAAA;;;;;;;;;UAWe,iBAAA,SAA0B,YAAA;EACzC,IAAA;EAwGE;EAtGF,QAAA;EA2GU;EAzGV,SAAA;EAyGU;EAvGV,KAAA;AAAA;;;;;AA6GF;;;;UAlGiB,uBAAA,SAAgC,YAAA;EAC/C,IAAA;;EAEA,MAAA;;EAEA,IAAA;;EAEA,IAAA;AAAA;;UAIe,uBAAA,SAAgC,YAAA;EAC/C,IAAA;;EAEA,MAAA;;EAEA,IAAA;EAkFgJ;EAhFhJ,IAAA;AAAA;;;;;;;UASe,UAAA,SAAmB,YAAA;EAClC,IAAA;EACA,OAAA;AAAA;;AAqFF;;;;;UA5EiB,aAAA,SAAsB,YAAA;EACrC,IAAA;;EAEA,SAAA;;EAEA,OAAA;AAAA;;;;;;AA6EF;UApEiB,qBAAA,SAA8B,YAAA;EAC7C,IAAA;EACA,MAAA;EACA,QAAA;EACA,IAAA;EACA,OAAA;AAAA;;;;;;;
;;;;;;;;;;;KAoBU,QAAA,GACR,gBAAA,GACA,eAAA,GACA,eAAA,GACA,eAAA,GACA,iBAAA,GACA,uBAAA,GACA,uBAAA,GACA,UAAA,GACA,aAAA,GACA,qBAAA;;;;KAKQ,gBAAA;AAAA,KACA,qBAAA;;;;KAKA,YAAA,GAAe,gBAAA,GAAmB,eAAA,GAAkB,eAAA,GAAkB,eAAA,GAAkB,iBAAA,GAAoB,uBAAA,GAA0B,uBAAA;;;;KAKtI,iBAAA,GAAoB,UAAA,GAAa,aAAA,GAAgB,qBAAA;;;;;;;;;;KAWjD,cAAA,WAAyB,YAAA,IAAgB,OAAA,CAAQ,QAAA;EAAY,IAAA,EAAM,CAAA;AAAA;;;;;KAMnE,YAAA,WACJ,gBAAA,GAAmB,cAAA,CAAe,CAAA"}
|