eyecite-ts 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +154 -16
- package/dist/annotate/index.cjs +1 -1
- package/dist/annotate/index.cjs.map +1 -1
- package/dist/annotate/index.d.cts +14 -1
- package/dist/annotate/index.d.cts.map +1 -1
- package/dist/annotate/index.d.mts +14 -1
- package/dist/annotate/index.d.mts.map +1 -1
- package/dist/annotate/index.mjs +1 -1
- package/dist/annotate/index.mjs.map +1 -1
- package/dist/{citation-BhJJj_AZ.d.cts → citation-4bmWbhSK.d.cts} +87 -14
- package/dist/citation-4bmWbhSK.d.cts.map +1 -0
- package/dist/{citation-FJ10UFM7.d.mts → citation-BVN0o8TJ.d.mts} +87 -14
- package/dist/citation-BVN0o8TJ.d.mts.map +1 -0
- package/dist/data/index.cjs +1 -1
- package/dist/data/index.cjs.map +1 -1
- package/dist/data/index.mjs +1 -1
- package/dist/data/index.mjs.map +1 -1
- package/dist/index.cjs +1 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -3
- package/dist/index.d.cts.map +1 -1
- package/dist/index.d.mts +13 -3
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +1 -1
- package/dist/index.mjs.map +1 -1
- package/package.json +16 -13
- package/dist/citation-BhJJj_AZ.d.cts.map +0 -1
- package/dist/citation-FJ10UFM7.d.mts.map +0 -1
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","names":[],"sources":["../src/types/guards.ts","../src/clean/cleaners.ts","../src/clean/cleanText.ts","../src/patterns/casePatterns.ts","../src/patterns/statutePatterns.ts","../src/patterns/journalPatterns.ts","../src/patterns/neutralPatterns.ts","../src/patterns/shortForm.ts","../src/tokenize/tokenizer.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractShortForms.ts","../src/resolve/scopeBoundary.ts","../src/resolve/levenshtein.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/extract/extractCitations.ts"],"sourcesContent":["import type { Citation, CitationType, CitationOfType, FullCitation, ShortFormCitation, FullCaseCitation } from \"./citation\"\n\n/**\n * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).\n */\nexport function isFullCitation(citation: Citation): citation is FullCitation {\n return citation.type === 'case'\n || citation.type === 'statute'\n || citation.type === 'journal'\n || citation.type === 'neutral'\n || citation.type === 'publicLaw'\n || citation.type === 'federalRegister'\n}\n\n/**\n * Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).\n */\nexport function isShortFormCitation(citation: Citation): citation is ShortFormCitation {\n return citation.type === 'id'\n || citation.type === 'supra'\n || citation.type === 'shortFormCase'\n}\n\n/**\n * Type guard: narrows Citation to a full case citation.\n */\nexport function isCaseCitation(citation: Citation): citation is FullCaseCitation {\n return citation.type === 'case'\n}\n\n/**\n * Generic type guard that narrows a Citation to a specific type.\n * Useful when the target type is dynamic or generic.\n */\nexport function isCitationType<T extends CitationType>(\n citation: Citation,\n type: T\n): citation is CitationOfType<T> {\n return citation.type === type\n}\n\n/**\n * Exhaustiveness helper for switch statements on discriminated unions.\n *\n * Place in the `default` branch to get a compile-time error if a new\n * variant is added but not handled.\n *\n * @example\n * ```typescript\n * switch (citation.type) {\n * case 'case': ...\n * case 'statute': ...\n * // If you forget a variant, TypeScript errors here:\n * default: assertUnreachable(citation.type)\n * }\n * ```\n */\nexport function assertUnreachable(x: never): never {\n throw new Error(`Unexpected value: ${x}`)\n}\n","/**\n * Built-in text cleaner functions for preprocessing legal documents.\n *\n * Each cleaner is a simple transformation: (text: string) => string\n * Cleaners can be composed via the cleanText() pipeline.\n */\n\n/**\n * Remove all HTML tags from text.\n *\n * @example\n * stripHtmlTags(\"Smith v. <b>Doe</b>, 500 F.2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function stripHtmlTags(text: string): string {\n\treturn text.replace(/<[^>]+>/g, \"\")\n}\n\n/**\n * Normalize whitespace: convert tabs/newlines to spaces, collapse multiple spaces.\n *\n * @example\n * normalizeWhitespace(\"Smith v. Doe, 500 F.2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function normalizeWhitespace(text: string): string {\n\treturn text.replace(/[\\t\\n\\r]+/g, \" \").replace(/ {2,}/g, \" \")\n}\n\n/**\n * Apply Unicode NFKC normalization (ligatures → separate chars).\n *\n * @example\n * normalizeUnicode(\"Smith v. Doe, 500 F.2d 123\") // with ligature \"fi\"\n * // => \"Smith v. Doe, 500 F.2d 123\" // normalized\n */\nexport function normalizeUnicode(text: string): string {\n\treturn text.normalize(\"NFKC\")\n}\n\n/**\n * Replace curly quotes and apostrophes with straight quotes.\n *\n * @example\n * fixSmartQuotes(\"\"Smith\" v. 'Doe', 500 F.2d 123\")\n * // => \"\\\"Smith\\\" v. 'Doe', 500 F.2d 123\"\n */\nexport function fixSmartQuotes(text: string): string {\n\treturn text\n\t\t.replace(/[\\u201C\\u201D]/g, '\"') // curly double quotes\n\t\t.replace(/[\\u2018\\u2019]/g, \"'\") // curly single quotes/apostrophes\n}\n\n/**\n * Remove underscore OCR artifacts (common in scanned documents).\n *\n * @example\n * removeOcrArtifacts(\"Smith v. Doe, 500 F._2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function removeOcrArtifacts(text: string): string {\n\treturn text.replace(/_/g, \"\")\n}\n","import type { TransformationMap } from \"../types/span\"\nimport type { Warning } from \"../types/citation\"\nimport {\n\tfixSmartQuotes,\n\tnormalizeUnicode,\n\tnormalizeWhitespace,\n\tstripHtmlTags,\n} from \"./cleaners\"\n\n/**\n * Result of text cleaning operation.\n */\nexport interface CleanTextResult {\n\t/** Cleaned text after all transformations */\n\tcleaned: string\n\n\t/** Position mappings between cleaned and original text */\n\ttransformationMap: TransformationMap\n\n\t/** Warnings generated during cleaning (currently unused) */\n\twarnings: Warning[]\n}\n\n/**\n * Clean text using a pipeline of transformation functions.\n *\n * Applies cleaners sequentially while maintaining accurate position mappings\n * between the original and cleaned text. This enables citation extraction from\n * cleaned text while reporting positions in the original text.\n *\n * @param original - Original input text\n * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)\n * @returns Cleaned text with position mappings and warnings\n *\n * @example\n * const result = cleanText(\"Smith v. <b>Doe</b>, 500 F.2d 123\")\n * // result.cleaned: \"Smith v. Doe, 500 F.2d 123\"\n * // result.transformationMap tracks position shifts from HTML removal\n */\nexport function cleanText(\n\toriginal: string,\n\tcleaners: Array<(text: string) => string> = [\n\t\tstripHtmlTags,\n\t\tnormalizeWhitespace,\n\t\tnormalizeUnicode,\n\t\tfixSmartQuotes,\n\t],\n): CleanTextResult {\n\t// Initialize 1:1 position mapping\n\tlet currentText = original\n\tlet cleanToOriginal = new Map<number, number>()\n\tlet originalToClean = new Map<number, number>()\n\n\t// Identity mapping: cleanToOriginal[i] = i, originalToClean[i] = i\n\tfor (let i = 0; i <= original.length; i++) {\n\t\tcleanToOriginal.set(i, i)\n\t\toriginalToClean.set(i, i)\n\t}\n\n\t// Apply each cleaner sequentially, rebuilding position maps\n\tfor (const cleaner of cleaners) {\n\t\tconst beforeText = currentText\n\t\tconst afterText = cleaner(currentText)\n\n\t\tif (beforeText !== afterText) {\n\t\t\t// Text changed - rebuild position maps\n\t\t\tconst { newCleanToOriginal, newOriginalToClean } = rebuildPositionMaps(\n\t\t\t\tbeforeText,\n\t\t\t\tafterText,\n\t\t\t\tcleanToOriginal,\n\t\t\t\toriginalToClean,\n\t\t\t)\n\n\t\t\tcleanToOriginal = newCleanToOriginal\n\t\t\toriginalToClean = newOriginalToClean\n\t\t\tcurrentText = afterText\n\t\t}\n\t}\n\n\tconst transformationMap: TransformationMap = {\n\t\tcleanToOriginal,\n\t\toriginalToClean,\n\t}\n\n\treturn {\n\t\tcleaned: currentText,\n\t\ttransformationMap,\n\t\twarnings: [],\n\t}\n}\n\n/**\n * Rebuild position maps after a text transformation.\n *\n * Uses a simplified algorithm that scans through both strings, matching\n * characters where possible and tracking the offset accumulation.\n *\n * @param beforeText - Text before transformation\n * @param afterText - Text after transformation\n * @param oldCleanToOriginal - Previous clean-to-original mapping\n * @param oldOriginalToClean - Previous original-to-clean mapping\n * @returns New position maps\n */\nfunction rebuildPositionMaps(\n\tbeforeText: string,\n\tafterText: string,\n\toldCleanToOriginal: Map<number, number>,\n\toldOriginalToClean: Map<number, number>,\n): {\n\tnewCleanToOriginal: Map<number, number>\n\tnewOriginalToClean: Map<number, number>\n} {\n\tconst newCleanToOriginal = new Map<number, number>()\n\tconst newOriginalToClean = new Map<number, number>()\n\n\tlet beforeIdx = 0\n\tlet afterIdx = 0\n\n\t// Scan through both strings, matching characters where possible\n\twhile (beforeIdx <= beforeText.length || afterIdx <= afterText.length) {\n\t\t// Both at end\n\t\tif (beforeIdx >= beforeText.length && afterIdx >= afterText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbreak\n\t\t}\n\n\t\t// Before text exhausted (expansion case)\n\t\tif (beforeIdx >= beforeText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tafterIdx++\n\t\t\tcontinue\n\t\t}\n\n\t\t// After text exhausted (removal case)\n\t\tif (afterIdx >= afterText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tcontinue\n\t\t}\n\n\t\t// Characters match - carry forward the mapping\n\t\tif (beforeText[beforeIdx] === afterText[afterIdx]) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tafterIdx++\n\t\t} else {\n\t\t\t// Characters differ - need to determine if this is insertion/deletion/replacement\n\t\t\t// Look ahead to find next match\n\t\t\tlet foundMatch = false\n\t\t\tconst maxLookAhead = 20 // Limit lookahead to avoid performance issues\n\n\t\t\t// Check if something was deleted from before text\n\t\t\tfor (let lookAhead = 1; lookAhead <= maxLookAhead; lookAhead++) {\n\t\t\t\tif (beforeIdx + lookAhead >= beforeText.length) break\n\n\t\t\t\tif (beforeText[beforeIdx + lookAhead] === afterText[afterIdx]) {\n\t\t\t\t\t// Found a match - characters were deleted from before text\n\t\t\t\t\tfor (let i = 0; i < lookAhead; i++) {\n\t\t\t\t\t\tconst originalPos =\n\t\t\t\t\t\t\toldCleanToOriginal.get(beforeIdx + i) ?? beforeIdx + i\n\t\t\t\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\t\t\t}\n\t\t\t\t\tbeforeIdx += lookAhead\n\t\t\t\t\tfoundMatch = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (foundMatch) continue\n\n\t\t\t// Check if something was inserted into after text\n\t\t\tfor (let lookAhead = 1; lookAhead <= maxLookAhead; lookAhead++) {\n\t\t\t\tif (afterIdx + lookAhead >= afterText.length) break\n\n\t\t\t\tif (beforeText[beforeIdx] === afterText[afterIdx + lookAhead]) {\n\t\t\t\t\t// Found a match - characters were inserted into after text\n\t\t\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\t\t\tfor (let i = 0; i < lookAhead; i++) {\n\t\t\t\t\t\tnewCleanToOriginal.set(afterIdx + i, originalPos)\n\t\t\t\t\t}\n\t\t\t\t\tafterIdx += lookAhead\n\t\t\t\t\tfoundMatch = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (foundMatch) continue\n\n\t\t\t// No match found within lookahead - treat as replacement\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tafterIdx++\n\t\t}\n\t}\n\n\treturn { newCleanToOriginal, newOriginalToClean }\n}\n","/**\n * Case Citation Regex Patterns\n *\n * These patterns are designed for tokenization (broad matching) not extraction.\n * They identify potential case citations in text for the tokenizer (Plan 3).\n * Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).\n *\n * Pattern Design Principles (from RESEARCH.md):\n * - Use \\b word boundaries to avoid matching \"F.\" in \"F.B.I.\"\n * - Avoid nested quantifiers: (a+)+ causes ReDoS\n * - Keep patterns simple: tokenization only needs to find candidates\n * - Use global flag /g for matchAll()\n */\n\nimport type { FullCitationType } from '@/types/citation'\n\nexport interface Pattern {\n id: string\n regex: RegExp\n description: string\n type: FullCitationType\n}\n\nexport const casePatterns: Pattern[] = [\n {\n id: 'federal-reporter',\n regex: /\\b(\\d+)\\s+(F\\.|F\\.2d|F\\.3d|F\\.\\s?Supp\\.|F\\.\\s?Supp\\.\\s?2d|F\\.\\s?Supp\\.\\s?3d)\\s+(\\d+)\\b/g,\n description: 'Federal Reporter (F., F.2d, F.3d, F.Supp., etc.)',\n type: 'case',\n },\n {\n id: 'supreme-court',\n regex: /\\b(\\d+)\\s+(U\\.S\\.|S\\.\\s?Ct\\.|L\\.\\s?Ed\\.(?:\\s?2d)?)\\s+(\\d+)\\b/g,\n description: 'U.S. Supreme Court reporters',\n type: 'case',\n },\n {\n id: 'state-reporter',\n regex: /\\b(\\d+)\\s+([A-Z][A-Za-z\\.]+(?:\\s?2d|\\s?3d)?)\\s+(\\d+)\\b/g,\n description: 'State reporters (broad pattern, validated against reporters-db in Phase 3)',\n type: 'case',\n },\n]\n","/**\n * Statute Citation Regex Patterns\n *\n * Patterns for U.S. Code and state code citations.\n * These are intentionally broad for tokenization - validation against\n * actual code databases happens in Phase 2 Plan 5 (extraction layer).\n *\n * Pattern Design:\n * - Simple structure to avoid ReDoS\n * - Matches both \"§\" and \"Section\" formats\n * - State codes use broad pattern (validated later)\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const statutePatterns: Pattern[] = [\n {\n id: 'usc',\n regex: /\\b(\\d+)\\s+U\\.S\\.C\\.?\\s+§+\\s*(\\d+)\\b/g,\n description: 'U.S. Code citations (e.g., \"42 U.S.C. § 1983\")',\n type: 'statute',\n },\n {\n id: 'state-code',\n regex: /\\b([A-Z][a-z]+\\.?\\s+[A-Za-z\\.]+\\s+Code)\\s+§\\s*(\\d+)\\b/g,\n description: 'State code citations (broad pattern, e.g., \"Cal. Penal Code § 187\")',\n type: 'statute',\n },\n]\n","/**\n * Journal Citation Regex Patterns\n *\n * Patterns for law review and journal citations.\n * These are intentionally broad for tokenization - validation against\n * journals-db happens in Phase 3 (extraction layer).\n *\n * Pattern Design:\n * - Matches volume-journal-page format\n * - Broad journal name matching (validated later)\n * - Simple structure to avoid ReDoS\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const journalPatterns: Pattern[] = [\n {\n id: 'law-review',\n regex: /\\b(\\d+)\\s+([A-Z][A-Za-z\\.\\s]+)\\s+(\\d+)\\b/g,\n description: 'Law review citations (e.g., \"120 Harv. L. Rev. 500\"), validated against journals-db in Phase 3',\n type: 'journal',\n },\n]\n","/**\n * Neutral and Online Citation Regex Patterns\n *\n * Patterns for WestLaw, LexisNexis, public laws, and Federal Register citations.\n * These have predictable formats and don't require external validation.\n *\n * Pattern Design:\n * - Matches year-database-number format for online citations\n * - Matches Pub. L. No. format for public laws\n * - Matches volume-Fed. Reg.-page for Federal Register\n * - Simple structure to avoid ReDoS\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const neutralPatterns: Pattern[] = [\n {\n id: 'westlaw',\n regex: /\\b(\\d{4})\\s+WL\\s+(\\d+)\\b/g,\n description: 'WestLaw citations (e.g., \"2021 WL 123456\")',\n type: 'neutral',\n },\n {\n id: 'lexis',\n regex: /\\b(\\d{4})\\s+U\\.S\\.\\s+LEXIS\\s+(\\d+)\\b/g,\n description: 'LexisNexis citations (e.g., \"2021 U.S. LEXIS 5000\")',\n type: 'neutral',\n },\n {\n id: 'public-law',\n regex: /\\bPub\\.\\s?L\\.\\s?No\\.\\s?(\\d+-\\d+)\\b/g,\n description: 'Public Law citations (e.g., \"Pub. L. No. 117-58\")',\n type: 'publicLaw',\n },\n {\n id: 'federal-register',\n regex: /\\b(\\d+)\\s+Fed\\.\\s?Reg\\.\\s+(\\d+)\\b/g,\n description: 'Federal Register citations (e.g., \"86 Fed. Reg. 12345\")',\n type: 'federalRegister',\n },\n]\n","/**\n * Short-form Citation Regex Patterns\n *\n * Patterns for Id., Ibid., supra, and short-form case citations.\n * These refer to earlier citations in the document.\n *\n * Pattern Design:\n * - Simple structure to avoid ReDoS (no nested quantifiers)\n * - Broad matching for tokenization; validation happens in extraction layer\n * - Word boundaries to prevent false positives (e.g., \"Idaho\" vs \"Id.\")\n */\n\nimport type { Pattern } from './casePatterns'\n\n/** Id. with optional pincite: \"Id.\" or \"Id. at 253\" */\nexport const ID_PATTERN: RegExp = /\\b[Ii]d\\.(?:\\s+at\\s+(\\d+))?/g\n\n/** Ibid. with optional pincite (less common variant) */\nexport const IBID_PATTERN: RegExp = /\\b[Ii]bid\\.(?:\\s+at\\s+(\\d+))?/g\n\n/**\n * Supra with party name and optional pincite.\n * Pattern: word(s), supra [, at page]\n * Captures: (1) party name, (2) pincite\n * Note: Matches party names including \"v.\" (e.g., \"Smith v. Jones, supra\")\n */\nexport const SUPRA_PATTERN: RegExp = /\\b([A-Z][a-zA-Z]+(?:(?:\\s+v\\.?\\s+|\\s+)[A-Z][a-zA-Z]+)*),?\\s+supra(?:,?\\s+at\\s+(\\d+))?/g\n\n/**\n * Short-form case: volume reporter at page\n * Pattern: number space abbreviation space \"at\" space number\n * Simplified detection; full parsing in extraction layer\n */\nexport const SHORT_FORM_CASE_PATTERN: RegExp = /\\b(\\d+)\\s+([A-Z][A-Za-z.\\s]+?(?:\\d[a-z])?)\\s+at\\s+(\\d+)\\b/g\n\n/** All short-form patterns for tokenization */\nexport const SHORT_FORM_PATTERNS: readonly RegExp[] = [\n ID_PATTERN,\n IBID_PATTERN,\n SUPRA_PATTERN,\n SHORT_FORM_CASE_PATTERN,\n] as const\n\n/** Pattern objects for consistency with other pattern modules */\nexport const shortFormPatterns: Pattern[] = [\n {\n id: 'id',\n regex: ID_PATTERN,\n description: 'Id. citations (e.g., \"Id.\" or \"Id. at 253\")',\n type: 'case', // Will be typed as 'id' in extraction layer\n },\n {\n id: 'ibid',\n regex: IBID_PATTERN,\n description: 'Ibid. citations (e.g., \"Ibid.\" or \"Ibid. at 125\")',\n type: 'case', // Will be typed as 'id' in extraction layer\n },\n {\n id: 'supra',\n regex: SUPRA_PATTERN,\n description: 'Supra citations (e.g., \"Smith, supra\" or \"Smith, supra, at 460\")',\n type: 'case', // Will be typed as 'supra' in extraction layer\n },\n {\n id: 'shortFormCase',\n regex: SHORT_FORM_CASE_PATTERN,\n description: 'Short-form case citations (e.g., \"500 F.2d at 125\")',\n type: 'case', // Will be typed as 'shortFormCase' in extraction layer\n },\n]\n","/**\n * Tokenization Layer for Citation Extraction\n *\n * Applies regex patterns to cleaned text to produce citation candidate tokens.\n * This is the second stage of the parsing pipeline:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates) ← THIS MODULE\n * 3. Extract (parse metadata, validate against reporters-db)\n *\n * Tokenization is intentionally broad - it finds potential citations without\n * validating them. The extraction layer (Plan 5) validates tokens against\n * reporters-db and parses metadata.\n *\n * @module tokenize\n */\n\nimport type { Span } from '@/types/span'\nimport type { Pattern } from '@/patterns'\nimport {\n casePatterns,\n statutePatterns,\n journalPatterns,\n neutralPatterns,\n} from '@/patterns'\nimport { shortFormPatterns } from '@/patterns/shortForm'\n\n/**\n * A token representing a potential citation found in cleaned text.\n *\n * Tokens are produced by applying regex patterns to cleaned text.\n * They include matched text, position in cleaned text, and pattern metadata\n * for use in the extraction layer.\n */\nexport interface Token {\n /** Matched text from input */\n text: string\n\n /** Position in cleaned text (cleanStart/cleanEnd only, no original positions yet) */\n span: Pick<Span, 'cleanStart' | 'cleanEnd'>\n\n /** Pattern type that matched this token */\n type: Pattern['type']\n\n /** Pattern ID that matched this token */\n patternId: string\n}\n\n/**\n * Tokenizes cleaned text by applying regex patterns to find citation candidates.\n *\n * For each pattern in the patterns array:\n * 1. Apply pattern.regex.matchAll(cleanedText)\n * 2. Create Token for each match with position, text, and pattern metadata\n * 3. Collect all tokens from all patterns\n * 4. Sort by cleanStart position (ascending)\n *\n * Timeout protection: If a pattern throws (e.g., ReDoS), skip it and continue\n * with remaining patterns. Logs warning to console.\n *\n * Note: This function is synchronous because regex matching is inherently\n * synchronous. This enables both sync (extractCitations) and async\n * (extractCitationsAsync) APIs in Plan 6.\n *\n * @param cleanedText - Text that has been cleaned by cleanText() from Plan 1\n * @param patterns - Regex patterns to apply (defaults to all patterns from Plan 2)\n * @returns Array of tokens sorted by position (cleanStart ascending)\n *\n * @example\n * ```typescript\n * import { tokenize } from '@/tokenize'\n * import { cleanText } from '@/clean'\n *\n * const original = \"See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)\"\n * const { cleanedText } = cleanText(original)\n * const tokens = tokenize(cleanedText)\n * // tokens[0] = {\n * // text: \"500 F.2d 123\",\n * // span: { cleanStart: 18, cleanEnd: 30 },\n * // type: \"case\",\n * // patternId: \"federal-reporter\"\n * // }\n * ```\n */\nexport function tokenize(\n cleanedText: string,\n patterns: Pattern[] = [\n ...casePatterns,\n ...statutePatterns,\n ...journalPatterns,\n ...neutralPatterns,\n ...shortFormPatterns,\n ]\n): Token[] {\n const tokens: Token[] = []\n\n for (const pattern of patterns) {\n try {\n // Apply pattern to cleaned text\n const matches = cleanedText.matchAll(pattern.regex)\n\n for (const match of matches) {\n // Create token from match\n tokens.push({\n text: match[0],\n span: {\n cleanStart: match.index!,\n cleanEnd: match.index! + match[0].length,\n },\n type: pattern.type,\n patternId: pattern.id,\n })\n }\n } catch (error) {\n // Timeout protection: If pattern throws (ReDoS, etc.), skip it\n console.warn(\n `Pattern ${pattern.id} threw error, skipping:`,\n error instanceof Error ? error.message : String(error)\n )\n continue\n }\n }\n\n // Sort tokens by position (cleanStart ascending)\n tokens.sort((a, b) => a.span.cleanStart - b.span.cleanStart)\n\n return tokens\n}\n","/**\n * Case Citation Extraction\n *\n * Parses tokenized case citations to extract volume, reporter, page, and\n * optional metadata (pincite, court, year). This is the third stage of\n * the parsing pipeline:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates)\n * 3. Extract (parse metadata, validate) ← THIS MODULE\n *\n * Extraction parses structured data from token text. Validation against\n * reporters-db happens in Phase 3 (resolution layer).\n *\n * @module extract/extractCase\n */\n\nimport type { Token } from '@/tokenize'\nimport type { FullCaseCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts case citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Leading digits (e.g., \"500\" from \"500 F.2d 123\")\n * - Reporter: Alphabetic abbreviation (e.g., \"F.2d\")\n * - Page: Trailing digits after reporter (e.g., \"123\")\n * - Pincite: Optional page reference after comma (e.g., \", 125\")\n * - Court: Optional court abbreviation in parentheses (e.g., \"(9th Cir.)\")\n * - Year: Optional year in parentheses (e.g., \"(2020)\")\n *\n * Confidence scoring:\n * - Base: 0.5\n * - Common reporter pattern (F., U.S., etc.): +0.3\n * - Valid year (not future): +0.2\n * - Capped at 1.0\n *\n * Position translation:\n * - Uses TransformationMap to convert clean positions → original positions\n * - cleanStart/cleanEnd from token span\n * - originalStart/originalEnd via transformationMap.cleanToOriginal\n *\n * Note: This function does NOT validate against reporters-db. That happens\n * in Phase 3 (resolution layer). Phase 2 extraction only parses structure.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns FullCaseCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"500 F.2d 123, 125\",\n * span: { cleanStart: 10, cleanEnd: 27 },\n * type: \"case\",\n * patternId: \"federal-reporter\"\n * }\n * const citation = extractCase(token, transformationMap)\n * // citation = {\n * // type: \"case\",\n * // text: \"500 F.2d 123, 125\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // page: 123,\n * // pincite: 125,\n * // span: { cleanStart: 10, cleanEnd: 27, originalStart: 10, originalEnd: 27 },\n * // confidence: 0.8,\n * // ...\n * // }\n * ```\n */\nexport function extractCase(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): FullCaseCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-reporter-page using regex\n\t// Pattern: volume (digits) + reporter (letters/periods/spaces/numbers) + page (digits)\n\t// Use greedy matching for reporter to capture full abbreviation including spaces\n\tconst volumeReporterPageRegex = /^(\\d+)\\s+([A-Za-z0-9.\\s]+)\\s+(\\d+)/\n\tconst match = volumeReporterPageRegex.exec(text)\n\n\tif (!match) {\n\t\t// Fallback if pattern doesn't match (shouldn't happen if tokenizer is correct)\n\t\tthrow new Error(`Failed to parse case citation: ${text}`)\n\t}\n\n\tconst volume = Number.parseInt(match[1], 10)\n\tconst reporter = match[2].trim()\n\tconst page = Number.parseInt(match[3], 10)\n\n\t// Extract optional pincite (page reference after comma)\n\t// Pattern: \", digits\" (e.g., \", 125\")\n\tconst pinciteRegex = /,\\s*(\\d+)/\n\tconst pinciteMatch = pinciteRegex.exec(text)\n\tconst pincite = pinciteMatch ? Number.parseInt(pinciteMatch[1], 10) : undefined\n\n\t// Extract optional year in parentheses (extract first for better matching)\n\t// Pattern: 4-digit year anywhere in parentheses\n\tconst yearRegex = /\\((?:[^)]*\\s)?(\\d{4})\\)/\n\tconst yearMatch = yearRegex.exec(text)\n\tconst year = yearMatch ? Number.parseInt(yearMatch[1], 10) : undefined\n\n\t// Extract optional court abbreviation in parentheses\n\t// Pattern: \"(text)\" where text contains letters (captures full parenthetical)\n\tconst courtRegex = /\\(([^)]*[A-Za-z][^)]*)\\)/\n\tconst courtMatch = courtRegex.exec(text)\n\tconst court = courtMatch ? courtMatch[1].trim() : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Calculate confidence score\n\tlet confidence = 0.5 // Base confidence\n\n\t// Common reporter patterns (F., U.S., S. Ct., etc.)\n\tconst commonReporters = [\n\t\t'F.',\n\t\t'F.2d',\n\t\t'F.3d',\n\t\t'F.4th',\n\t\t'U.S.',\n\t\t'S. Ct.',\n\t\t'L. Ed.',\n\t\t'P.',\n\t\t'P.2d',\n\t\t'P.3d',\n\t\t'A.',\n\t\t'A.2d',\n\t\t'A.3d',\n\t\t'N.E.',\n\t\t'N.E.2d',\n\t\t'N.E.3d',\n\t\t'N.W.',\n\t\t'N.W.2d',\n\t\t'S.E.',\n\t\t'S.E.2d',\n\t\t'S.W.',\n\t\t'S.W.2d',\n\t\t'S.W.3d',\n\t\t'So.',\n\t\t'So. 2d',\n\t\t'So. 3d',\n\t]\n\n\tif (commonReporters.some((r) => reporter.includes(r))) {\n\t\tconfidence += 0.3\n\t}\n\n\t// Valid year check (not in future)\n\tif (year !== undefined) {\n\t\tconst currentYear = new Date().getFullYear()\n\t\tif (year <= currentYear) {\n\t\t\tconfidence += 0.2\n\t\t}\n\t}\n\n\t// Cap at 1.0\n\tconfidence = Math.min(confidence, 1.0)\n\n\treturn {\n\t\ttype: 'case',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0, // Placeholder - timing handled by orchestration layer\n\t\tpatternsChecked: 1, // Single token processed\n\t\tvolume,\n\t\treporter,\n\t\tpage,\n\t\tpincite,\n\t\tcourt,\n\t\tyear,\n\t}\n}\n","/**\n * Statute Citation Extraction\n *\n * Parses tokenized statute citations to extract title, code, section, and\n * optional subsections. Examples: \"42 U.S.C. § 1983\", \"Cal. Civ. Code § 1234(a)(1)\"\n *\n * @module extract/extractStatute\n */\n\nimport type { Token } from '@/tokenize'\nimport type { StatuteCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts statute citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Title: Optional leading digits (e.g., \"42\" from \"42 U.S.C. § 1983\")\n * - Code: Statutory code abbreviation (e.g., \"U.S.C.\", \"Cal. Civ. Code\")\n * - Section: Section number after § symbol (e.g., \"1983\")\n * - Subsections: Optional parenthetical subdivisions (e.g., \"(a)(1)\")\n *\n * Confidence scoring:\n * - Base: 0.5\n * - Known code pattern (U.S.C., C.F.R., state codes): +0.3\n * - Capped at 1.0\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns StatuteCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"42 U.S.C. § 1983\",\n * span: { cleanStart: 10, cleanEnd: 26 },\n * type: \"statute\",\n * patternId: \"usc\"\n * }\n * const citation = extractStatute(token, transformationMap)\n * // citation = {\n * // type: \"statute\",\n * // title: 42,\n * // code: \"U.S.C.\",\n * // section: \"1983\",\n * // ...\n * // }\n * ```\n */\nexport function extractStatute(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): StatuteCitation {\n\tconst { text, span } = token\n\n\t// Parse title-code-section using regex\n\t// Pattern: optional title (digits) + code (letters/periods/spaces) + § + section\n\tconst statuteRegex = /^(?:(\\d+)\\s+)?([A-Za-z.\\s]+?)\\s*§\\s*(\\d+[A-Za-z0-9\\-]*)/\n\tconst match = statuteRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse statute citation: ${text}`)\n\t}\n\n\tconst title = match[1] ? Number.parseInt(match[1], 10) : undefined\n\tconst code = match[2].trim()\n\tconst section = match[3]\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Calculate confidence score\n\tlet confidence = 0.5 // Base confidence\n\n\t// Known statutory code patterns\n\tconst knownCodes = [\n\t\t'U.S.C.',\n\t\t'C.F.R.',\n\t\t'Cal. Civ. Code',\n\t\t'Cal. Penal Code',\n\t\t'N.Y. Civ. Prac. L. & R.',\n\t\t'Tex. Civ. Prac. & Rem. Code',\n\t]\n\n\tif (knownCodes.some((c) => code.includes(c))) {\n\t\tconfidence += 0.3\n\t}\n\n\tconfidence = Math.min(confidence, 1.0)\n\n\treturn {\n\t\ttype: 'statute',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\ttitle,\n\t\tcode,\n\t\tsection,\n\t}\n}\n","/**\n * Journal Citation Extraction\n *\n * Parses tokenized journal citations to extract volume, journal name, page,\n * and optional metadata. Examples: \"123 Harv. L. Rev. 456\", \"75 Yale L.J. 789, 791\"\n *\n * @module extract/extractJournal\n */\n\nimport type { Token } from '@/tokenize'\nimport type { JournalCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts journal citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Leading digits (e.g., \"123\" from \"123 Harv. L. Rev. 456\")\n * - Journal: Journal abbreviation (e.g., \"Harv. L. Rev.\")\n * - Page: Starting page number (e.g., \"456\")\n * - Pincite: Optional specific page reference after comma (e.g., \", 458\")\n *\n * Confidence scoring:\n * - Base: 0.6 (journal validation happens in Phase 3)\n *\n * Note: Author and title extraction from preceding text is not implemented\n * in Phase 2. That requires context analysis in Phase 3.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns JournalCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"123 Harv. L. Rev. 456\",\n * span: { cleanStart: 10, cleanEnd: 31 },\n * type: \"journal\",\n * patternId: \"journal-standard\"\n * }\n * const citation = extractJournal(token, transformationMap)\n * // citation = {\n * // type: \"journal\",\n * // volume: 123,\n * // journal: \"Harv. L. Rev.\",\n * // abbreviation: \"Harv. L. Rev.\",\n * // page: 456,\n * // ...\n * // }\n * ```\n */\nexport function extractJournal(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): JournalCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-journal-page using regex\n\t// Pattern: volume (digits) + journal (letters/periods/spaces) + page (digits)\n\tconst journalRegex = /^(\\d+)\\s+([A-Za-z.\\s]+?)\\s+(\\d+)/\n\tconst match = journalRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse journal citation: ${text}`)\n\t}\n\n\tconst volume = Number.parseInt(match[1], 10)\n\tconst journal = match[2].trim()\n\tconst page = Number.parseInt(match[3], 10)\n\n\t// Extract optional pincite (page reference after comma)\n\tconst pinciteRegex = /,\\s*(\\d+)/\n\tconst pinciteMatch = pinciteRegex.exec(text)\n\tconst pincite = pinciteMatch ? Number.parseInt(pinciteMatch[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.6 base (journal validation against database happens in Phase 3)\n\tconst confidence = 0.6\n\n\treturn {\n\t\ttype: 'journal',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\tjournal,\n\t\tabbreviation: journal, // For Phase 2, abbreviation = journal name\n\t\tpage,\n\t\tpincite,\n\t}\n}\n","/**\n * Neutral Citation Extraction\n *\n * Parses tokenized neutral (vendor-neutral) citations to extract year, court,\n * and document number. Examples: \"2020 WL 123456\", \"2020 U.S. LEXIS 456\"\n *\n * @module extract/extractNeutral\n */\n\nimport type { Token } from '@/tokenize'\nimport type { NeutralCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts neutral citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Year: 4-digit year (e.g., \"2020\")\n * - Court: Vendor identifier (e.g., \"WL\", \"U.S. LEXIS\")\n * - Document number: Unique document identifier (e.g., \"123456\")\n *\n * Confidence scoring:\n * - 1.0 (neutral format is unambiguous and standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns NeutralCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"2020 WL 123456\",\n * span: { cleanStart: 10, cleanEnd: 24 },\n * type: \"neutral\",\n * patternId: \"westlaw-neutral\"\n * }\n * const citation = extractNeutral(token, transformationMap)\n * // citation = {\n * // type: \"neutral\",\n * // year: 2020,\n * // court: \"WL\",\n * // documentNumber: \"123456\",\n * // confidence: 1.0,\n * // ...\n * // }\n * ```\n */\nexport function extractNeutral(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): NeutralCitation {\n\tconst { text, span } = token\n\n\t// Parse year-court-documentNumber using regex\n\t// Pattern: 4-digit year + court identifier (WL, LEXIS, etc.) + document number\n\tconst neutralRegex = /^(\\d{4})\\s+(WL|LEXIS|U\\.S\\.\\s+LEXIS)\\s+(\\d+)/\n\tconst match = neutralRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse neutral citation: ${text}`)\n\t}\n\n\tconst year = Number.parseInt(match[1], 10)\n\tconst court = match[2]\n\tconst documentNumber = match[3]\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 1.0 (neutral format is unambiguous)\n\tconst confidence = 1.0\n\n\treturn {\n\t\ttype: 'neutral',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tyear,\n\t\tcourt,\n\t\tdocumentNumber,\n\t}\n}\n","/**\n * Public Law Citation Extraction\n *\n * Parses tokenized public law citations to extract congress number and law number.\n * Examples: \"Pub. L. No. 116-283\", \"Pub. L. 117-58\"\n *\n * @module extract/extractPublicLaw\n */\n\nimport type { Token } from '@/tokenize'\nimport type { PublicLawCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts public law citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Congress: Congress number (e.g., \"116\" from \"Pub. L. No. 116-283\")\n * - Law number: Law number within that Congress (e.g., \"283\")\n *\n * Confidence scoring:\n * - 0.9 (public law format is fairly standard)\n *\n * Note: Bill title extraction from nearby text is not implemented in Phase 2.\n * That requires context analysis in Phase 3.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns PublicLawCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Pub. L. No. 116-283\",\n * span: { cleanStart: 10, cleanEnd: 29 },\n * type: \"publicLaw\",\n * patternId: \"public-law\"\n * }\n * const citation = extractPublicLaw(token, transformationMap)\n * // citation = {\n * // type: \"publicLaw\",\n * // congress: 116,\n * // lawNumber: 283,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractPublicLaw(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): PublicLawCitation {\n\tconst { text, span } = token\n\n\t// Parse congress-lawNumber using regex\n\t// Pattern: \"Pub. L.\" (with optional \"No.\") + congress number + \"-\" + law number\n\tconst publicLawRegex = /Pub\\.\\s?L\\.(?:\\s?No\\.)?\\s?(\\d+)-(\\d+)/\n\tconst match = publicLawRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse public law citation: ${text}`)\n\t}\n\n\tconst congress = Number.parseInt(match[1], 10)\n\tconst lawNumber = Number.parseInt(match[2], 10)\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (public law format is fairly standard)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'publicLaw',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tcongress,\n\t\tlawNumber,\n\t}\n}\n","/**\n * Federal Register Citation Extraction\n *\n * Parses tokenized Federal Register citations to extract volume, page, and\n * optional year. Examples: \"85 Fed. Reg. 12345\", \"86 Fed. Reg. 56789 (Jan. 15, 2021)\"\n *\n * @module extract/extractFederalRegister\n */\n\nimport type { Token } from '@/tokenize'\nimport type { FederalRegisterCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts Federal Register citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Federal Register volume number (e.g., \"85\")\n * - Page: Page number (e.g., \"12345\")\n * - Year: Optional publication year in parentheses (e.g., \"(2021)\")\n *\n * Confidence scoring:\n * - 0.9 (Federal Register format is standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns FederalRegisterCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"85 Fed. Reg. 12345\",\n * span: { cleanStart: 10, cleanEnd: 28 },\n * type: \"federalRegister\",\n * patternId: \"federal-register\"\n * }\n * const citation = extractFederalRegister(token, transformationMap)\n * // citation = {\n * // type: \"federalRegister\",\n * // volume: 85,\n * // page: 12345,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractFederalRegister(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): FederalRegisterCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-page using regex\n\t// Pattern: volume (digits) + \"Fed. Reg.\" + page (digits)\n\tconst federalRegisterRegex = /^(\\d+)\\s+Fed\\.\\s?Reg\\.\\s+(\\d+)/\n\tconst match = federalRegisterRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse Federal Register citation: ${text}`)\n\t}\n\n\tconst volume = Number.parseInt(match[1], 10)\n\tconst page = Number.parseInt(match[2], 10)\n\n\t// Extract optional year in parentheses\n\t// Pattern: \"(year)\" or \"(month day, year)\"\n\tconst yearRegex = /\\((?:.*?\\s)?(\\d{4})\\)/\n\tconst yearMatch = yearRegex.exec(text)\n\tconst year = yearMatch ? Number.parseInt(yearMatch[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (Federal Register format is standardized)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'federalRegister',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\tpage,\n\t\tyear,\n\t}\n}\n","/**\n * Short-form Citation Extraction\n *\n * Parses tokenized short-form citations (Id., supra, short-form case) to extract\n * metadata. Short-form citations refer to earlier citations in the document.\n *\n * @module extract/extractShortForms\n */\n\nimport type { Token } from '@/tokenize'\nimport type { IdCitation, SupraCitation, ShortFormCaseCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts Id. citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Pincite: Optional page reference (e.g., \"253\" from \"Id. at 253\")\n *\n * Confidence scoring:\n * - 1.0 (Id. format is unambiguous and standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns IdCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Id. at 253\",\n * span: { cleanStart: 10, cleanEnd: 20 },\n * type: \"case\",\n * patternId: \"id\"\n * }\n * const citation = extractId(token, transformationMap)\n * // citation = {\n * // type: \"id\",\n * // pincite: 253,\n * // confidence: 1.0,\n * // ...\n * // }\n * ```\n */\nexport function extractId(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): IdCitation {\n\tconst { text, span } = token\n\n\t// Parse Id. with optional pincite\n\t// Pattern: Id. or Ibid. with optional \"at [page]\"\n\tconst idRegex = /[Ii](?:d|bid)\\.(?:\\s+at\\s+(\\d+))?/\n\tconst match = idRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse Id. citation: ${text}`)\n\t}\n\n\t// Extract pincite if present\n\tconst pincite = match[1] ? Number.parseInt(match[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 1.0 (Id. format is unambiguous)\n\tconst confidence = 1.0\n\n\treturn {\n\t\ttype: 'id',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tpincite,\n\t}\n}\n\n/**\n * Extracts supra citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Party name: Name preceding \"supra\" (e.g., \"Smith\" from \"Smith, supra\")\n * - Pincite: Optional page reference (e.g., \"460\" from \"Smith, supra, at 460\")\n *\n * Confidence scoring:\n * - 0.9 (supra format is fairly standard but party name extraction can vary)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns SupraCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Smith, supra, at 460\",\n * span: { cleanStart: 10, cleanEnd: 30 },\n * type: \"case\",\n * patternId: \"supra\"\n * }\n * const citation = extractSupra(token, transformationMap)\n * // citation = {\n * // type: \"supra\",\n * // partyName: \"Smith\",\n * // pincite: 460,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractSupra(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): SupraCitation {\n\tconst { text, span } = token\n\n\t// Parse party name and optional pincite\n\t// Pattern: word(s), supra [, at page]\n\t// Note: Matches party names including \"v.\" (e.g., \"Smith v. Jones\")\n\tconst supraRegex = /\\b([A-Z][a-zA-Z]+(?:(?:\\s+v\\.?\\s+|\\s+)[A-Z][a-zA-Z]+)*),?\\s+supra(?:,?\\s+at\\s+(\\d+))?/\n\tconst match = supraRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse supra citation: ${text}`)\n\t}\n\n\tconst partyName = match[1]\n\tconst pincite = match[2] ? Number.parseInt(match[2], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (supra format is fairly standard)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'supra',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tpartyName,\n\t\tpincite,\n\t}\n}\n\n/**\n * Extracts short-form case citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Volume number\n * - Reporter: Reporter abbreviation\n * - Pincite: Page reference (from \"at [page]\" pattern)\n *\n * Confidence scoring:\n * - 0.7 (short-form case citations are more ambiguous than full citations)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns ShortFormCaseCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"500 F.2d at 125\",\n * span: { cleanStart: 10, cleanEnd: 25 },\n * type: \"case\",\n * patternId: \"short-form-case\"\n * }\n * const citation = extractShortFormCase(token, transformationMap)\n * // citation = {\n * // type: \"shortFormCase\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // pincite: 125,\n * // confidence: 0.7,\n * // ...\n * // }\n * ```\n */\nexport function extractShortFormCase(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): ShortFormCaseCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-reporter-at-page\n\t// Pattern: number space abbreviation space \"at\" space number\n\tconst shortFormRegex = /(\\d+)\\s+([A-Z][A-Za-z.\\s]+?(?:\\d[a-z])?)\\s+at\\s+(\\d+)/\n\tconst match = shortFormRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse short-form case citation: ${text}`)\n\t}\n\n\tconst volume = Number.parseInt(match[1], 10)\n\tconst reporter = match[2].trim() // Remove trailing spaces\n\tconst pincite = Number.parseInt(match[3], 10)\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.7 (short-form citations are more ambiguous)\n\tconst confidence = 0.7\n\n\treturn {\n\t\ttype: 'shortFormCase',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\treporter,\n\t\tpincite,\n\t}\n}\n","/**\n * Scope Boundary Detection\n *\n * Detects paragraph/section boundaries in text and validates whether\n * an antecedent citation is within the resolution scope.\n */\n\nimport type { Citation } from '../types/citation'\nimport type { ScopeStrategy } from './types'\n\n/**\n * Detects paragraph boundaries from text and assigns each citation to a paragraph.\n *\n * @param text - Original document text\n * @param citations - Extracted citations with position spans\n * @param boundaryPattern - Regex pattern to detect boundaries (default: /\\n\\n+/)\n * @returns Map of citation index to paragraph number (0-based)\n */\nexport function detectParagraphBoundaries(\n text: string,\n citations: Citation[],\n boundaryPattern: RegExp = /\\n\\n+/g\n): Map<number, number> {\n const paragraphMap = new Map<number, number>()\n\n // Find all paragraph boundaries (positions in text)\n const boundaries: number[] = [0] // Start of document is first boundary\n let match: RegExpExecArray | null\n\n while ((match = boundaryPattern.exec(text)) !== null) {\n // Boundary is at end of match (start of next paragraph)\n boundaries.push(match.index + match[0].length)\n }\n\n boundaries.push(text.length) // End of document\n\n // Assign each citation to a paragraph\n for (let i = 0; i < citations.length; i++) {\n const citation = citations[i]\n const citationStart = citation.span.originalStart\n\n // Find which paragraph this citation belongs to\n let paragraphNum = 0\n for (let j = 0; j < boundaries.length - 1; j++) {\n if (citationStart >= boundaries[j] && citationStart < boundaries[j + 1]) {\n paragraphNum = j\n break\n }\n }\n\n paragraphMap.set(i, paragraphNum)\n }\n\n return paragraphMap\n}\n\n/**\n * Checks if an antecedent citation is within resolution scope.\n *\n * @param antecedentIndex - Index of the antecedent citation\n * @param currentIndex - Index of current citation being resolved\n * @param paragraphMap - Map of citation index to paragraph number\n * @param strategy - Scope boundary strategy\n * @returns true if antecedent is within scope, false otherwise\n */\nexport function isWithinBoundary(\n antecedentIndex: number,\n currentIndex: number,\n paragraphMap: Map<number, number>,\n strategy: ScopeStrategy\n): boolean {\n if (strategy === 'none') {\n // No boundary restriction - can resolve across entire document\n return true\n }\n\n // Get paragraph numbers for both citations\n const antecedentParagraph = paragraphMap.get(antecedentIndex)\n const currentParagraph = paragraphMap.get(currentIndex)\n\n // If either is undefined, default to allowing resolution\n if (antecedentParagraph === undefined || currentParagraph === undefined) {\n return true\n }\n\n // For paragraph/section/footnote strategies, citations must be in same boundary\n // (In this MVP, section and footnote behave same as paragraph - future enhancement)\n return antecedentParagraph === currentParagraph\n}\n","/**\n * Levenshtein Distance\n *\n * Calculates edit distance between strings for fuzzy party name matching\n * in supra citation resolution.\n *\n * Uses dynamic programming for O(m*n) time complexity.\n */\n\n/**\n * Calculates Levenshtein distance (edit distance) between two strings.\n *\n * The edit distance is the minimum number of single-character edits\n * (insertions, deletions, substitutions) needed to change one string into the other.\n *\n * @param a - First string\n * @param b - Second string\n * @returns Number of edits required (0 = identical)\n */\nexport function levenshteinDistance(a: string, b: string): number {\n // Handle empty strings\n if (a.length === 0) return b.length\n if (b.length === 0) return a.length\n\n // Create 2D array for dynamic programming\n // dp[i][j] = edit distance between a[0...i-1] and b[0...j-1]\n const dp: number[][] = Array.from({ length: a.length + 1 }, () =>\n Array(b.length + 1).fill(0)\n )\n\n // Initialize base cases\n for (let i = 0; i <= a.length; i++) {\n dp[i][0] = i // Distance from a[0...i-1] to empty string\n }\n for (let j = 0; j <= b.length; j++) {\n dp[0][j] = j // Distance from empty string to b[0...j-1]\n }\n\n // Fill the DP table\n for (let i = 1; i <= a.length; i++) {\n for (let j = 1; j <= b.length; j++) {\n if (a[i - 1] === b[j - 1]) {\n // Characters match - no edit needed\n dp[i][j] = dp[i - 1][j - 1]\n } else {\n // Characters differ - take minimum of:\n // 1. Insert: dp[i][j-1] + 1\n // 2. Delete: dp[i-1][j] + 1\n // 3. Substitute: dp[i-1][j-1] + 1\n dp[i][j] = 1 + Math.min(\n dp[i - 1][j], // Delete from a\n dp[i][j - 1], // Insert into a\n dp[i - 1][j - 1] // Substitute\n )\n }\n }\n }\n\n return dp[a.length][b.length]\n}\n\n/**\n * Calculates normalized Levenshtein similarity (0-1 scale).\n *\n * Returns similarity score where:\n * - 1.0 = identical strings\n * - 0.0 = completely different\n *\n * Comparison is case-insensitive.\n *\n * @param a - First string\n * @param b - Second string\n * @returns Similarity score from 0 to 1\n */\nexport function normalizedLevenshteinDistance(a: string, b: string): number {\n // Normalize to lowercase for case-insensitive comparison\n const lowerA = a.toLowerCase()\n const lowerB = b.toLowerCase()\n\n // Calculate raw edit distance\n const distance = levenshteinDistance(lowerA, lowerB)\n\n // Normalize by max length\n const maxLength = Math.max(lowerA.length, lowerB.length)\n if (maxLength === 0) return 1.0 // Both empty strings\n\n // Convert distance to similarity: 1 - (distance / maxLength)\n return 1 - distance / maxLength\n}\n","/**\n * Document-Scoped Citation Resolver\n *\n * Resolves short-form citations (Id./supra/short-form case) to their full antecedent citations\n * by maintaining resolution context and enforcing scope boundaries.\n *\n * Resolution rules:\n * - Id. resolves to immediately preceding full citation (within scope)\n * - Supra resolves to full citation with matching party name (within scope)\n * - Short-form case resolves to full case with matching volume/reporter (within scope)\n */\n\nimport type {\n Citation,\n FullCaseCitation,\n IdCitation,\n SupraCitation,\n ShortFormCaseCitation,\n} from '../types/citation'\nimport { isFullCitation } from '../types/guards'\nimport type {\n ResolutionOptions,\n ResolutionResult,\n ResolvedCitation,\n ResolutionContext,\n} from './types'\nimport { detectParagraphBoundaries, isWithinBoundary } from './scopeBoundary'\nimport { normalizedLevenshteinDistance } from './levenshtein'\n\n/**\n * Document-scoped resolver that processes citations sequentially\n * and resolves short-form citations to their antecedents.\n */\nexport class DocumentResolver {\n private readonly citations: Citation[]\n private readonly text: string\n private readonly options: Required<ResolutionOptions>\n private readonly context: ResolutionContext\n\n /**\n * Creates a new DocumentResolver.\n *\n * @param citations - All citations in document (in order of appearance)\n * @param text - Original document text\n * @param options - Resolution options\n */\n constructor(\n citations: Citation[],\n text: string,\n options: ResolutionOptions = {}\n ) {\n this.citations = citations\n this.text = text\n\n // Apply defaults to options\n this.options = {\n scopeStrategy: options.scopeStrategy ?? 'paragraph',\n autoDetectParagraphs: options.autoDetectParagraphs ?? true,\n paragraphBoundaryPattern: options.paragraphBoundaryPattern ?? /\\n\\n+/g,\n fuzzyPartyMatching: options.fuzzyPartyMatching ?? true,\n partyMatchThreshold: options.partyMatchThreshold ?? 0.8,\n allowNestedResolution: options.allowNestedResolution ?? false,\n reportUnresolved: options.reportUnresolved ?? true,\n }\n\n // Initialize resolution context\n this.context = {\n citationIndex: 0,\n allCitations: citations,\n lastFullCitation: undefined,\n fullCitationHistory: new Map(),\n paragraphMap: new Map(),\n }\n\n // Detect paragraph boundaries if enabled\n if (this.options.autoDetectParagraphs) {\n this.context.paragraphMap = detectParagraphBoundaries(\n text,\n citations,\n this.options.paragraphBoundaryPattern\n )\n }\n }\n\n /**\n * Resolves all citations in the document.\n *\n * @returns Array of citations with resolution metadata\n */\n resolve(): ResolvedCitation[] {\n const resolved: ResolvedCitation[] = []\n\n for (let i = 0; i < this.citations.length; i++) {\n this.context.citationIndex = i\n const citation = this.citations[i]\n\n // Resolve based on citation type\n let resolution: ResolutionResult | undefined\n\n switch (citation.type) {\n case 'id':\n resolution = this.resolveId(citation)\n break\n case 'supra':\n resolution = this.resolveSupra(citation)\n break\n case 'shortFormCase':\n resolution = this.resolveShortFormCase(citation)\n break\n default:\n // Full citation - update context for future resolutions\n if (isFullCitation(citation)) {\n this.context.lastFullCitation = i\n this.trackFullCitation(citation, i)\n }\n break\n }\n\n // Add citation with resolution metadata\n // Type assertion is safe: runtime logic only sets resolution on short-form citations\n resolved.push({\n ...citation,\n resolution,\n } as ResolvedCitation)\n }\n\n return resolved\n }\n\n /**\n * Resolves Id. citation to immediately preceding full case citation.\n */\n private resolveId(citation: IdCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n\n // Find most recent full case citation (Id. only resolves to case citations, not statutes/journals)\n let antecedentIndex: number | undefined\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.citations[i]\n if (candidate.type === 'case') {\n antecedentIndex = i\n break\n }\n }\n\n // Check if we have a previous case citation\n if (antecedentIndex === undefined) {\n return this.createFailureResult('No preceding full case citation found')\n }\n\n // Check scope boundary\n if (!this.isWithinScope(antecedentIndex, currentIndex)) {\n return this.createFailureResult('Antecedent citation outside scope boundary')\n }\n\n return {\n resolvedTo: antecedentIndex,\n confidence: 1.0, // Id. resolution is unambiguous when successful\n }\n }\n\n /**\n * Resolves supra citation by matching party name.\n */\n private resolveSupra(citation: SupraCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n const targetPartyName = this.normalizePartyName(citation.partyName)\n\n // Search full citation history for matching party name\n let bestMatch: { index: number; similarity: number } | undefined\n\n for (const [partyName, citationIndex] of this.context.fullCitationHistory) {\n // Check scope boundary\n if (!this.isWithinScope(citationIndex, currentIndex)) {\n continue\n }\n\n // Calculate similarity\n const similarity = normalizedLevenshteinDistance(targetPartyName, partyName)\n\n // Update best match if this is better\n if (!bestMatch || similarity > bestMatch.similarity) {\n bestMatch = { index: citationIndex, similarity }\n }\n }\n\n // Check if we found a match above threshold\n if (!bestMatch) {\n return this.createFailureResult('No full citation found in scope')\n }\n\n if (bestMatch.similarity < this.options.partyMatchThreshold) {\n return this.createFailureResult(\n `Party name similarity ${bestMatch.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`\n )\n }\n\n // Return successful resolution with confidence based on similarity\n const warnings: string[] = []\n if (bestMatch.similarity < 1.0) {\n warnings.push(`Fuzzy match: similarity ${bestMatch.similarity.toFixed(2)}`)\n }\n\n return {\n resolvedTo: bestMatch.index,\n confidence: bestMatch.similarity,\n warnings: warnings.length > 0 ? warnings : undefined,\n }\n }\n\n /**\n * Resolves short-form case citation by matching volume/reporter.\n */\n private resolveShortFormCase(citation: ShortFormCaseCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n\n // Search backwards for matching full case citation\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.citations[i]\n\n // Only match against full case citations\n if (candidate.type !== 'case') {\n continue\n }\n\n // Check if volume and reporter match\n if (\n candidate.volume === citation.volume &&\n this.normalizeReporter(candidate.reporter) === this.normalizeReporter(citation.reporter)\n ) {\n // Check scope boundary\n if (!this.isWithinScope(i, currentIndex)) {\n return this.createFailureResult('Matching citation outside scope boundary')\n }\n\n // Found a match\n return {\n resolvedTo: i,\n confidence: 0.95, // High confidence but not perfect (multiple cases could have same volume/reporter)\n }\n }\n }\n\n return this.createFailureResult('No matching full case citation found')\n }\n\n /**\n * Tracks a full citation in the resolution history.\n * Extracts party name for supra resolution.\n */\n private trackFullCitation(citation: Citation, index: number): void {\n // Only case citations have party names for supra resolution\n if (citation.type === 'case') {\n const partyName = this.extractPartyName(citation)\n if (partyName) {\n const normalized = this.normalizePartyName(partyName)\n this.context.fullCitationHistory.set(normalized, index)\n }\n }\n }\n\n /**\n * Extracts party name from full case citation text.\n * Handles \"Party v. Party\" format by looking at text before citation span.\n */\n private extractPartyName(citation: FullCaseCitation): string | undefined {\n // Look at text before citation span to find party names\n // Case citations typically appear as: \"Smith v. Jones, 100 F.2d 10\"\n // But tokenizer only captures \"100 F.2d 10\" - we need to look backwards in text\n\n const citationStart = citation.span.originalStart\n // Look backwards up to 100 characters for party name\n const lookbackStart = Math.max(0, citationStart - 100)\n const beforeText = this.text.substring(lookbackStart, citationStart)\n\n // Match pattern: \"FirstParty v. SecondParty, \" before the citation\n // Capture the first party name (handles single-letter party names like \"A\" or \"B\")\n const vMatch = beforeText.match(/([A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*)\\s+v\\.?\\s+[A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*,\\s*$/)\n if (vMatch) {\n return vMatch[1].trim()\n }\n\n // Fallback: try to find any capitalized word(s) before comma\n const beforeComma = beforeText.match(/([A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*),\\s*$/)\n return beforeComma?.[1].trim()\n }\n\n /**\n * Normalizes party name for matching.\n */\n private normalizePartyName(name: string): string {\n return name\n .toLowerCase()\n .replace(/\\s+/g, ' ') // Normalize whitespace\n .trim()\n }\n\n /**\n * Normalizes reporter abbreviation for matching.\n */\n private normalizeReporter(reporter: string): string {\n return reporter\n .toLowerCase()\n .replace(/\\s+/g, '') // Remove spaces (F.2d vs F. 2d)\n .replace(/\\./g, '') // Remove periods\n }\n\n /**\n * Checks if antecedent citation is within scope boundary.\n */\n private isWithinScope(antecedentIndex: number, currentIndex: number): boolean {\n return isWithinBoundary(\n antecedentIndex,\n currentIndex,\n this.context.paragraphMap,\n this.options.scopeStrategy\n )\n }\n\n /**\n * Creates a failure result for unresolved citations.\n */\n private createFailureResult(reason: string): ResolutionResult | undefined {\n if (this.options.reportUnresolved) {\n return {\n resolvedTo: undefined,\n failureReason: reason,\n confidence: 0.0,\n }\n }\n return undefined\n }\n}\n","/**\n * Citation Resolution\n *\n * Resolves short-form citations (Id./supra/short-form case) to their full antecedents.\n *\n * @example\n * ```ts\n * import { resolveCitations } from 'eyecite-ts/resolve'\n * import { extractCitations } from 'eyecite-ts'\n *\n * const text = 'See Smith v. Jones, 500 F.2d 100 (1974). Id. at 105.'\n * const citations = extractCitations(text)\n * const resolved = resolveCitations(citations, text)\n *\n * // resolved[1] is Id. citation with resolution.resolvedTo = 0\n * console.log(resolved[1].resolution?.resolvedTo) // 0 (points to Smith v. Jones)\n * ```\n */\n\nimport type { Citation } from '../types/citation'\nimport type { ResolutionOptions, ResolvedCitation } from './types'\nimport { DocumentResolver } from './DocumentResolver'\n\n/**\n * Resolves short-form citations to their full antecedents.\n *\n * Convenience wrapper around DocumentResolver that handles common use cases.\n *\n * @param citations - Extracted citations in order of appearance\n * @param text - Original document text\n * @param options - Resolution options\n * @returns Citations with resolution metadata\n */\nexport function resolveCitations(\n citations: Citation[],\n text: string,\n options?: ResolutionOptions\n): ResolvedCitation[] {\n const resolver = new DocumentResolver(citations, text, options)\n return resolver.resolve()\n}\n\n// Re-export core types and classes\nexport { DocumentResolver } from './DocumentResolver'\nexport type {\n ResolutionOptions,\n ResolutionResult,\n ResolvedCitation,\n ScopeStrategy,\n} from './types'\n","/**\n * Main Citation Extraction Pipeline\n *\n * Orchestrates the complete citation extraction flow:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates)\n * 3. Extract (parse metadata from tokens)\n *\n * This is the primary public API for citation extraction.\n *\n * @module extract/extractCitations\n */\n\nimport { cleanText } from '@/clean'\nimport { tokenize } from '@/tokenize'\nimport {\n\textractCase,\n\textractStatute,\n\textractJournal,\n\textractNeutral,\n\textractPublicLaw,\n\textractFederalRegister,\n} from '@/extract'\nimport { extractId, extractSupra, extractShortFormCase } from './extractShortForms'\nimport {\n\tcasePatterns,\n\tstatutePatterns,\n\tjournalPatterns,\n\tneutralPatterns,\n\tshortFormPatterns,\n} from '@/patterns'\nimport { resolveCitations } from '../resolve'\nimport type { Citation } from '@/types/citation'\nimport type { Pattern } from '@/patterns'\nimport type { ResolutionOptions, ResolvedCitation } from '../resolve/types'\n\n/**\n * Options for customizing citation extraction behavior.\n */\nexport interface ExtractOptions {\n\t/**\n\t * Custom text cleaners (overrides defaults).\n\t *\n\t * If provided, these cleaners replace the default pipeline:\n\t * [stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes]\n\t *\n\t * @example\n\t * ```typescript\n\t * // Use only HTML stripping, skip Unicode normalization\n\t * const citations = extractCitations(text, {\n\t * cleaners: [stripHtmlTags]\n\t * })\n\t * ```\n\t */\n\tcleaners?: Array<(text: string) => string>\n\n\t/**\n\t * Custom regex patterns (overrides defaults).\n\t *\n\t * If provided, these patterns replace the default pattern set:\n\t * [casePatterns, statutePatterns, journalPatterns, neutralPatterns, shortFormPatterns]\n\t *\n\t * @example\n\t * ```typescript\n\t * // Extract only case citations\n\t * const citations = extractCitations(text, {\n\t * patterns: casePatterns\n\t * })\n\t * ```\n\t */\n\tpatterns?: Pattern[]\n\n\t/**\n\t * Resolve short-form citations to their full antecedents (default: false).\n\t *\n\t * If true, returns ResolvedCitation[] with resolution metadata for short-form citations\n\t * (Id., supra, short-form case). Full citations are unchanged.\n\t *\n\t * @example\n\t * ```typescript\n\t * const text = \"Smith v. Jones, 500 F.2d 100 (1974). Id. at 105.\"\n\t * const citations = extractCitations(text, { resolve: true })\n\t * // citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)\n\t * ```\n\t */\n\tresolve?: boolean\n\n\t/**\n\t * Options for citation resolution (only used if resolve: true).\n\t *\n\t * @example\n\t * ```typescript\n\t * const citations = extractCitations(text, {\n\t * resolve: true,\n\t * resolutionOptions: {\n\t * scopeStrategy: 'paragraph',\n\t * fuzzyPartyMatching: true\n\t * }\n\t * })\n\t * ```\n\t */\n\tresolutionOptions?: ResolutionOptions\n}\n\n/**\n * Extracts legal citations from text using the full parsing pipeline.\n *\n * Pipeline flow:\n * 1. **Clean:** Remove HTML tags, normalize Unicode, fix smart quotes\n * 2. **Tokenize:** Apply regex patterns to find citation candidates\n * 3. **Extract:** Parse metadata (volume, reporter, page, etc.)\n * 4. **Translate:** Map positions from cleaned text back to original text\n *\n * This function is synchronous because all stages (cleaning, tokenization,\n * extraction) are synchronous. For async operations (e.g., future reporters-db\n * lookups), use extractCitationsAsync().\n *\n * Position tracking:\n * - TransformationMap is built during cleaning\n * - Tokens contain positions in cleaned text (cleanStart/cleanEnd)\n * - Extraction translates cleaned positions → original positions\n * - Final citations have originalStart/originalEnd pointing to input text\n *\n * Warnings from cleaning layer are attached to all extracted citations.\n *\n * @param text - Raw text to extract citations from (may contain HTML, Unicode)\n * @param options - Optional customization (cleaners, patterns)\n * @returns Array of citations with parsed metadata and accurate positions\n *\n * @example\n * ```typescript\n * const text = \"See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)\"\n * const citations = extractCitations(text)\n * // citations[0] = {\n * // type: \"case\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // page: 123,\n * // court: \"9th Cir.\",\n * // year: 2020,\n * // span: { originalStart: 18, originalEnd: 30, ... }\n * // }\n * ```\n *\n * @example\n * ```typescript\n * // Extract from HTML\n * const html = \"<p>In <b>Smith</b>, 500 F.2d 123, the court held...</p>\"\n * const citations = extractCitations(html)\n * // HTML is stripped, positions point to original HTML\n * ```\n *\n * @example\n * ```typescript\n * // Extract multiple citation types\n * const text = \"See 42 U.S.C. § 1983; Smith, 500 F.2d 123; 123 Harv. L. Rev. 456\"\n * const citations = extractCitations(text)\n * // citations[0].type === \"statute\"\n * // citations[1].type === \"case\"\n * // citations[2].type === \"journal\"\n * ```\n */\nexport function extractCitations(text: string, options: ExtractOptions & { resolve: true }): ResolvedCitation[]\nexport function extractCitations(text: string, options?: ExtractOptions): Citation[]\nexport function extractCitations(\n\ttext: string,\n\toptions?: ExtractOptions,\n): Citation[] | ResolvedCitation[] {\n\tconst startTime = performance.now()\n\n\t// Step 1: Clean text\n\tconst { cleaned, transformationMap, warnings } = cleanText(\n\t\ttext,\n\t\toptions?.cleaners,\n\t)\n\n\t// Step 2: Tokenize (synchronous)\n\t// Note: Pattern order matters for deduplication - more specific patterns first\n\tconst allPatterns = options?.patterns || [\n\t\t...neutralPatterns, // Most specific (year-based format)\n\t\t...shortFormPatterns, // Short-form (requires \" at \" keyword)\n\t\t...casePatterns, // Case citations (reporter-specific)\n\t\t...statutePatterns, // Statutes (code-specific)\n\t\t...journalPatterns, // Least specific (broad pattern)\n\t]\n\tconst tokens = tokenize(cleaned, allPatterns)\n\n\t// Step 3: Deduplicate overlapping tokens\n\t// Multiple patterns may match the same text (e.g., \"500 F.2d 123\" matches both federal-reporter and state-reporter)\n\t// Keep only the most specific match for each position\n\tconst deduplicatedTokens: typeof tokens = []\n\tconst seenPositions = new Set<string>()\n\n\tfor (const token of tokens) {\n\t\tconst posKey = `${token.span.cleanStart}-${token.span.cleanEnd}`\n\t\tif (!seenPositions.has(posKey)) {\n\t\t\tseenPositions.add(posKey)\n\t\t\tdeduplicatedTokens.push(token)\n\t\t}\n\t}\n\n\t// Step 4: Extract citations from deduplicated tokens\n\tconst citations: Citation[] = []\n\tfor (const token of deduplicatedTokens) {\n\t\tlet citation: Citation\n\n\t\tswitch (token.type) {\n\t\t\tcase 'case':\n\t\t\t\t// Check pattern ID to distinguish short-form from full citations\n\t\t\t\tif (token.patternId === 'id' || token.patternId === 'ibid') {\n\t\t\t\t\tcitation = extractId(token, transformationMap)\n\t\t\t\t} else if (token.patternId === 'supra') {\n\t\t\t\t\tcitation = extractSupra(token, transformationMap)\n\t\t\t\t} else if (token.patternId === 'shortFormCase') {\n\t\t\t\t\tcitation = extractShortFormCase(token, transformationMap)\n\t\t\t\t} else {\n\t\t\t\t\tcitation = extractCase(token, transformationMap)\n\t\t\t\t}\n\t\t\t\tbreak\n\t\t\tcase 'statute':\n\t\t\t\tcitation = extractStatute(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'journal':\n\t\t\t\tcitation = extractJournal(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'neutral':\n\t\t\t\tcitation = extractNeutral(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'publicLaw':\n\t\t\t\tcitation = extractPublicLaw(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'federalRegister':\n\t\t\t\tcitation = extractFederalRegister(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tdefault:\n\t\t\t\t// Unknown type - skip\n\t\t\t\tcontinue\n\t\t}\n\n\t\t// Attach cleaning warnings to citation if any\n\t\tif (warnings.length > 0) {\n\t\t\tcitation.warnings = [...(citation.warnings || []), ...warnings]\n\t\t}\n\n\t\t// Update processing time\n\t\tcitation.processTimeMs = performance.now() - startTime\n\n\t\tcitations.push(citation)\n\t}\n\n\t// Step 5: Resolve short-form citations if requested\n\tif (options?.resolve) {\n\t\treturn resolveCitations(citations, text, options.resolutionOptions)\n\t}\n\n\treturn citations\n}\n\n/**\n * Asynchronous version of extractCitations().\n *\n * Currently wraps the synchronous extractCitations() function. This API\n * exists for future extensibility when async operations are added:\n * - Async reporters-db lookups (Phase 3)\n * - Async resolution/annotation services\n * - Web Workers for parallel processing\n *\n * For now, this function immediately resolves with the same results as\n * the synchronous version.\n *\n * @param text - Raw text to extract citations from\n * @param options - Optional customization (cleaners, patterns, resolve)\n * @returns Promise resolving to array of citations (or ResolvedCitation[] if resolve: true)\n *\n * @example\n * ```typescript\n * const citations = await extractCitationsAsync(text, { resolve: true })\n * // Returns ResolvedCitation[] with resolution metadata\n * ```\n */\nexport async function extractCitationsAsync(text: string, options: ExtractOptions & { resolve: true }): Promise<ResolvedCitation[]>\nexport async function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>\nexport async function extractCitationsAsync(\n\ttext: string,\n\toptions?: ExtractOptions,\n): Promise<Citation[] | ResolvedCitation[]> {\n\t// Async wrapper for future extensibility (e.g., async reporters-db lookup)\n\t// For MVP, wraps synchronous extractCitations\n\treturn extractCitations(text, options)\n}\n"],"mappings":"mEAKA,SAAgB,EAAe,EAA8C,CAC3E,OAAO,EAAS,OAAS,QACpB,EAAS,OAAS,WAClB,EAAS,OAAS,WAClB,EAAS,OAAS,WAClB,EAAS,OAAS,aAClB,EAAS,OAAS,kBAMzB,SAAgB,EAAoB,EAAmD,CACrF,OAAO,EAAS,OAAS,MACpB,EAAS,OAAS,SAClB,EAAS,OAAS,gBAMzB,SAAgB,EAAe,EAAkD,CAC/E,OAAO,EAAS,OAAS,OAO3B,SAAgB,EACd,EACA,EAC+B,CAC/B,OAAO,EAAS,OAAS,EAmB3B,SAAgB,EAAkB,EAAiB,CACjD,MAAU,MAAM,qBAAqB,IAAI,CC5C3C,SAAgB,EAAc,EAAsB,CACnD,OAAO,EAAK,QAAQ,WAAY,GAAG,CAUpC,SAAgB,EAAoB,EAAsB,CACzD,OAAO,EAAK,QAAQ,aAAc,IAAI,CAAC,QAAQ,SAAU,IAAI,CAU9D,SAAgB,EAAiB,EAAsB,CACtD,OAAO,EAAK,UAAU,OAAO,CAU9B,SAAgB,EAAe,EAAsB,CACpD,OAAO,EACL,QAAQ,kBAAmB,IAAI,CAC/B,QAAQ,kBAAmB,IAAI,CCXlC,SAAgB,EACf,EACA,EAA4C,CAC3C,EACA,EACA,EACA,EACA,CACiB,CAElB,IAAI,EAAc,EACd,EAAkB,IAAI,IACtB,EAAkB,IAAI,IAG1B,IAAK,IAAI,EAAI,EAAG,GAAK,EAAS,OAAQ,IACrC,EAAgB,IAAI,EAAG,EAAE,CACzB,EAAgB,IAAI,EAAG,EAAE,CAI1B,IAAK,IAAM,KAAW,EAAU,CAC/B,IAAM,EAAa,EACb,EAAY,EAAQ,EAAY,CAEtC,GAAI,IAAe,EAAW,CAE7B,GAAM,CAAE,qBAAoB,sBAAuB,EAClD,EACA,EACA,EACA,EACA,CAED,EAAkB,EAClB,EAAkB,EAClB,EAAc,GAShB,MAAO,CACN,QAAS,EACT,kBAP4C,CAC5C,kBACA,kBACA,CAKA,SAAU,EAAE,CACZ,CAeF,SAAS,EACR,EACA,EACA,EACA,EAIC,CACD,IAAM,EAAqB,IAAI,IACzB,EAAqB,IAAI,IAE3B,EAAY,EACZ,EAAW,EAGf,KAAO,GAAa,EAAW,QAAU,GAAY,EAAU,QAAQ,CAEtE,GAAI,GAAa,EAAW,QAAU,GAAY,EAAU,OAAQ,CACnE,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,MAID,GAAI,GAAa,EAAW,OAAQ,CACnC,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,IACA,SAID,GAAI,GAAY,EAAU,OAAQ,CACjC,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,SAID,GAAI,EAAW,KAAe,EAAU,GAAW,CAClD,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,QACM,CAGN,IAAI,EAAa,GAIjB,IAAK,IAAI,EAAY,EAAG,GAAa,IAChC,IAAY,GAAa,EAAW,QADU,IAGlD,GAAI,EAAW,EAAY,KAAe,EAAU,GAAW,CAE9D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,IAAK,CACnC,IAAM,EACL,EAAmB,IAAI,EAAY,EAAE,EAAI,EAAY,EACtD,EAAmB,IAAI,EAAa,EAAS,CAE9C,GAAa,EACb,EAAa,GACb,MAIF,GAAI,EAAY,SAGhB,IAAK,IAAI,EAAY,EAAG,GAAa,IAChC,IAAW,GAAa,EAAU,QADY,IAGlD,GAAI,EAAW,KAAe,EAAU,EAAW,GAAY,CAE9D,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,IAC9B,EAAmB,IAAI,EAAW,EAAG,EAAY,CAElD,GAAY,EACZ,EAAa,GACb,MAIF,GAAI,EAAY,SAGhB,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,KAIF,MAAO,CAAE,qBAAoB,qBAAoB,CCpLlD,MAAa,EAA0B,CACrC,CACE,GAAI,mBACJ,MAAO,0FACP,YAAa,mDACb,KAAM,OACP,CACD,CACE,GAAI,gBACJ,MAAO,gEACP,YAAa,+BACb,KAAM,OACP,CACD,CACE,GAAI,iBACJ,MAAO,0DACP,YAAa,6EACb,KAAM,OACP,CACF,CC3BY,EAA6B,CACxC,CACE,GAAI,MACJ,MAAO,uCACP,YAAa,iDACb,KAAM,UACP,CACD,CACE,GAAI,aACJ,MAAO,yDACP,YAAa,sEACb,KAAM,UACP,CACF,CCbY,EAA6B,CACxC,CACE,GAAI,aACJ,MAAO,4CACP,YAAa,iGACb,KAAM,UACP,CACF,CCPY,EAA6B,CACxC,CACE,GAAI,UACJ,MAAO,4BACP,YAAa,6CACb,KAAM,UACP,CACD,CACE,GAAI,QACJ,MAAO,wCACP,YAAa,sDACb,KAAM,UACP,CACD,CACE,GAAI,aACJ,MAAO,sCACP,YAAa,oDACb,KAAM,YACP,CACD,CACE,GAAI,mBACJ,MAAO,qCACP,YAAa,0DACb,KAAM,kBACP,CACF,CCIY,EAA+B,CAC1C,CACE,GAAI,KACJ,MAhC8B,+BAiC9B,YAAa,8CACb,KAAM,OACP,CACD,CACE,GAAI,OACJ,MAnCgC,iCAoChC,YAAa,oDACb,KAAM,OACP,CACD,CACE,GAAI,QACJ,MAjCiC,yFAkCjC,YAAa,mEACb,KAAM,OACP,CACD,CACE,GAAI,gBACJ,MAhC2C,6DAiC3C,YAAa,sDACb,KAAM,OACP,CACF,CCcD,SAAgB,EACd,EACA,EAAsB,CACpB,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACJ,CACQ,CACT,IAAM,EAAkB,EAAE,CAE1B,IAAK,IAAM,KAAW,EACpB,GAAI,CAEF,IAAM,EAAU,EAAY,SAAS,EAAQ,MAAM,CAEnD,IAAK,IAAM,KAAS,EAElB,EAAO,KAAK,CACV,KAAM,EAAM,GACZ,KAAM,CACJ,WAAY,EAAM,MAClB,SAAU,EAAM,MAAS,EAAM,GAAG,OACnC,CACD,KAAM,EAAQ,KACd,UAAW,EAAQ,GACpB,CAAC,OAEG,EAAO,CAEd,QAAQ,KACN,WAAW,EAAQ,GAAG,yBACtB,aAAiB,MAAQ,EAAM,QAAU,OAAO,EAAM,CACvD,CACD,SAOJ,OAFA,EAAO,MAAM,EAAG,IAAM,EAAE,KAAK,WAAa,EAAE,KAAK,WAAW,CAErD,ECtDT,SAAgB,EACf,EACA,EACmB,CACnB,GAAM,CAAE,OAAM,QAAS,EAMjB,EAD0B,qCACM,KAAK,EAAK,CAEhD,GAAI,CAAC,EAEJ,MAAU,MAAM,kCAAkC,IAAO,CAG1D,IAAM,EAAS,OAAO,SAAS,EAAM,GAAI,GAAG,CACtC,EAAW,EAAM,GAAG,MAAM,CAC1B,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAKpC,EADe,YACa,KAAK,EAAK,CACtC,EAAU,EAAe,OAAO,SAAS,EAAa,GAAI,GAAG,CAAG,IAAA,GAKhE,EADY,0BACU,KAAK,EAAK,CAChC,EAAO,EAAY,OAAO,SAAS,EAAU,GAAI,GAAG,CAAG,IAAA,GAKvD,EADa,2BACW,KAAK,EAAK,CAClC,EAAQ,EAAa,EAAW,GAAG,MAAM,CAAG,IAAA,GAG5C,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAG1D,EAAa,GA+CjB,MA5CwB,4JA2BvB,CAEmB,KAAM,GAAM,EAAS,SAAS,EAAE,CAAC,GACpD,GAAc,IAIX,IAAS,IAAA,IAER,GADgB,IAAI,MAAM,CAAC,aAAa,GAE3C,GAAc,IAKhB,EAAa,KAAK,IAAI,EAAY,EAAI,CAE/B,CACN,KAAM,OACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,aACA,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,WACA,OACA,UACA,QACA,OACA,CCtIF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,0DACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAQ,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GACnD,EAAO,EAAM,GAAG,MAAM,CACtB,EAAU,EAAM,GAGhB,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAG1D,EAAa,GAkBjB,MAfmB,CAClB,SACA,SACA,iBACA,kBACA,0BACA,8BACA,CAEc,KAAM,GAAM,EAAK,SAAS,EAAE,CAAC,GAC3C,GAAc,IAGf,EAAa,KAAK,IAAI,EAAY,EAAI,CAE/B,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,aACA,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,QACA,OACA,UACA,CC1DF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,mCACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAS,OAAO,SAAS,EAAM,GAAI,GAAG,CACtC,EAAU,EAAM,GAAG,MAAM,CACzB,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAIpC,EADe,YACa,KAAK,EAAK,CACtC,EAAU,EAAe,OAAO,SAAS,EAAa,GAAI,GAAG,CAAG,IAAA,GAGhE,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,UACA,aAAc,EACd,OACA,UACA,CCvDF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,+CACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CACpC,EAAQ,EAAM,GACd,EAAiB,EAAM,GAGvB,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,EAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,OACA,QACA,iBACA,CC3CF,SAAgB,EACf,EACA,EACoB,CACpB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADiB,wCACM,KAAK,EAAK,CAEvC,GAAI,CAAC,EACJ,MAAU,MAAM,wCAAwC,IAAO,CAGhE,IAAM,EAAW,OAAO,SAAS,EAAM,GAAI,GAAG,CACxC,EAAY,OAAO,SAAS,EAAM,GAAI,GAAG,CAGzC,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,YACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,WACA,YACA,CC5CF,SAAgB,EACf,EACA,EAC0B,CAC1B,GAAM,CAAE,OAAM,QAAS,EAKjB,EADuB,iCACM,KAAK,EAAK,CAE7C,GAAI,CAAC,EACJ,MAAU,MAAM,8CAA8C,IAAO,CAGtE,IAAM,EAAS,OAAO,SAAS,EAAM,GAAI,GAAG,CACtC,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAKpC,EADY,wBACU,KAAK,EAAK,CAChC,EAAO,EAAY,OAAO,SAAS,EAAU,GAAI,GAAG,CAAG,IAAA,GAGvD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,kBACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,OACA,OACA,CCpDF,SAAgB,EACf,EACA,EACa,CACb,GAAM,CAAE,OAAM,QAAS,EAKjB,EADU,oCACM,KAAK,EAAK,CAEhC,GAAI,CAAC,EACJ,MAAU,MAAM,iCAAiC,IAAO,CAIzD,IAAM,EAAU,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GAGrD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,KACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,EAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,UACA,CAmCF,SAAgB,EACf,EACA,EACgB,CAChB,GAAM,CAAE,OAAM,QAAS,EAMjB,EADa,wFACM,KAAK,EAAK,CAEnC,GAAI,CAAC,EACJ,MAAU,MAAM,mCAAmC,IAAO,CAG3D,IAAM,EAAY,EAAM,GAClB,EAAU,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GAGrD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,QACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,YACA,UACA,CAqCF,SAAgB,EACf,EACA,EACwB,CACxB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADiB,wDACM,KAAK,EAAK,CAEvC,GAAI,CAAC,EACJ,MAAU,MAAM,6CAA6C,IAAO,CAGrE,IAAM,EAAS,OAAO,SAAS,EAAM,GAAI,GAAG,CACtC,EAAW,EAAM,GAAG,MAAM,CAC1B,EAAU,OAAO,SAAS,EAAM,GAAI,GAAG,CAGvC,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,gBACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,WACA,UACA,CCjOF,SAAgB,EACd,EACA,EACA,EAA0B,SACL,CACrB,IAAM,EAAe,IAAI,IAGnB,EAAuB,CAAC,EAAE,CAC5B,EAEJ,MAAQ,EAAQ,EAAgB,KAAK,EAAK,IAAM,MAE9C,EAAW,KAAK,EAAM,MAAQ,EAAM,GAAG,OAAO,CAGhD,EAAW,KAAK,EAAK,OAAO,CAG5B,IAAK,IAAI,EAAI,EAAG,EAAI,EAAU,OAAQ,IAAK,CAEzC,IAAM,EADW,EAAU,GACI,KAAK,cAGhC,EAAe,EACnB,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAS,EAAG,IACzC,GAAI,GAAiB,EAAW,IAAM,EAAgB,EAAW,EAAI,GAAI,CACvE,EAAe,EACf,MAIJ,EAAa,IAAI,EAAG,EAAa,CAGnC,OAAO,EAYT,SAAgB,EACd,EACA,EACA,EACA,EACS,CACT,GAAI,IAAa,OAEf,MAAO,GAIT,IAAM,EAAsB,EAAa,IAAI,EAAgB,CACvD,EAAmB,EAAa,IAAI,EAAa,CASvD,OANI,IAAwB,IAAA,IAAa,IAAqB,IAAA,GACrD,GAKF,IAAwB,ECpEjC,SAAgB,EAAoB,EAAW,EAAmB,CAEhE,GAAI,EAAE,SAAW,EAAG,OAAO,EAAE,OAC7B,GAAI,EAAE,SAAW,EAAG,OAAO,EAAE,OAI7B,IAAM,EAAiB,MAAM,KAAK,CAAE,OAAQ,EAAE,OAAS,EAAG,KACxD,MAAM,EAAE,OAAS,EAAE,CAAC,KAAK,EAAE,CAC5B,CAGD,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,EAAG,GAAG,GAAK,EAEb,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,EAAG,GAAG,GAAK,EAIb,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IACzB,EAAE,EAAI,KAAO,EAAE,EAAI,GAErB,EAAG,GAAG,GAAK,EAAG,EAAI,GAAG,EAAI,GAMzB,EAAG,GAAG,GAAK,EAAI,KAAK,IAClB,EAAG,EAAI,GAAG,GACV,EAAG,GAAG,EAAI,GACV,EAAG,EAAI,GAAG,EAAI,GACf,CAKP,OAAO,EAAG,EAAE,QAAQ,EAAE,QAgBxB,SAAgB,EAA8B,EAAW,EAAmB,CAE1E,IAAM,EAAS,EAAE,aAAa,CACxB,EAAS,EAAE,aAAa,CAGxB,EAAW,EAAoB,EAAQ,EAAO,CAG9C,EAAY,KAAK,IAAI,EAAO,OAAQ,EAAO,OAAO,CAIxD,OAHI,IAAc,EAAU,EAGrB,EAAI,EAAW,ECtDxB,IAAa,EAAb,KAA8B,CAa5B,YACE,EACA,EACA,EAA6B,EAAE,CAC/B,CACA,KAAK,UAAY,EACjB,KAAK,KAAO,EAGZ,KAAK,QAAU,CACb,cAAe,EAAQ,eAAiB,YACxC,qBAAsB,EAAQ,sBAAwB,GACtD,yBAA0B,EAAQ,0BAA4B,SAC9D,mBAAoB,EAAQ,oBAAsB,GAClD,oBAAqB,EAAQ,qBAAuB,GACpD,sBAAuB,EAAQ,uBAAyB,GACxD,iBAAkB,EAAQ,kBAAoB,GAC/C,CAGD,KAAK,QAAU,CACb,cAAe,EACf,aAAc,EACd,iBAAkB,IAAA,GAClB,oBAAqB,IAAI,IACzB,aAAc,IAAI,IACnB,CAGG,KAAK,QAAQ,uBACf,KAAK,QAAQ,aAAe,EAC1B,EACA,EACA,KAAK,QAAQ,yBACd,EASL,SAA8B,CAC5B,IAAM,EAA+B,EAAE,CAEvC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAU,OAAQ,IAAK,CAC9C,KAAK,QAAQ,cAAgB,EAC7B,IAAM,EAAW,KAAK,UAAU,GAG5B,EAEJ,OAAQ,EAAS,KAAjB,CACE,IAAK,KACH,EAAa,KAAK,UAAU,EAAS,CACrC,MACF,IAAK,QACH,EAAa,KAAK,aAAa,EAAS,CACxC,MACF,IAAK,gBACH,EAAa,KAAK,qBAAqB,EAAS,CAChD,MACF,QAEM,EAAe,EAAS,GAC1B,KAAK,QAAQ,iBAAmB,EAChC,KAAK,kBAAkB,EAAU,EAAE,EAErC,MAKJ,EAAS,KAAK,CACZ,GAAG,EACH,aACD,CAAqB,CAGxB,OAAO,EAMT,UAAkB,EAAoD,CACpE,IAAM,EAAe,KAAK,QAAQ,cAG9B,EACJ,IAAK,IAAI,EAAI,EAAe,EAAG,GAAK,EAAG,IAErC,GADkB,KAAK,UAAU,GACnB,OAAS,OAAQ,CAC7B,EAAkB,EAClB,MAcJ,OATI,IAAoB,IAAA,GACf,KAAK,oBAAoB,wCAAwC,CAIrE,KAAK,cAAc,EAAiB,EAAa,CAI/C,CACL,WAAY,EACZ,WAAY,EACb,CANQ,KAAK,oBAAoB,6CAA6C,CAYjF,aAAqB,EAAuD,CAC1E,IAAM,EAAe,KAAK,QAAQ,cAC5B,EAAkB,KAAK,mBAAmB,EAAS,UAAU,CAG/D,EAEJ,IAAK,GAAM,CAAC,EAAW,KAAkB,KAAK,QAAQ,oBAAqB,CAEzE,GAAI,CAAC,KAAK,cAAc,EAAe,EAAa,CAClD,SAIF,IAAM,EAAa,EAA8B,EAAiB,EAAU,EAGxE,CAAC,GAAa,EAAa,EAAU,cACvC,EAAY,CAAE,MAAO,EAAe,aAAY,EAKpD,GAAI,CAAC,EACH,OAAO,KAAK,oBAAoB,kCAAkC,CAGpE,GAAI,EAAU,WAAa,KAAK,QAAQ,oBACtC,OAAO,KAAK,oBACV,yBAAyB,EAAU,WAAW,QAAQ,EAAE,CAAC,mBAAmB,KAAK,QAAQ,sBAC1F,CAIH,IAAM,EAAqB,EAAE,CAK7B,OAJI,EAAU,WAAa,GACzB,EAAS,KAAK,2BAA2B,EAAU,WAAW,QAAQ,EAAE,GAAG,CAGtE,CACL,WAAY,EAAU,MACtB,WAAY,EAAU,WACtB,SAAU,EAAS,OAAS,EAAI,EAAW,IAAA,GAC5C,CAMH,qBAA6B,EAA+D,CAC1F,IAAM,EAAe,KAAK,QAAQ,cAGlC,IAAK,IAAI,EAAI,EAAe,EAAG,GAAK,EAAG,IAAK,CAC1C,IAAM,EAAY,KAAK,UAAU,GAG7B,KAAU,OAAS,QAMrB,EAAU,SAAW,EAAS,QAC9B,KAAK,kBAAkB,EAAU,SAAS,GAAK,KAAK,kBAAkB,EAAS,SAAS,CAQxF,OALK,KAAK,cAAc,EAAG,EAAa,CAKjC,CACL,WAAY,EACZ,WAAY,IACb,CAPQ,KAAK,oBAAoB,2CAA2C,CAWjF,OAAO,KAAK,oBAAoB,uCAAuC,CAOzE,kBAA0B,EAAoB,EAAqB,CAEjE,GAAI,EAAS,OAAS,OAAQ,CAC5B,IAAM,EAAY,KAAK,iBAAiB,EAAS,CACjD,GAAI,EAAW,CACb,IAAM,EAAa,KAAK,mBAAmB,EAAU,CACrD,KAAK,QAAQ,oBAAoB,IAAI,EAAY,EAAM,GAS7D,iBAAyB,EAAgD,CAKvE,IAAM,EAAgB,EAAS,KAAK,cAE9B,EAAgB,KAAK,IAAI,EAAG,EAAgB,IAAI,CAChD,EAAa,KAAK,KAAK,UAAU,EAAe,EAAc,CAI9D,EAAS,EAAW,MAAM,4FAA4F,CAO5H,OANI,EACK,EAAO,GAAG,MAAM,CAIL,EAAW,MAAM,8CAA8C,GAC9D,GAAG,MAAM,CAMhC,mBAA2B,EAAsB,CAC/C,OAAO,EACJ,aAAa,CACb,QAAQ,OAAQ,IAAI,CACpB,MAAM,CAMX,kBAA0B,EAA0B,CAClD,OAAO,EACJ,aAAa,CACb,QAAQ,OAAQ,GAAG,CACnB,QAAQ,MAAO,GAAG,CAMvB,cAAsB,EAAyB,EAA+B,CAC5E,OAAO,EACL,EACA,EACA,KAAK,QAAQ,aACb,KAAK,QAAQ,cACd,CAMH,oBAA4B,EAA8C,CACxE,GAAI,KAAK,QAAQ,iBACf,MAAO,CACL,WAAY,IAAA,GACZ,cAAe,EACf,WAAY,EACb,GCvSP,SAAgB,EACd,EACA,EACA,EACoB,CAEpB,OADiB,IAAI,EAAiB,EAAW,EAAM,EAAQ,CAC/C,SAAS,CC6H3B,SAAgB,EACf,EACA,EACkC,CAClC,IAAM,EAAY,YAAY,KAAK,CAG7B,CAAE,UAAS,oBAAmB,YAAa,EAChD,EACA,GAAS,SACT,CAWK,EAAS,EAAS,EAPJ,GAAS,UAAY,CACxC,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,CAC4C,CAKvC,EAAoC,EAAE,CACtC,EAAgB,IAAI,IAE1B,IAAK,IAAM,KAAS,EAAQ,CAC3B,IAAM,EAAS,GAAG,EAAM,KAAK,WAAW,GAAG,EAAM,KAAK,WACjD,EAAc,IAAI,EAAO,GAC7B,EAAc,IAAI,EAAO,CACzB,EAAmB,KAAK,EAAM,EAKhC,IAAM,EAAwB,EAAE,CAChC,IAAK,IAAM,KAAS,EAAoB,CACvC,IAAI,EAEJ,OAAQ,EAAM,KAAd,CACC,IAAK,OAEJ,AAOC,EAPG,EAAM,YAAc,MAAQ,EAAM,YAAc,OACxC,EAAU,EAAO,EAAkB,CACpC,EAAM,YAAc,QACnB,EAAa,EAAO,EAAkB,CACvC,EAAM,YAAc,gBACnB,EAAqB,EAAO,EAAkB,CAE9C,EAAY,EAAO,EAAkB,CAEjD,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,YACJ,EAAW,EAAiB,EAAO,EAAkB,CACrD,MACD,IAAK,kBACJ,EAAW,EAAuB,EAAO,EAAkB,CAC3D,MACD,QAEC,SAIE,EAAS,OAAS,IACrB,EAAS,SAAW,CAAC,GAAI,EAAS,UAAY,EAAE,CAAG,GAAG,EAAS,EAIhE,EAAS,cAAgB,YAAY,KAAK,CAAG,EAE7C,EAAU,KAAK,EAAS,CAQzB,OAJI,GAAS,QACL,EAAiB,EAAW,EAAM,EAAQ,kBAAkB,CAG7D,EA2BR,eAAsB,EACrB,EACA,EAC2C,CAG3C,OAAO,EAAiB,EAAM,EAAQ"}
|
|
1
|
+
{"version":3,"file":"index.cjs","names":[],"sources":["../src/types/guards.ts","../src/clean/cleaners.ts","../src/clean/cleanText.ts","../src/patterns/casePatterns.ts","../src/patterns/statutePatterns.ts","../src/patterns/journalPatterns.ts","../src/patterns/neutralPatterns.ts","../src/patterns/shortForm.ts","../src/tokenize/tokenizer.ts","../src/extract/dates.ts","../src/extract/extractCase.ts","../src/extract/extractStatute.ts","../src/extract/extractJournal.ts","../src/extract/extractNeutral.ts","../src/extract/extractPublicLaw.ts","../src/extract/extractFederalRegister.ts","../src/extract/extractStatutesAtLarge.ts","../src/extract/extractShortForms.ts","../src/resolve/scopeBoundary.ts","../src/resolve/levenshtein.ts","../src/resolve/DocumentResolver.ts","../src/resolve/index.ts","../src/extract/detectParallel.ts","../src/extract/extractCitations.ts"],"sourcesContent":["import type { Citation, CitationType, CitationOfType, FullCitation, ShortFormCitation, FullCaseCitation } from \"./citation\"\n\n/**\n * Type guard: narrows Citation to a full citation (case, statute, journal, neutral, publicLaw, federalRegister).\n */\nexport function isFullCitation(citation: Citation): citation is FullCitation {\n return citation.type === 'case'\n || citation.type === 'statute'\n || citation.type === 'journal'\n || citation.type === 'neutral'\n || citation.type === 'publicLaw'\n || citation.type === 'federalRegister'\n || citation.type === 'statutesAtLarge'\n}\n\n/**\n * Type guard: narrows Citation to a short-form citation (id, supra, shortFormCase).\n */\nexport function isShortFormCitation(citation: Citation): citation is ShortFormCitation {\n return citation.type === 'id'\n || citation.type === 'supra'\n || citation.type === 'shortFormCase'\n}\n\n/**\n * Type guard: narrows Citation to a full case citation.\n */\nexport function isCaseCitation(citation: Citation): citation is FullCaseCitation {\n return citation.type === 'case'\n}\n\n/**\n * Generic type guard that narrows a Citation to a specific type.\n * Useful when the target type is dynamic or generic.\n */\nexport function isCitationType<T extends CitationType>(\n citation: Citation,\n type: T\n): citation is CitationOfType<T> {\n return citation.type === type\n}\n\n/**\n * Exhaustiveness helper for switch statements on discriminated unions.\n *\n * Place in the `default` branch to get a compile-time error if a new\n * variant is added but not handled.\n *\n * @example\n * ```typescript\n * switch (citation.type) {\n * case 'case': ...\n * case 'statute': ...\n * // If you forget a variant, TypeScript errors here:\n * default: assertUnreachable(citation.type)\n * }\n * ```\n */\nexport function assertUnreachable(x: never): never {\n throw new Error(`Unexpected value: ${x}`)\n}\n","/**\n * Built-in text cleaner functions for preprocessing legal documents.\n *\n * Each cleaner is a simple transformation: (text: string) => string\n * Cleaners can be composed via the cleanText() pipeline.\n */\n\n/**\n * Remove all HTML tags from text.\n *\n * @example\n * stripHtmlTags(\"Smith v. <b>Doe</b>, 500 F.2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function stripHtmlTags(text: string): string {\n\treturn text.replace(/<[^>]+>/g, \"\")\n}\n\n/**\n * Normalize whitespace: convert tabs/newlines to spaces, collapse multiple spaces.\n *\n * @example\n * normalizeWhitespace(\"Smith v. Doe, 500 F.2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function normalizeWhitespace(text: string): string {\n\treturn text.replace(/[\\t\\n\\r]+/g, \" \").replace(/ {2,}/g, \" \")\n}\n\n/**\n * Apply Unicode NFKC normalization (ligatures → separate chars).\n *\n * @example\n * normalizeUnicode(\"Smith v. Doe, 500 F.2d 123\") // with ligature \"fi\"\n * // => \"Smith v. Doe, 500 F.2d 123\" // normalized\n */\nexport function normalizeUnicode(text: string): string {\n\treturn text.normalize(\"NFKC\")\n}\n\n/**\n * Replace curly quotes and apostrophes with straight quotes.\n *\n * @example\n * fixSmartQuotes(\"\"Smith\" v. 'Doe', 500 F.2d 123\")\n * // => \"\\\"Smith\\\" v. 'Doe', 500 F.2d 123\"\n */\nexport function fixSmartQuotes(text: string): string {\n\treturn text\n\t\t.replace(/[\\u201C\\u201D]/g, '\"') // curly double quotes\n\t\t.replace(/[\\u2018\\u2019]/g, \"'\") // curly single quotes/apostrophes\n}\n\n/**\n * Remove underscore OCR artifacts (common in scanned documents).\n *\n * @example\n * removeOcrArtifacts(\"Smith v. Doe, 500 F._2d 123\")\n * // => \"Smith v. Doe, 500 F.2d 123\"\n */\nexport function removeOcrArtifacts(text: string): string {\n\treturn text.replace(/_/g, \"\")\n}\n","import type { TransformationMap } from \"../types/span\"\nimport type { Warning } from \"../types/citation\"\nimport {\n\tfixSmartQuotes,\n\tnormalizeUnicode,\n\tnormalizeWhitespace,\n\tstripHtmlTags,\n} from \"./cleaners\"\n\n/**\n * Result of text cleaning operation.\n */\nexport interface CleanTextResult {\n\t/** Cleaned text after all transformations */\n\tcleaned: string\n\n\t/** Position mappings between cleaned and original text */\n\ttransformationMap: TransformationMap\n\n\t/** Warnings generated during cleaning (currently unused) */\n\twarnings: Warning[]\n}\n\n/**\n * Clean text using a pipeline of transformation functions.\n *\n * Applies cleaners sequentially while maintaining accurate position mappings\n * between the original and cleaned text. This enables citation extraction from\n * cleaned text while reporting positions in the original text.\n *\n * @param original - Original input text\n * @param cleaners - Array of cleaner functions to apply (default: stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes)\n * @returns Cleaned text with position mappings and warnings\n *\n * @example\n * const result = cleanText(\"Smith v. <b>Doe</b>, 500 F.2d 123\")\n * // result.cleaned: \"Smith v. Doe, 500 F.2d 123\"\n * // result.transformationMap tracks position shifts from HTML removal\n */\nexport function cleanText(\n\toriginal: string,\n\tcleaners: Array<(text: string) => string> = [\n\t\tstripHtmlTags,\n\t\tnormalizeWhitespace,\n\t\tnormalizeUnicode,\n\t\tfixSmartQuotes,\n\t],\n): CleanTextResult {\n\t// Initialize 1:1 position mapping\n\tlet currentText = original\n\tlet cleanToOriginal = new Map<number, number>()\n\tlet originalToClean = new Map<number, number>()\n\n\t// Identity mapping: cleanToOriginal[i] = i, originalToClean[i] = i\n\tfor (let i = 0; i <= original.length; i++) {\n\t\tcleanToOriginal.set(i, i)\n\t\toriginalToClean.set(i, i)\n\t}\n\n\t// Apply each cleaner sequentially, rebuilding position maps\n\tfor (const cleaner of cleaners) {\n\t\tconst beforeText = currentText\n\t\tconst afterText = cleaner(currentText)\n\n\t\tif (beforeText !== afterText) {\n\t\t\t// Text changed - rebuild position maps\n\t\t\tconst { newCleanToOriginal, newOriginalToClean } = rebuildPositionMaps(\n\t\t\t\tbeforeText,\n\t\t\t\tafterText,\n\t\t\t\tcleanToOriginal,\n\t\t\t\toriginalToClean,\n\t\t\t)\n\n\t\t\tcleanToOriginal = newCleanToOriginal\n\t\t\toriginalToClean = newOriginalToClean\n\t\t\tcurrentText = afterText\n\t\t}\n\t}\n\n\tconst transformationMap: TransformationMap = {\n\t\tcleanToOriginal,\n\t\toriginalToClean,\n\t}\n\n\treturn {\n\t\tcleaned: currentText,\n\t\ttransformationMap,\n\t\twarnings: [],\n\t}\n}\n\n/**\n * Rebuild position maps after a text transformation.\n *\n * Uses a simplified algorithm that scans through both strings, matching\n * characters where possible and tracking the offset accumulation.\n *\n * @param beforeText - Text before transformation\n * @param afterText - Text after transformation\n * @param oldCleanToOriginal - Previous clean-to-original mapping\n * @param oldOriginalToClean - Previous original-to-clean mapping\n * @returns New position maps\n */\nfunction rebuildPositionMaps(\n\tbeforeText: string,\n\tafterText: string,\n\toldCleanToOriginal: Map<number, number>,\n\t_oldOriginalToClean: Map<number, number>,\n): {\n\tnewCleanToOriginal: Map<number, number>\n\tnewOriginalToClean: Map<number, number>\n} {\n\tconst newCleanToOriginal = new Map<number, number>()\n\tconst newOriginalToClean = new Map<number, number>()\n\n\tlet beforeIdx = 0\n\tlet afterIdx = 0\n\n\t// Scan through both strings, matching characters where possible\n\twhile (beforeIdx <= beforeText.length || afterIdx <= afterText.length) {\n\t\t// Both at end\n\t\tif (beforeIdx >= beforeText.length && afterIdx >= afterText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbreak\n\t\t}\n\n\t\t// Before text exhausted (expansion case)\n\t\tif (beforeIdx >= beforeText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tafterIdx++\n\t\t\tcontinue\n\t\t}\n\n\t\t// After text exhausted (removal case)\n\t\tif (afterIdx >= afterText.length) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tcontinue\n\t\t}\n\n\t\t// Characters match - carry forward the mapping\n\t\tif (beforeText[beforeIdx] === afterText[afterIdx]) {\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tafterIdx++\n\t\t} else {\n\t\t\t// Characters differ - need to determine if this is insertion/deletion/replacement\n\t\t\t// Look ahead to find next match\n\t\t\tlet foundMatch = false\n\t\t\tconst maxLookAhead = 20 // Limit lookahead to avoid performance issues\n\n\t\t\t// Check if something was deleted from before text\n\t\t\tfor (let lookAhead = 1; lookAhead <= maxLookAhead; lookAhead++) {\n\t\t\t\tif (beforeIdx + lookAhead >= beforeText.length) break\n\n\t\t\t\tif (beforeText[beforeIdx + lookAhead] === afterText[afterIdx]) {\n\t\t\t\t\t// Found a match - characters were deleted from before text\n\t\t\t\t\tfor (let i = 0; i < lookAhead; i++) {\n\t\t\t\t\t\tconst originalPos =\n\t\t\t\t\t\t\toldCleanToOriginal.get(beforeIdx + i) ?? beforeIdx + i\n\t\t\t\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\t\t\t}\n\t\t\t\t\tbeforeIdx += lookAhead\n\t\t\t\t\tfoundMatch = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (foundMatch) continue\n\n\t\t\t// Check if something was inserted into after text\n\t\t\tfor (let lookAhead = 1; lookAhead <= maxLookAhead; lookAhead++) {\n\t\t\t\tif (afterIdx + lookAhead >= afterText.length) break\n\n\t\t\t\tif (beforeText[beforeIdx] === afterText[afterIdx + lookAhead]) {\n\t\t\t\t\t// Found a match - characters were inserted into after text\n\t\t\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\t\t\tfor (let i = 0; i < lookAhead; i++) {\n\t\t\t\t\t\tnewCleanToOriginal.set(afterIdx + i, originalPos)\n\t\t\t\t\t}\n\t\t\t\t\tafterIdx += lookAhead\n\t\t\t\t\tfoundMatch = true\n\t\t\t\t\tbreak\n\t\t\t\t}\n\t\t\t}\n\n\t\t\tif (foundMatch) continue\n\n\t\t\t// No match found within lookahead - treat as replacement\n\t\t\tconst originalPos = oldCleanToOriginal.get(beforeIdx) ?? beforeIdx\n\t\t\tnewCleanToOriginal.set(afterIdx, originalPos)\n\t\t\tnewOriginalToClean.set(originalPos, afterIdx)\n\t\t\tbeforeIdx++\n\t\t\tafterIdx++\n\t\t}\n\t}\n\n\treturn { newCleanToOriginal, newOriginalToClean }\n}\n","/**\n * Case Citation Regex Patterns\n *\n * These patterns are designed for tokenization (broad matching) not extraction.\n * They identify potential case citations in text for the tokenizer (Plan 3).\n * Metadata parsing and validation against reporters-db happens in Phase 2 Plan 5 (extraction layer).\n *\n * Pattern Design Principles (from RESEARCH.md):\n * - Use \\b word boundaries to avoid matching \"F.\" in \"F.B.I.\"\n * - Avoid nested quantifiers: (a+)+ causes ReDoS\n * - Keep patterns simple: tokenization only needs to find candidates\n * - Use global flag /g for matchAll()\n */\n\nimport type { FullCitationType } from '@/types/citation'\n\nexport interface Pattern {\n id: string\n regex: RegExp\n description: string\n type: FullCitationType\n}\n\nexport const casePatterns: Pattern[] = [\n {\n id: 'federal-reporter',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+(F\\.|F\\.2d|F\\.3d|F\\.4th|F\\.\\s?Supp\\.|F\\.\\s?Supp\\.\\s?2d|F\\.\\s?Supp\\.\\s?3d|F\\.\\s?Supp\\.\\s?4th)\\s+(\\d+|_{3,}|-{3,})(?=\\s|$|\\(|,|;|\\.)/g,\n description: 'Federal Reporter (F., F.2d, F.3d, F.4th, F.Supp., etc.)',\n type: 'case',\n },\n {\n id: 'supreme-court',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+(U\\.\\s?S\\.|S\\.\\s?Ct\\.|L\\.\\s?Ed\\.(?:\\s?2d)?)\\s+(\\d+|_{3,}|-{3,})(?=\\s|$|\\(|,|;|\\.)/g,\n description: 'U.S. Supreme Court reporters',\n type: 'case',\n },\n {\n id: 'state-reporter',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+([A-Z][A-Za-z.]+(?:\\s?2d|\\s?3d|\\s?4th|\\s?5th)?)\\s+(\\d+|_{3,}|-{3,})(?=\\s|$|\\(|,|;|\\.)/g,\n description: 'State reporters (broad pattern, validated against reporters-db in Phase 3)',\n type: 'case',\n },\n]\n","/**\n * Statute Citation Regex Patterns\n *\n * Patterns for U.S. Code and state code citations.\n * These are intentionally broad for tokenization - validation against\n * actual code databases happens in Phase 2 Plan 5 (extraction layer).\n *\n * Pattern Design:\n * - Simple structure to avoid ReDoS\n * - Matches both \"§\" and \"Section\" formats\n * - State codes use broad pattern (validated later)\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const statutePatterns: Pattern[] = [\n {\n id: 'usc',\n regex: /\\b(\\d+)\\s+U\\.S\\.C\\.?\\s+§+\\s*(\\d+[A-Za-z]*)\\b/g,\n description: 'U.S. Code citations (e.g., \"42 U.S.C. § 1983\")',\n type: 'statute',\n },\n {\n id: 'state-code',\n regex: /\\b([A-Z][a-z]+\\.?\\s+[A-Za-z.]+\\s+Code)\\s+§\\s*(\\d+[A-Za-z]*)\\b/g,\n description: 'State code citations (broad pattern, e.g., \"Cal. Penal Code § 187\")',\n type: 'statute',\n },\n]\n","/**\n * Journal Citation Regex Patterns\n *\n * Patterns for law review and journal citations.\n * These are intentionally broad for tokenization - validation against\n * journals-db happens in Phase 3 (extraction layer).\n *\n * Pattern Design:\n * - Matches volume-journal-page format\n * - Broad journal name matching (validated later)\n * - Simple structure to avoid ReDoS\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const journalPatterns: Pattern[] = [\n {\n id: 'law-review',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+([A-Z][A-Za-z.\\s]+)\\s+(\\d+)\\b/g,\n description: 'Law review citations (e.g., \"120 Harv. L. Rev. 500\"), validated against journals-db in Phase 3',\n type: 'journal',\n },\n]\n","/**\n * Neutral and Online Citation Regex Patterns\n *\n * Patterns for WestLaw, LexisNexis, public laws, and Federal Register citations.\n * These have predictable formats and don't require external validation.\n *\n * Pattern Design:\n * - Matches year-database-number format for online citations\n * - Matches Pub. L. No. format for public laws\n * - Matches volume-Fed. Reg.-page for Federal Register\n * - Simple structure to avoid ReDoS\n */\n\nimport type { Pattern } from './casePatterns'\n\nexport const neutralPatterns: Pattern[] = [\n {\n id: 'westlaw',\n regex: /\\b(\\d{4})\\s+WL\\s+(\\d+)\\b/g,\n description: 'WestLaw citations (e.g., \"2021 WL 123456\")',\n type: 'neutral',\n },\n {\n id: 'lexis',\n regex: /\\b(\\d{4})\\s+U\\.S\\.\\s+LEXIS\\s+(\\d+)\\b/g,\n description: 'LexisNexis citations (e.g., \"2021 U.S. LEXIS 5000\")',\n type: 'neutral',\n },\n {\n id: 'public-law',\n regex: /\\bPub\\.\\s?L\\.\\s?No\\.\\s?(\\d+-\\d+)\\b/g,\n description: 'Public Law citations (e.g., \"Pub. L. No. 117-58\")',\n type: 'publicLaw',\n },\n {\n id: 'federal-register',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+Fed\\.\\s?Reg\\.\\s+(\\d+)\\b/g,\n description: 'Federal Register citations (e.g., \"86 Fed. Reg. 12345\")',\n type: 'federalRegister',\n },\n {\n id: 'statutes-at-large',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+Stat\\.\\s+(\\d+)\\b/g,\n description: 'Statutes at Large citations (e.g., \"124 Stat. 119\")',\n type: 'statutesAtLarge',\n },\n {\n id: 'compact-law-review',\n regex: /\\b(\\d+(?:-\\d+)?)\\s+([A-Z][A-Za-z.]+L\\.(?:Rev|J|Q)\\.)\\s+(\\d+)\\b/g,\n description: 'Compact law review citations without spaces (e.g., \"93 Harv.L.Rev. 752\")',\n type: 'journal',\n },\n]\n","/**\n * Short-form Citation Regex Patterns\n *\n * Patterns for Id., Ibid., supra, and short-form case citations.\n * These refer to earlier citations in the document.\n *\n * Pattern Design:\n * - Simple structure to avoid ReDoS (no nested quantifiers)\n * - Broad matching for tokenization; validation happens in extraction layer\n * - Word boundaries to prevent false positives (e.g., \"Idaho\" vs \"Id.\")\n */\n\nimport type { Pattern } from './casePatterns'\n\n/** Id. with optional pincite: \"Id.\" or \"Id. at 253\" */\nexport const ID_PATTERN: RegExp = /\\b[Ii]d\\.(?:\\s+at\\s+(\\d+))?/g\n\n/** Ibid. with optional pincite (less common variant) */\nexport const IBID_PATTERN: RegExp = /\\b[Ii]bid\\.(?:\\s+at\\s+(\\d+))?/g\n\n/**\n * Supra with party name and optional pincite.\n * Pattern: word(s), supra [, at page]\n * Captures: (1) party name, (2) pincite\n * Note: Matches party names including \"v.\" (e.g., \"Smith v. Jones, supra\")\n */\nexport const SUPRA_PATTERN: RegExp = /\\b([A-Z][a-zA-Z]+(?:(?:\\s+v\\.?\\s+|\\s+)[A-Z][a-zA-Z]+)*)\\s*,?\\s+supra(?:,?\\s+at\\s+(\\d+))?/g\n\n/**\n * Short-form case: volume reporter at page\n * Pattern: number space abbreviation space \"at\" space number\n * Simplified detection; full parsing in extraction layer\n */\nexport const SHORT_FORM_CASE_PATTERN: RegExp = /\\b(\\d+(?:-\\d+)?)\\s+([A-Z][A-Za-z.\\s]+?(?:\\d[a-z])?)\\s+at\\s+(\\d+)\\b/g\n\n/** All short-form patterns for tokenization */\nexport const SHORT_FORM_PATTERNS: readonly RegExp[] = [\n ID_PATTERN,\n IBID_PATTERN,\n SUPRA_PATTERN,\n SHORT_FORM_CASE_PATTERN,\n] as const\n\n/** Pattern objects for consistency with other pattern modules */\nexport const shortFormPatterns: Pattern[] = [\n {\n id: 'id',\n regex: ID_PATTERN,\n description: 'Id. citations (e.g., \"Id.\" or \"Id. at 253\")',\n type: 'case', // Will be typed as 'id' in extraction layer\n },\n {\n id: 'ibid',\n regex: IBID_PATTERN,\n description: 'Ibid. citations (e.g., \"Ibid.\" or \"Ibid. at 125\")',\n type: 'case', // Will be typed as 'id' in extraction layer\n },\n {\n id: 'supra',\n regex: SUPRA_PATTERN,\n description: 'Supra citations (e.g., \"Smith, supra\" or \"Smith, supra, at 460\")',\n type: 'case', // Will be typed as 'supra' in extraction layer\n },\n {\n id: 'shortFormCase',\n regex: SHORT_FORM_CASE_PATTERN,\n description: 'Short-form case citations (e.g., \"500 F.2d at 125\")',\n type: 'case', // Will be typed as 'shortFormCase' in extraction layer\n },\n]\n","/**\n * Tokenization Layer for Citation Extraction\n *\n * Applies regex patterns to cleaned text to produce citation candidate tokens.\n * This is the second stage of the parsing pipeline:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates) ← THIS MODULE\n * 3. Extract (parse metadata, validate against reporters-db)\n *\n * Tokenization is intentionally broad - it finds potential citations without\n * validating them. The extraction layer (Plan 5) validates tokens against\n * reporters-db and parses metadata.\n *\n * @module tokenize\n */\n\nimport type { Span } from '@/types/span'\nimport type { Pattern } from '@/patterns'\nimport {\n casePatterns,\n statutePatterns,\n journalPatterns,\n neutralPatterns,\n} from '@/patterns'\nimport { shortFormPatterns } from '@/patterns/shortForm'\n\n/**\n * A token representing a potential citation found in cleaned text.\n *\n * Tokens are produced by applying regex patterns to cleaned text.\n * They include matched text, position in cleaned text, and pattern metadata\n * for use in the extraction layer.\n */\nexport interface Token {\n /** Matched text from input */\n text: string\n\n /** Position in cleaned text (cleanStart/cleanEnd only, no original positions yet) */\n span: Pick<Span, 'cleanStart' | 'cleanEnd'>\n\n /** Pattern type that matched this token */\n type: Pattern['type']\n\n /** Pattern ID that matched this token */\n patternId: string\n}\n\n/**\n * Tokenizes cleaned text by applying regex patterns to find citation candidates.\n *\n * For each pattern in the patterns array:\n * 1. Apply pattern.regex.matchAll(cleanedText)\n * 2. Create Token for each match with position, text, and pattern metadata\n * 3. Collect all tokens from all patterns\n * 4. Sort by cleanStart position (ascending)\n *\n * Timeout protection: If a pattern throws (e.g., ReDoS), skip it and continue\n * with remaining patterns. Logs warning to console.\n *\n * Note: This function is synchronous because regex matching is inherently\n * synchronous. This enables both sync (extractCitations) and async\n * (extractCitationsAsync) APIs in Plan 6.\n *\n * @param cleanedText - Text that has been cleaned by cleanText() from Plan 1\n * @param patterns - Regex patterns to apply (defaults to all patterns from Plan 2)\n * @returns Array of tokens sorted by position (cleanStart ascending)\n *\n * @example\n * ```typescript\n * import { tokenize } from '@/tokenize'\n * import { cleanText } from '@/clean'\n *\n * const original = \"See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)\"\n * const { cleanedText } = cleanText(original)\n * const tokens = tokenize(cleanedText)\n * // tokens[0] = {\n * // text: \"500 F.2d 123\",\n * // span: { cleanStart: 18, cleanEnd: 30 },\n * // type: \"case\",\n * // patternId: \"federal-reporter\"\n * // }\n * ```\n */\nexport function tokenize(\n cleanedText: string,\n patterns: Pattern[] = [\n ...casePatterns,\n ...statutePatterns,\n ...journalPatterns,\n ...neutralPatterns,\n ...shortFormPatterns,\n ]\n): Token[] {\n const tokens: Token[] = []\n\n for (const pattern of patterns) {\n try {\n // Apply pattern to cleaned text\n const matches = cleanedText.matchAll(pattern.regex)\n\n for (const match of matches) {\n // Create token from match\n tokens.push({\n text: match[0],\n span: {\n cleanStart: match.index!,\n cleanEnd: match.index! + match[0].length,\n },\n type: pattern.type,\n patternId: pattern.id,\n })\n }\n } catch (error) {\n // Timeout protection: If pattern throws (ReDoS, etc.), skip it\n console.warn(\n `Pattern ${pattern.id} threw error, skipping:`,\n error instanceof Error ? error.message : String(error)\n )\n }\n }\n\n // Sort tokens by position (cleanStart ascending)\n tokens.sort((a, b) => a.span.cleanStart - b.span.cleanStart)\n\n return tokens\n}\n","/**\n * Date Parsing Utilities for Legal Citations\n *\n * Parses dates from parentheticals in legal citations. Supports three formats:\n * 1. Abbreviated month: \"Jan. 15, 2020\"\n * 2. Full month: \"January 15, 2020\"\n * 3. Numeric US: \"1/15/2020\"\n * 4. Year-only: \"2020\"\n *\n * @module extract/dates\n */\n\n/**\n * Structured date components.\n * Month and day are optional to support year-only dates.\n */\nexport interface ParsedDate {\n year: number\n month?: number\n day?: number\n}\n\n/**\n * Date in both ISO string and structured format.\n */\nexport interface StructuredDate {\n /** ISO 8601 format: YYYY-MM-DD, YYYY-MM, or YYYY */\n iso: string\n /** Structured date components */\n parsed: ParsedDate\n}\n\n/**\n * Month name/abbreviation to numeric value (1-12).\n * Includes both 3-letter and 4-letter (Sept) abbreviations.\n */\nconst MONTH_MAP: Record<string, number> = {\n jan: 1,\n january: 1,\n feb: 2,\n february: 2,\n mar: 3,\n march: 3,\n apr: 4,\n april: 4,\n may: 5,\n jun: 6,\n june: 6,\n jul: 7,\n july: 7,\n aug: 8,\n august: 8,\n sep: 9,\n sept: 9,\n september: 9,\n oct: 10,\n october: 10,\n nov: 11,\n november: 11,\n dec: 12,\n december: 12,\n}\n\n/**\n * Parse a month name or abbreviation to numeric value (1-12).\n *\n * @param monthStr - Month name or abbreviation (e.g., \"Jan\", \"January\", \"Sept.\")\n * @returns Numeric month (1-12)\n * @throws Error if month name is not recognized\n *\n * @example\n * ```typescript\n * parseMonth(\"Jan\") // 1\n * parseMonth(\"Sept.\") // 9\n * parseMonth(\"December\") // 12\n * ```\n */\nexport function parseMonth(monthStr: string): number {\n // Normalize: lowercase, strip trailing period\n const normalized = monthStr.toLowerCase().replace(/\\.$/, '')\n const month = MONTH_MAP[normalized]\n\n if (month === undefined) {\n throw new Error(`Invalid month name: ${monthStr}`)\n }\n\n return month\n}\n\n/**\n * Convert structured date components to ISO 8601 string.\n * Handles full dates, month+year, and year-only formats.\n *\n * @param parsed - Structured date components\n * @returns ISO 8601 string (YYYY-MM-DD, YYYY-MM, or YYYY)\n *\n * @example\n * ```typescript\n * toIsoDate({ year: 2020, month: 1, day: 15 }) // \"2020-01-15\"\n * toIsoDate({ year: 2020, month: 1 }) // \"2020-01\"\n * toIsoDate({ year: 2020 }) // \"2020\"\n * ```\n */\nexport function toIsoDate(parsed: ParsedDate): string {\n const { year, month, day } = parsed\n\n if (month !== undefined && day !== undefined) {\n // Full date: YYYY-MM-DD with zero-padding\n const monthStr = String(month).padStart(2, '0')\n const dayStr = String(day).padStart(2, '0')\n return `${year}-${monthStr}-${dayStr}`\n }\n\n if (month !== undefined) {\n // Month+year: YYYY-MM with zero-padding\n const monthStr = String(month).padStart(2, '0')\n return `${year}-${monthStr}`\n }\n\n // Year-only: YYYY\n return String(year)\n}\n\n/**\n * Parse a date string into structured format.\n * Tries multiple formats in order:\n * 1. Abbreviated month (Jan. 15, 2020)\n * 2. Full month (January 15, 2020)\n * 3. Numeric US format (1/15/2020)\n * 4. Year-only (2020)\n *\n * @param dateStr - Date string in any supported format\n * @returns Structured date with ISO string, or undefined if no match\n *\n * @example\n * ```typescript\n * parseDate(\"Jan. 15, 2020\") // { iso: \"2020-01-15\", parsed: { year: 2020, month: 1, day: 15 } }\n * parseDate(\"January 15, 2020\") // { iso: \"2020-01-15\", parsed: { year: 2020, month: 1, day: 15 } }\n * parseDate(\"1/15/2020\") // { iso: \"2020-01-15\", parsed: { year: 2020, month: 1, day: 15 } }\n * parseDate(\"2020\") // { iso: \"2020\", parsed: { year: 2020 } }\n * parseDate(\"no date\") // undefined\n * ```\n */\nexport function parseDate(dateStr: string): StructuredDate | undefined {\n // Try abbreviated month format: Jan. 15, 2020 or Feb 9, 2015\n const abbrMatch = dateStr.match(/\\b(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)\\.?\\s+(\\d{1,2}),?\\s+(\\d{4})\\b/i)\n if (abbrMatch) {\n const month = parseMonth(abbrMatch[1])\n const day = Number.parseInt(abbrMatch[2], 10)\n const year = Number.parseInt(abbrMatch[3], 10)\n const parsed = { year, month, day }\n return { iso: toIsoDate(parsed), parsed }\n }\n\n // Try full month format: January 15, 2020\n const fullMatch = dateStr.match(/\\b(January|February|March|April|May|June|July|August|September|October|November|December)\\s+(\\d{1,2}),?\\s+(\\d{4})\\b/i)\n if (fullMatch) {\n const month = parseMonth(fullMatch[1])\n const day = Number.parseInt(fullMatch[2], 10)\n const year = Number.parseInt(fullMatch[3], 10)\n const parsed = { year, month, day }\n return { iso: toIsoDate(parsed), parsed }\n }\n\n // Try numeric US format: 1/15/2020\n const numericMatch = dateStr.match(/\\b(\\d{1,2})\\/(\\d{1,2})\\/(\\d{4})\\b/)\n if (numericMatch) {\n const month = Number.parseInt(numericMatch[1], 10)\n const day = Number.parseInt(numericMatch[2], 10)\n const year = Number.parseInt(numericMatch[3], 10)\n const parsed = { year, month, day }\n return { iso: toIsoDate(parsed), parsed }\n }\n\n // Try year-only: 2020\n const yearMatch = dateStr.match(/\\b(\\d{4})\\b/)\n if (yearMatch) {\n const year = Number.parseInt(yearMatch[1], 10)\n const parsed = { year }\n return { iso: toIsoDate(parsed), parsed }\n }\n\n // No match\n return undefined\n}\n","/**\n * Case Citation Extraction\n *\n * Parses tokenized case citations to extract volume, reporter, page, and\n * optional metadata (pincite, court, year). This is the third stage of\n * the parsing pipeline:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates)\n * 3. Extract (parse metadata, validate) ← THIS MODULE\n *\n * Extraction parses structured data from token text. Validation against\n * reporters-db happens in Phase 3 (resolution layer).\n *\n * @module extract/extractCase\n */\n\nimport type { Token } from '@/tokenize'\nimport type { FullCaseCitation } from '@/types/citation'\nimport type { TransformationMap, Span } from '@/types/span'\nimport { parseDate, type StructuredDate } from './dates'\n\n/** Parse a volume string as number when purely numeric, string when hyphenated */\nfunction parseVolume(raw: string): number | string {\n\tconst num = Number.parseInt(raw, 10)\n\treturn String(num) === raw ? num : raw\n}\n\n/** Month abbreviations and full names found in legal citation parentheticals */\nconst MONTH_PATTERN = /(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec|January|February|March|April|May|June|July|August|September|October|November|December)\\.?/\n\n// ============================================================================\n// Compiled regex patterns for performance (hoisted to module level)\n// ============================================================================\n\n/** Matches volume-reporter-page format in citation core */\nconst VOLUME_REPORTER_PAGE_REGEX = /^(\\d+(?:-\\d+)?)\\s+([A-Za-z0-9.\\s]+)\\s+(\\d+|_{3,}|-{3,})/\n\n/** Detects blank page placeholders (3+ underscores or dashes) */\nconst BLANK_PAGE_REGEX = /^[_-]{3,}$/\n\n/** Extracts pincite (page reference after comma) */\nconst PINCITE_REGEX = /,\\s*(\\d+)/\n\n/** Matches parenthetical content */\nconst PAREN_REGEX = /\\(([^)]+)\\)/\n\n/** Look-ahead pattern for parenthetical after token */\nconst LOOKAHEAD_PAREN_REGEX = /^(?:,\\s*\\d+)*\\s*\\(([^)]+)\\)/\n\n/** Extracts pincite from look-ahead text */\nconst LOOKAHEAD_PINCITE_REGEX = /^,\\s*(\\d+)/\n\n/** Matches chained parentheticals with disposition */\nconst CHAINED_DISPOSITION_REGEX = /\\([^)]+\\)\\s*\\((en banc|per curiam)\\)/i\n\n/** Identifies Supreme Court reporters for court inference */\nconst SCOTUS_REPORTER_REGEX = /^(?:U\\.?\\s?S\\.|S\\.?\\s?Ct\\.|L\\.?\\s?Ed\\.)/\n\n/** Citation boundary pattern (digit-period-space) */\nconst CITATION_BOUNDARY_REGEX = /\\d\\.\\s+/g\n\n/** Standard \"v.\" or \"vs.\" case name format */\nconst V_CASE_NAME_REGEX = /([A-Z][A-Za-z0-9\\s.,'&()/-]+?)\\s+v(?:s)?\\.?\\s+([A-Za-z0-9\\s.,'&()/-]+?)\\s*,\\s*$/\n\n/** Procedural prefix case name format */\nconst PROCEDURAL_PREFIX_REGEX = /\\b(In re|Ex parte|Matter of|Estate of|State ex rel\\.|United States ex rel\\.|Application of|Petition of)\\s+([A-Za-z0-9\\s.,'&()/-]+?)\\s*,\\s*$/i\n\n/**\n * Strips date components (month, day, year) from parenthetical content\n * to isolate the court abbreviation.\n * E.g., \"2d Cir. Jan. 15, 2020\" → \"2d Cir.\"\n * \"C.D. Cal. Feb. 9, 2015\" → \"C.D. Cal.\"\n * \"D. Mass. Mar. 2020\" → \"D. Mass.\"\n * \"D. Mass. 1/15/2020\" → \"D. Mass.\"\n */\nfunction stripDateFromCourt(content: string): string | undefined {\n\t// Strip trailing numeric date format first (1/15/2020)\n\tlet court = content.replace(/\\s*\\d{1,2}\\/\\d{1,2}\\/\\d{4}\\s*$/, '').trim()\n\t// Strip trailing year\n\tcourt = court.replace(/\\s*\\d{4}\\s*$/, '').trim()\n\t// Strip trailing date components: optional day+comma, month abbreviation or full name\n\tcourt = court.replace(/\\s*,?\\s*\\d{1,2}\\s*,?\\s*$/, '').trim()\n\tcourt = court.replace(new RegExp(`\\\\s*${MONTH_PATTERN.source}\\\\s*$`, 'i'), '').trim()\n\t// Strip any trailing commas left over\n\tcourt = court.replace(/,\\s*$/, '').trim()\n\treturn court && /[A-Za-z]/.test(court) ? court : undefined\n}\n\n/**\n * Extract case name via backward search from citation core.\n * Looks for \"v.\" pattern or procedural prefixes (In re, Ex parte, Matter of).\n *\n * @param cleanedText - Full cleaned text\n * @param coreStart - Position where citation core begins (volume start)\n * @param maxLookback - Maximum characters to search backward (default 150)\n * @returns Case name and start position, or undefined if not found\n *\n * @example\n * ```typescript\n * extractCaseName(text, 20, 150)\n * // Returns: { caseName: \"Smith v. Jones\", nameStart: 0 }\n * ```\n */\nfunction extractCaseName(\n\tcleanedText: string,\n\tcoreStart: number,\n\tmaxLookback = 150,\n): { caseName: string; nameStart: number } | undefined {\n\tconst searchStart = Math.max(0, coreStart - maxLookback)\n\tlet precedingText = cleanedText.substring(searchStart, coreStart)\n\tlet adjustedSearchStart = searchStart\n\n\t// Split at last sentence boundary to avoid crossing citation boundaries\n\t// Find last occurrence of digit-period-space pattern (end of reporter page like \"10. Jones\")\n\t// This is more specific than generic \". [A-Z]\" which would match \"v.\" or \"United States v.\"\n\tlet lastBoundaryIndex = -1\n\tlet match: RegExpExecArray | null\n\twhile ((match = CITATION_BOUNDARY_REGEX.exec(precedingText)) !== null) {\n\t\tlastBoundaryIndex = match.index + match[0].length\n\t}\n\n\tif (lastBoundaryIndex !== -1) {\n\t\tprecedingText = precedingText.substring(lastBoundaryIndex)\n\t\tadjustedSearchStart = searchStart + lastBoundaryIndex\n\t}\n\n\t// Priority 1: Standard \"v.\" or \"vs.\" format with comma before citation\n\t// Match party names with letters, numbers (for \"Doe No. 2\"), periods, apostrophes, ampersands, hyphens, slashes\n\tconst vMatch = V_CASE_NAME_REGEX.exec(precedingText)\n\tif (vMatch) {\n\t\t// Check for semicolon in matched text (multi-citation separator)\n\t\tif (!vMatch[0].includes(';')) {\n\t\t\tconst caseName = `${vMatch[1].trim()} v. ${vMatch[2].trim()}`\n\t\t\tconst nameStart = adjustedSearchStart + vMatch.index\n\t\t\treturn { caseName, nameStart }\n\t\t}\n\t}\n\n\t// Priority 2: Procedural prefixes (including Estate of)\n\tconst procMatch = PROCEDURAL_PREFIX_REGEX.exec(precedingText)\n\tif (procMatch) {\n\t\t// Check for semicolon in matched text (multi-citation separator)\n\t\tif (!procMatch[0].includes(';')) {\n\t\t\tconst caseName = `${procMatch[1]} ${procMatch[2].trim()}`\n\t\t\tconst nameStart = adjustedSearchStart + procMatch.index\n\t\t\treturn { caseName, nameStart }\n\t\t}\n\t}\n\n\treturn undefined\n}\n\n/**\n * Find the end of parenthetical content, including chained parentheticals and subsequent history.\n * Tracks paren depth to handle nested parens, and continues scanning for chained parens.\n *\n * @param cleanedText - Full cleaned text\n * @param searchStart - Position to start searching from (after citation core)\n * @param maxLookahead - Maximum characters to search forward (default 200)\n * @returns Position after final closing paren (exclusive), or searchStart if no parens\n *\n * @example\n * ```typescript\n * findParentheticalEnd(text, 20, 200)\n * // For \"(2020) (en banc)\" returns position after final \")\"\n * ```\n */\nfunction findParentheticalEnd(\n\tcleanedText: string,\n\tsearchStart: number,\n\tmaxLookahead = 200,\n): number {\n\tlet pos = searchStart\n\tconst endLimit = Math.min(cleanedText.length, searchStart + maxLookahead)\n\tlet depth = 0\n\tlet foundAnyParen = false\n\n\twhile (pos < endLimit) {\n\t\tconst char = cleanedText[pos]\n\n\t\tif (char === '(') {\n\t\t\tdepth++\n\t\t\tfoundAnyParen = true\n\t\t\tpos++\n\t\t} else if (char === ')') {\n\t\t\tdepth--\n\t\t\tpos++\n\n\t\t\t// When depth returns to 0, check for chained paren or subsequent history\n\t\t\tif (depth === 0) {\n\t\t\t\t// Skip whitespace\n\t\t\t\tlet nextPos = pos\n\t\t\t\twhile (nextPos < endLimit && /\\s/.test(cleanedText[nextPos])) {\n\t\t\t\t\tnextPos++\n\t\t\t\t}\n\n\t\t\t\t// Check for chained parenthetical\n\t\t\t\tif (cleanedText[nextPos] === '(') {\n\t\t\t\t\tpos = nextPos\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\t// Check for subsequent history signals\n\t\t\t\tconst remainingText = cleanedText.substring(nextPos, endLimit)\n\t\t\t\tconst historyRegex =\n\t\t\t\t\t/^,\\s*(aff'd|rev'd|cert\\.\\s*denied|overruled\\s+by|vacated\\s+by)/i\n\t\t\t\tif (historyRegex.test(remainingText)) {\n\t\t\t\t\t// Continue scanning - subsequent history has its own paren\n\t\t\t\t\tpos = nextPos\n\t\t\t\t\tcontinue\n\t\t\t\t}\n\n\t\t\t\t// No chained paren or subsequent history - we're done\n\t\t\t\treturn pos\n\t\t\t}\n\t\t} else {\n\t\t\tpos++\n\t\t}\n\t}\n\n\t// If we found parens but didn't close them all, return where we stopped\n\t// If we never found parens, return searchStart\n\treturn foundAnyParen ? pos : searchStart\n}\n\n/**\n * Parse parenthetical content to extract court, year, date, and disposition.\n * Unified parser replacing the old year-only logic.\n *\n * @param content - Parenthetical content (without the parens themselves)\n * @returns Structured parenthetical data\n *\n * @example\n * ```typescript\n * parseParenthetical(\"9th Cir. 2020\")\n * // Returns: { court: \"9th Cir.\", year: 2020, date: { iso: \"2020\", parsed: { year: 2020 } } }\n *\n * parseParenthetical(\"2d Cir. Jan. 15, 2020\")\n * // Returns: { court: \"2d Cir.\", year: 2020, date: { iso: \"2020-01-15\", parsed: { year: 2020, month: 1, day: 15 } } }\n *\n * parseParenthetical(\"en banc\")\n * // Returns: { disposition: \"en banc\" }\n * ```\n */\nfunction parseParenthetical(content: string): {\n\tcourt?: string\n\tyear?: number\n\tdate?: StructuredDate\n\tdisposition?: string\n} {\n\tconst result: {\n\t\tcourt?: string\n\t\tyear?: number\n\t\tdate?: StructuredDate\n\t\tdisposition?: string\n\t} = {}\n\n\t// Parse structured date using dates.ts\n\tconst dateResult = parseDate(content)\n\tif (dateResult) {\n\t\tresult.date = dateResult\n\t\tresult.year = dateResult.parsed.year\n\t}\n\n\t// Extract court (strips date components)\n\tconst courtResult = stripDateFromCourt(content)\n\tif (courtResult) {\n\t\tresult.court = courtResult\n\t}\n\n\t// Check for disposition\n\tif (/\\ben banc\\b/i.test(content)) {\n\t\tresult.disposition = 'en banc'\n\t} else if (/\\bper curiam\\b/i.test(content)) {\n\t\tresult.disposition = 'per curiam'\n\t}\n\n\treturn result\n}\n\n/**\n * Normalize party name for matching by removing legal noise.\n * Normalization pipeline:\n * 1. Strip \"et al.\" (case-insensitive)\n * 2. Strip \"d/b/a\" and everything after (case-insensitive)\n * 3. Strip \"aka\" and everything after (case-insensitive, word boundary)\n * 4. Strip trailing corporate suffixes (Inc., LLC, Corp., Ltd., Co., LLP, LP, P.C.) - iterative\n * 5. Strip leading articles (The, A, An)\n * 6. Normalize whitespace\n * 7. Trim and lowercase\n *\n * @param name - Raw party name\n * @returns Normalized party name\n *\n * @example\n * ```typescript\n * normalizePartyName(\"The Smith Corp., Inc.\") // \"smith\"\n * normalizePartyName(\"Doe et al.\") // \"doe\"\n * normalizePartyName(\"United States\") // \"united states\" (not stripped)\n * ```\n */\nfunction normalizePartyName(name: string): string {\n\tlet normalized = name\n\n\t// Strip \"et al.\" (with or without period, case-insensitive)\n\tnormalized = normalized.replace(/\\bet\\s+al\\.?/gi, '')\n\n\t// Strip \"d/b/a\" and everything after it (case-insensitive)\n\tnormalized = normalized.replace(/\\s+d\\/b\\/a\\b.*/gi, '')\n\n\t// Strip \"aka\" and everything after it (case-insensitive, word boundary)\n\tnormalized = normalized.replace(/\\s+aka\\b.*/gi, '')\n\n\t// Strip trailing corporate suffixes (with or without trailing period, handle comma)\n\t// Repeat to handle multiple suffixes like \"Corp., Inc.\"\n\tlet prev = ''\n\twhile (prev !== normalized) {\n\t\tprev = normalized\n\t\tnormalized = normalized.replace(/,?\\s*(Inc|LLC|Corp|Ltd|Co|LLP|LP|P\\.C)\\.?$/gi, '')\n\t}\n\n\t// Strip leading articles (only at start)\n\tnormalized = normalized.replace(/^(The|A|An)\\s+/i, '')\n\n\t// Normalize whitespace (collapse multiple spaces)\n\tnormalized = normalized.replace(/\\s+/g, ' ')\n\n\t// Trim and lowercase\n\treturn normalized.trim().toLowerCase()\n}\n\n/**\n * Extract plaintiff and defendant party names from case name.\n * Handles adversarial cases (v.) and procedural prefixes (In re, Ex parte, etc.).\n *\n * @param caseName - Case name string\n * @returns Party name data with raw and normalized fields\n *\n * @example\n * ```typescript\n * extractPartyNames(\"Smith v. Jones\")\n * // Returns: { plaintiff: \"Smith\", plaintiffNormalized: \"smith\", defendant: \"Jones\", defendantNormalized: \"jones\" }\n *\n * extractPartyNames(\"In re Smith\")\n * // Returns: { plaintiff: \"In re Smith\", plaintiffNormalized: \"smith\", proceduralPrefix: \"In re\" }\n *\n * extractPartyNames(\"People v. Smith\")\n * // Returns: { plaintiff: \"People\", plaintiffNormalized: \"people\", defendant: \"Smith\", defendantNormalized: \"smith\" }\n * ```\n */\nfunction extractPartyNames(caseName: string): {\n\tplaintiff?: string\n\tplaintiffNormalized?: string\n\tdefendant?: string\n\tdefendantNormalized?: string\n\tproceduralPrefix?: string\n} {\n\t// Procedural prefix patterns (anchored to start, case-insensitive)\n\tconst proceduralPrefixes = [\n\t\t'In re',\n\t\t'Ex parte',\n\t\t'Matter of',\n\t\t'State ex rel.',\n\t\t'United States ex rel.',\n\t\t'Application of',\n\t\t'Petition of',\n\t\t'Estate of',\n\t]\n\n\t// Check for procedural prefix first\n\tfor (const prefix of proceduralPrefixes) {\n\t\tconst prefixRegex = new RegExp(`^(${prefix})\\\\s+(.+)$`, 'i')\n\t\tconst match = prefixRegex.exec(caseName)\n\t\tif (match) {\n\t\t\tconst matchedPrefix = match[1]\n\t\t\tconst subject = match[2]\n\n\t\t\t// Check if there's a \"v.\" after the prefix (adversarial case)\n\t\t\tif (/\\s+v\\.?\\s+/i.test(subject)) {\n\t\t\t\t// Adversarial case with procedural-looking plaintiff (e.g., \"Estate of X v. Y\")\n\t\t\t\t// Split on \"v.\"\n\t\t\t\tconst vMatch = /^(.+?)\\s+v\\.?\\s+(.+)$/i.exec(caseName)\n\t\t\t\tif (vMatch) {\n\t\t\t\t\tconst plaintiff = vMatch[1].trim()\n\t\t\t\t\tconst defendant = vMatch[2].trim()\n\t\t\t\t\treturn {\n\t\t\t\t\t\tplaintiff,\n\t\t\t\t\t\tplaintiffNormalized: normalizePartyName(plaintiff),\n\t\t\t\t\t\tdefendant,\n\t\t\t\t\t\tdefendantNormalized: normalizePartyName(defendant),\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t} else {\n\t\t\t\t// Pure procedural (no \"v.\")\n\t\t\t\treturn {\n\t\t\t\t\tplaintiff: caseName,\n\t\t\t\t\tplaintiffNormalized: normalizePartyName(subject),\n\t\t\t\t\tproceduralPrefix: matchedPrefix,\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Split on \"v.\" for adversarial cases\n\tconst vRegex = /^(.+?)\\s+v\\.?\\s+(.+)$/i\n\tconst vMatch = vRegex.exec(caseName)\n\tif (vMatch) {\n\t\tlet plaintiff = vMatch[1].trim()\n\t\tconst defendant = vMatch[2].trim()\n\n\t\t// Strip signal words from plaintiff (e.g., \"In Smith\" → \"Smith\", \"See Jones\" → \"Jones\")\n\t\t// Preserve \"In re\" which is a procedural prefix, not a signal word\n\t\tplaintiff = plaintiff.replace(/^(?:In(?!\\s+re\\b)|See(?:\\s+[Aa]lso)?|Compare|But(?:\\s+[Ss]ee)?|Cf\\.?|Also)\\s+/i, '').trim()\n\n\t\treturn {\n\t\t\tplaintiff: plaintiff || vMatch[1].trim(), // Fallback to original if strip leaves nothing\n\t\t\tplaintiffNormalized: normalizePartyName(plaintiff || vMatch[1].trim()),\n\t\t\tdefendant,\n\t\t\tdefendantNormalized: normalizePartyName(defendant),\n\t\t}\n\t}\n\n\t// No \"v.\" and no procedural prefix - no parties extracted\n\treturn {}\n}\n\n/**\n * Extracts case citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Leading digits (e.g., \"500\" from \"500 F.2d 123\")\n * - Reporter: Alphabetic abbreviation (e.g., \"F.2d\")\n * - Page: Trailing digits after reporter (e.g., \"123\")\n * - Pincite: Optional page reference after comma (e.g., \", 125\")\n * - Court: Optional court abbreviation in parentheses (e.g., \"(9th Cir.)\")\n * - Year: Optional year in parentheses (e.g., \"(2020)\")\n *\n * Confidence scoring:\n * - Base: 0.5\n * - Common reporter pattern (F., U.S., etc.): +0.3\n * - Valid year (not future): +0.2\n * - Capped at 1.0\n *\n * Position translation:\n * - Uses TransformationMap to convert clean positions → original positions\n * - cleanStart/cleanEnd from token span\n * - originalStart/originalEnd via transformationMap.cleanToOriginal\n *\n * Note: This function does NOT validate against reporters-db. That happens\n * in Phase 3 (resolution layer). Phase 2 extraction only parses structure.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns FullCaseCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"500 F.2d 123, 125\",\n * span: { cleanStart: 10, cleanEnd: 27 },\n * type: \"case\",\n * patternId: \"federal-reporter\"\n * }\n * const citation = extractCase(token, transformationMap)\n * // citation = {\n * // type: \"case\",\n * // text: \"500 F.2d 123, 125\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // page: 123,\n * // pincite: 125,\n * // span: { cleanStart: 10, cleanEnd: 27, originalStart: 10, originalEnd: 27 },\n * // confidence: 0.8,\n * // ...\n * // }\n * ```\n */\nexport function extractCase(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n\tcleanedText?: string,\n): FullCaseCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-reporter-page using regex\n\t// Pattern: volume (digits) + reporter (letters/periods/spaces/numbers) + page (digits or blank placeholder)\n\t// Use greedy matching for reporter to capture full abbreviation including spaces\n\tconst match = VOLUME_REPORTER_PAGE_REGEX.exec(text)\n\n\tif (!match) {\n\t\t// Fallback if pattern doesn't match (shouldn't happen if tokenizer is correct)\n\t\tthrow new Error(`Failed to parse case citation: ${text}`)\n\t}\n\n\tconst volume = parseVolume(match[1])\n\tconst reporter = match[2].trim()\n\n\t// Check if page is a blank placeholder\n\tconst pageStr = match[3]\n\tconst isBlankPage = BLANK_PAGE_REGEX.test(pageStr)\n\tconst page = isBlankPage ? undefined : Number.parseInt(pageStr, 10)\n\tconst hasBlankPage = isBlankPage ? true : undefined\n\n\t// Extract optional pincite (page reference after comma)\n\t// Pattern: \", digits\" (e.g., \", 125\")\n\tconst pinciteMatch = PINCITE_REGEX.exec(text)\n\tlet pincite = pinciteMatch ? Number.parseInt(pinciteMatch[1], 10) : undefined\n\n\t// Initialize Phase 6 fields\n\tlet year: number | undefined\n\tlet court: string | undefined\n\tlet date: StructuredDate | undefined\n\tlet disposition: string | undefined\n\tlet caseName: string | undefined\n\tlet fullSpan: Span | undefined\n\n\t// Extract parenthetical from token text\n\tlet parentheticalContent: string | undefined\n\t// Match any parenthetical (with or without letters)\n\tconst parenMatch = PAREN_REGEX.exec(text)\n\tif (parenMatch) {\n\t\tparentheticalContent = parenMatch[1]\n\t\t// Parse parenthetical using unified parser\n\t\tconst parenResult = parseParenthetical(parentheticalContent)\n\t\tyear = parenResult.year\n\t\tcourt = parenResult.court\n\t\tdate = parenResult.date\n\t\tdisposition = parenResult.disposition\n\t}\n\n\t// Look ahead in cleaned text for parenthetical after the token\n\t// Tokenization patterns only capture volume-reporter-page, so parentheticals\n\t// like \"(1989)\" or \"(9th Cir. 2020)\" are not in the token text.\n\tif (cleanedText && !parentheticalContent) {\n\t\tconst afterToken = cleanedText.substring(span.cleanEnd)\n\t\tconst lookAheadMatch = LOOKAHEAD_PAREN_REGEX.exec(afterToken)\n\t\tif (lookAheadMatch) {\n\t\t\tparentheticalContent = lookAheadMatch[1]\n\t\t\t// Parse parenthetical using unified parser\n\t\t\tconst parenResult = parseParenthetical(parentheticalContent)\n\t\t\tyear = parenResult.year\n\t\t\tcourt = parenResult.court\n\t\t\tdate = parenResult.date\n\t\t\tdisposition = parenResult.disposition\n\n\t\t\t// Extract pincite from look-ahead if not already found in token text\n\t\t\tif (pincite === undefined) {\n\t\t\t\tconst laPinciteMatch = LOOKAHEAD_PINCITE_REGEX.exec(afterToken)\n\t\t\t\tif (laPinciteMatch) {\n\t\t\t\t\tpincite = Number.parseInt(laPinciteMatch[1], 10)\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\t}\n\n\t// Check for chained parentheticals with disposition (e.g., \"(2020) (en banc)\")\n\tif (cleanedText && !disposition) {\n\t\tconst afterToken = cleanedText.substring(span.cleanEnd)\n\t\t// Look for second parenthetical after first one\n\t\tconst chainedMatch = CHAINED_DISPOSITION_REGEX.exec(afterToken)\n\t\tif (chainedMatch) {\n\t\t\tdisposition = chainedMatch[1].toLowerCase()\n\t\t}\n\t}\n\n\t// Infer court from reporter for known Supreme Court reporters\n\tif (!court && SCOTUS_REPORTER_REGEX.test(reporter)) {\n\t\tcourt = 'scotus'\n\t}\n\n\t// Phase 6: Extract case name via backward search\n\tif (cleanedText) {\n\t\tconst caseNameResult = extractCaseName(cleanedText, span.cleanStart)\n\t\tif (caseNameResult) {\n\t\t\tcaseName = caseNameResult.caseName\n\n\t\t\t// Calculate fullSpan: case name start through parenthetical end\n\t\t\tconst parenEnd = findParentheticalEnd(cleanedText, span.cleanEnd)\n\t\t\tconst fullCleanStart = caseNameResult.nameStart\n\t\t\tconst fullCleanEnd = parenEnd > span.cleanEnd ? parenEnd : span.cleanEnd\n\n\t\t\t// Translate to original positions\n\t\t\tconst fullOriginalStart =\n\t\t\t\ttransformationMap.cleanToOriginal.get(fullCleanStart) ?? fullCleanStart\n\t\t\tconst fullOriginalEnd =\n\t\t\t\ttransformationMap.cleanToOriginal.get(fullCleanEnd) ?? fullCleanEnd\n\n\t\t\tfullSpan = {\n\t\t\t\tcleanStart: fullCleanStart,\n\t\t\t\tcleanEnd: fullCleanEnd,\n\t\t\t\toriginalStart: fullOriginalStart,\n\t\t\t\toriginalEnd: fullOriginalEnd,\n\t\t\t}\n\t\t}\n\t}\n\n\t// Phase 7: Extract party names from case name\n\tlet plaintiff: string | undefined\n\tlet plaintiffNormalized: string | undefined\n\tlet defendant: string | undefined\n\tlet defendantNormalized: string | undefined\n\tlet proceduralPrefix: string | undefined\n\n\tif (caseName) {\n\t\tconst partyResult = extractPartyNames(caseName)\n\t\tplaintiff = partyResult.plaintiff\n\t\tplaintiffNormalized = partyResult.plaintiffNormalized\n\t\tdefendant = partyResult.defendant\n\t\tdefendantNormalized = partyResult.defendantNormalized\n\t\tproceduralPrefix = partyResult.proceduralPrefix\n\t}\n\n\t// Translate positions from clean → original (citation core only - span unchanged)\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Calculate confidence score\n\tlet confidence = 0.5 // Base confidence\n\n\t// Common reporter patterns (F., U.S., S. Ct., etc.)\n\tconst commonReporters = [\n\t\t'F.',\n\t\t'F.2d',\n\t\t'F.3d',\n\t\t'F.4th',\n\t\t'U.S.',\n\t\t'S. Ct.',\n\t\t'L. Ed.',\n\t\t'P.',\n\t\t'P.2d',\n\t\t'P.3d',\n\t\t'A.',\n\t\t'A.2d',\n\t\t'A.3d',\n\t\t'N.E.',\n\t\t'N.E.2d',\n\t\t'N.E.3d',\n\t\t'N.W.',\n\t\t'N.W.2d',\n\t\t'S.E.',\n\t\t'S.E.2d',\n\t\t'S.W.',\n\t\t'S.W.2d',\n\t\t'S.W.3d',\n\t\t'So.',\n\t\t'So. 2d',\n\t\t'So. 3d',\n\t]\n\n\tif (commonReporters.some((r) => reporter.includes(r))) {\n\t\tconfidence += 0.3\n\t}\n\n\t// Valid year check (not in future)\n\tif (year !== undefined) {\n\t\tconst currentYear = new Date().getFullYear()\n\t\tif (year <= currentYear) {\n\t\t\tconfidence += 0.2\n\t\t}\n\t}\n\n\t// Cap at 1.0\n\tconfidence = Math.min(confidence, 1.0)\n\n\t// Override confidence for blank page citations\n\tif (hasBlankPage) {\n\t\tconfidence = 0.8\n\t}\n\n\treturn {\n\t\ttype: 'case',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0, // Placeholder - timing handled by orchestration layer\n\t\tpatternsChecked: 1, // Single token processed\n\t\tvolume,\n\t\treporter,\n\t\tpage,\n\t\tpincite,\n\t\tcourt,\n\t\tyear,\n\t\thasBlankPage,\n\t\tdate,\n\t\tfullSpan,\n\t\tcaseName,\n\t\tdisposition,\n\t\tplaintiff,\n\t\tplaintiffNormalized,\n\t\tdefendant,\n\t\tdefendantNormalized,\n\t\tproceduralPrefix,\n\t}\n}\n","/**\n * Statute Citation Extraction\n *\n * Parses tokenized statute citations to extract title, code, section, and\n * optional subsections. Examples: \"42 U.S.C. § 1983\", \"Cal. Civ. Code § 1234(a)(1)\"\n *\n * @module extract/extractStatute\n */\n\nimport type { Token } from '@/tokenize'\nimport type { StatuteCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts statute citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Title: Optional leading digits (e.g., \"42\" from \"42 U.S.C. § 1983\")\n * - Code: Statutory code abbreviation (e.g., \"U.S.C.\", \"Cal. Civ. Code\")\n * - Section: Section number after § symbol (e.g., \"1983\")\n * - Subsections: Optional parenthetical subdivisions (e.g., \"(a)(1)\")\n *\n * Confidence scoring:\n * - Base: 0.5\n * - Known code pattern (U.S.C., C.F.R., state codes): +0.3\n * - Capped at 1.0\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns StatuteCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"42 U.S.C. § 1983\",\n * span: { cleanStart: 10, cleanEnd: 26 },\n * type: \"statute\",\n * patternId: \"usc\"\n * }\n * const citation = extractStatute(token, transformationMap)\n * // citation = {\n * // type: \"statute\",\n * // title: 42,\n * // code: \"U.S.C.\",\n * // section: \"1983\",\n * // ...\n * // }\n * ```\n */\nexport function extractStatute(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): StatuteCitation {\n\tconst { text, span } = token\n\n\t// Parse title-code-section using regex\n\t// Pattern: optional title (digits) + code (letters/periods/spaces) + § + section\n\tconst statuteRegex = /^(?:(\\d+)\\s+)?([A-Za-z.\\s]+?)\\s*§\\s*(\\d+[A-Za-z0-9-]*)/\n\tconst match = statuteRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse statute citation: ${text}`)\n\t}\n\n\tconst title = match[1] ? Number.parseInt(match[1], 10) : undefined\n\tconst code = match[2].trim()\n\tconst section = match[3]\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Calculate confidence score\n\tlet confidence = 0.5 // Base confidence\n\n\t// Known statutory code patterns\n\tconst knownCodes = [\n\t\t'U.S.C.',\n\t\t'C.F.R.',\n\t\t'Cal. Civ. Code',\n\t\t'Cal. Penal Code',\n\t\t'N.Y. Civ. Prac. L. & R.',\n\t\t'Tex. Civ. Prac. & Rem. Code',\n\t]\n\n\tif (knownCodes.some((c) => code.includes(c))) {\n\t\tconfidence += 0.3\n\t}\n\n\tconfidence = Math.min(confidence, 1.0)\n\n\treturn {\n\t\ttype: 'statute',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\ttitle,\n\t\tcode,\n\t\tsection,\n\t}\n}\n","/**\n * Journal Citation Extraction\n *\n * Parses tokenized journal citations to extract volume, journal name, page,\n * and optional metadata. Examples: \"123 Harv. L. Rev. 456\", \"75 Yale L.J. 789, 791\"\n *\n * @module extract/extractJournal\n */\n\nimport type { Token } from '@/tokenize'\nimport type { JournalCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts journal citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Leading digits (e.g., \"123\" from \"123 Harv. L. Rev. 456\")\n * - Journal: Journal abbreviation (e.g., \"Harv. L. Rev.\")\n * - Page: Starting page number (e.g., \"456\")\n * - Pincite: Optional specific page reference after comma (e.g., \", 458\")\n *\n * Confidence scoring:\n * - Base: 0.6 (journal validation happens in Phase 3)\n *\n * Note: Author and title extraction from preceding text is not implemented\n * in Phase 2. That requires context analysis in Phase 3.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns JournalCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"123 Harv. L. Rev. 456\",\n * span: { cleanStart: 10, cleanEnd: 31 },\n * type: \"journal\",\n * patternId: \"journal-standard\"\n * }\n * const citation = extractJournal(token, transformationMap)\n * // citation = {\n * // type: \"journal\",\n * // volume: 123,\n * // journal: \"Harv. L. Rev.\",\n * // abbreviation: \"Harv. L. Rev.\",\n * // page: 456,\n * // ...\n * // }\n * ```\n */\nexport function extractJournal(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): JournalCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-journal-page using regex\n\t// Pattern: volume (digits) + journal (letters/periods/spaces) + page (digits)\n\tconst journalRegex = /^(\\d+(?:-\\d+)?)\\s+([A-Za-z.\\s]+?)\\s+(\\d+)/\n\tconst match = journalRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse journal citation: ${text}`)\n\t}\n\n\tconst rawVolume = match[1]\n\tconst volume = /^\\d+$/.test(rawVolume) ? Number.parseInt(rawVolume, 10) : rawVolume\n\tconst journal = match[2].trim()\n\tconst page = Number.parseInt(match[3], 10)\n\n\t// Extract optional pincite (page reference after comma)\n\tconst pinciteRegex = /,\\s*(\\d+)/\n\tconst pinciteMatch = pinciteRegex.exec(text)\n\tconst pincite = pinciteMatch ? Number.parseInt(pinciteMatch[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.6 base (journal validation against database happens in Phase 3)\n\tconst confidence = 0.6\n\n\treturn {\n\t\ttype: 'journal',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\tjournal,\n\t\tabbreviation: journal, // For Phase 2, abbreviation = journal name\n\t\tpage,\n\t\tpincite,\n\t}\n}\n","/**\n * Neutral Citation Extraction\n *\n * Parses tokenized neutral (vendor-neutral) citations to extract year, court,\n * and document number. Examples: \"2020 WL 123456\", \"2020 U.S. LEXIS 456\"\n *\n * @module extract/extractNeutral\n */\n\nimport type { Token } from '@/tokenize'\nimport type { NeutralCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts neutral citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Year: 4-digit year (e.g., \"2020\")\n * - Court: Vendor identifier (e.g., \"WL\", \"U.S. LEXIS\")\n * - Document number: Unique document identifier (e.g., \"123456\")\n *\n * Confidence scoring:\n * - 1.0 (neutral format is unambiguous and standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns NeutralCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"2020 WL 123456\",\n * span: { cleanStart: 10, cleanEnd: 24 },\n * type: \"neutral\",\n * patternId: \"westlaw-neutral\"\n * }\n * const citation = extractNeutral(token, transformationMap)\n * // citation = {\n * // type: \"neutral\",\n * // year: 2020,\n * // court: \"WL\",\n * // documentNumber: \"123456\",\n * // confidence: 1.0,\n * // ...\n * // }\n * ```\n */\nexport function extractNeutral(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): NeutralCitation {\n\tconst { text, span } = token\n\n\t// Parse year-court-documentNumber using regex\n\t// Pattern: 4-digit year + court identifier (WL, LEXIS, etc.) + document number\n\tconst neutralRegex = /^(\\d{4})\\s+(WL|LEXIS|U\\.S\\.\\s+LEXIS)\\s+(\\d+)/\n\tconst match = neutralRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse neutral citation: ${text}`)\n\t}\n\n\tconst year = Number.parseInt(match[1], 10)\n\tconst court = match[2]\n\tconst documentNumber = match[3]\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 1.0 (neutral format is unambiguous)\n\tconst confidence = 1.0\n\n\treturn {\n\t\ttype: 'neutral',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tyear,\n\t\tcourt,\n\t\tdocumentNumber,\n\t}\n}\n","/**\n * Public Law Citation Extraction\n *\n * Parses tokenized public law citations to extract congress number and law number.\n * Examples: \"Pub. L. No. 116-283\", \"Pub. L. 117-58\"\n *\n * @module extract/extractPublicLaw\n */\n\nimport type { Token } from '@/tokenize'\nimport type { PublicLawCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts public law citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Congress: Congress number (e.g., \"116\" from \"Pub. L. No. 116-283\")\n * - Law number: Law number within that Congress (e.g., \"283\")\n *\n * Confidence scoring:\n * - 0.9 (public law format is fairly standard)\n *\n * Note: Bill title extraction from nearby text is not implemented in Phase 2.\n * That requires context analysis in Phase 3.\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns PublicLawCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Pub. L. No. 116-283\",\n * span: { cleanStart: 10, cleanEnd: 29 },\n * type: \"publicLaw\",\n * patternId: \"public-law\"\n * }\n * const citation = extractPublicLaw(token, transformationMap)\n * // citation = {\n * // type: \"publicLaw\",\n * // congress: 116,\n * // lawNumber: 283,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractPublicLaw(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): PublicLawCitation {\n\tconst { text, span } = token\n\n\t// Parse congress-lawNumber using regex\n\t// Pattern: \"Pub. L.\" (with optional \"No.\") + congress number + \"-\" + law number\n\tconst publicLawRegex = /Pub\\.\\s?L\\.(?:\\s?No\\.)?\\s?(\\d+)-(\\d+)/\n\tconst match = publicLawRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse public law citation: ${text}`)\n\t}\n\n\tconst congress = Number.parseInt(match[1], 10)\n\tconst lawNumber = Number.parseInt(match[2], 10)\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (public law format is fairly standard)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'publicLaw',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tcongress,\n\t\tlawNumber,\n\t}\n}\n","/**\n * Federal Register Citation Extraction\n *\n * Parses tokenized Federal Register citations to extract volume, page, and\n * optional year. Examples: \"85 Fed. Reg. 12345\", \"86 Fed. Reg. 56789 (Jan. 15, 2021)\"\n *\n * @module extract/extractFederalRegister\n */\n\nimport type { Token } from '@/tokenize'\nimport type { FederalRegisterCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts Federal Register citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Federal Register volume number (e.g., \"85\")\n * - Page: Page number (e.g., \"12345\")\n * - Year: Optional publication year in parentheses (e.g., \"(2021)\")\n *\n * Confidence scoring:\n * - 0.9 (Federal Register format is standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns FederalRegisterCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"85 Fed. Reg. 12345\",\n * span: { cleanStart: 10, cleanEnd: 28 },\n * type: \"federalRegister\",\n * patternId: \"federal-register\"\n * }\n * const citation = extractFederalRegister(token, transformationMap)\n * // citation = {\n * // type: \"federalRegister\",\n * // volume: 85,\n * // page: 12345,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractFederalRegister(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): FederalRegisterCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-page using regex\n\t// Pattern: volume (digits) + \"Fed. Reg.\" + page (digits)\n\tconst federalRegisterRegex = /^(\\d+(?:-\\d+)?)\\s+Fed\\.\\s?Reg\\.\\s+(\\d+)/\n\tconst match = federalRegisterRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse Federal Register citation: ${text}`)\n\t}\n\n\tconst rawVolume = match[1]\n\tconst volume = /^\\d+$/.test(rawVolume) ? Number.parseInt(rawVolume, 10) : rawVolume\n\tconst page = Number.parseInt(match[2], 10)\n\n\t// Extract optional year in parentheses\n\t// Pattern: \"(year)\" or \"(month day, year)\"\n\tconst yearRegex = /\\((?:.*?\\s)?(\\d{4})\\)/\n\tconst yearMatch = yearRegex.exec(text)\n\tconst year = yearMatch ? Number.parseInt(yearMatch[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (Federal Register format is standardized)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'federalRegister',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\tpage,\n\t\tyear,\n\t}\n}\n","/**\n * Statutes at Large Citation Extractor\n *\n * Extracts session law citations from the Statutes at Large (e.g., \"124 Stat. 119\").\n * These are chronological compilations of federal laws, distinct from both\n * codified statutes (U.S.C.) and case reporters.\n *\n * Format: volume Stat. page [(year)]\n *\n * @module extract/extractStatutesAtLarge\n */\n\nimport type { StatutesAtLargeCitation } from \"@/types/citation\"\nimport type { TransformationMap } from \"@/types/span\"\nimport type { Token } from \"@/tokenize/tokenizer\"\n\nexport function extractStatutesAtLarge(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): StatutesAtLargeCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-Stat.-page\n\tconst statRegex = /^(\\d+(?:-\\d+)?)\\s+Stat\\.\\s+(\\d+)/\n\tconst match = statRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse Statutes at Large citation: ${text}`)\n\t}\n\n\tconst rawVolume = match[1]\n\tconst volume = /^\\d+$/.test(rawVolume) ? Number.parseInt(rawVolume, 10) : rawVolume\n\tconst page = Number.parseInt(match[2], 10)\n\n\t// Extract optional year in parentheses\n\tconst yearRegex = /\\((?:.*?\\s)?(\\d{4})\\)/\n\tconst yearMatch = yearRegex.exec(text)\n\tconst year = yearMatch ? Number.parseInt(yearMatch[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (Statutes at Large format is standardized)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'statutesAtLarge',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\tpage,\n\t\tyear,\n\t}\n}\n","/**\n * Short-form Citation Extraction\n *\n * Parses tokenized short-form citations (Id., supra, short-form case) to extract\n * metadata. Short-form citations refer to earlier citations in the document.\n *\n * @module extract/extractShortForms\n */\n\nimport type { Token } from '@/tokenize'\nimport type { IdCitation, SupraCitation, ShortFormCaseCitation } from '@/types/citation'\nimport type { TransformationMap } from '@/types/span'\n\n/**\n * Extracts Id. citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Pincite: Optional page reference (e.g., \"253\" from \"Id. at 253\")\n *\n * Confidence scoring:\n * - 1.0 (Id. format is unambiguous and standardized)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns IdCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Id. at 253\",\n * span: { cleanStart: 10, cleanEnd: 20 },\n * type: \"case\",\n * patternId: \"id\"\n * }\n * const citation = extractId(token, transformationMap)\n * // citation = {\n * // type: \"id\",\n * // pincite: 253,\n * // confidence: 1.0,\n * // ...\n * // }\n * ```\n */\nexport function extractId(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): IdCitation {\n\tconst { text, span } = token\n\n\t// Parse Id. with optional pincite\n\t// Pattern: Id. or Ibid. with optional \"at [page]\"\n\tconst idRegex = /[Ii](?:d|bid)\\.(?:\\s+at\\s+(\\d+))?/\n\tconst match = idRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse Id. citation: ${text}`)\n\t}\n\n\t// Extract pincite if present\n\tconst pincite = match[1] ? Number.parseInt(match[1], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 1.0 (Id. format is unambiguous)\n\tconst confidence = 1.0\n\n\treturn {\n\t\ttype: 'id',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tpincite,\n\t}\n}\n\n/**\n * Extracts supra citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Party name: Name preceding \"supra\" (e.g., \"Smith\" from \"Smith, supra\")\n * - Pincite: Optional page reference (e.g., \"460\" from \"Smith, supra, at 460\")\n *\n * Confidence scoring:\n * - 0.9 (supra format is fairly standard but party name extraction can vary)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns SupraCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"Smith, supra, at 460\",\n * span: { cleanStart: 10, cleanEnd: 30 },\n * type: \"case\",\n * patternId: \"supra\"\n * }\n * const citation = extractSupra(token, transformationMap)\n * // citation = {\n * // type: \"supra\",\n * // partyName: \"Smith\",\n * // pincite: 460,\n * // confidence: 0.9,\n * // ...\n * // }\n * ```\n */\nexport function extractSupra(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): SupraCitation {\n\tconst { text, span } = token\n\n\t// Parse party name and optional pincite\n\t// Pattern: word(s), supra [, at page]\n\t// Note: Matches party names including \"v.\" (e.g., \"Smith v. Jones\")\n\tconst supraRegex = /\\b([A-Z][a-zA-Z]+(?:(?:\\s+v\\.?\\s+|\\s+)[A-Z][a-zA-Z]+)*)\\s*,?\\s+supra(?:,?\\s+at\\s+(\\d+))?/\n\tconst match = supraRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse supra citation: ${text}`)\n\t}\n\n\tconst partyName = match[1]\n\tconst pincite = match[2] ? Number.parseInt(match[2], 10) : undefined\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.9 (supra format is fairly standard)\n\tconst confidence = 0.9\n\n\treturn {\n\t\ttype: 'supra',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tpartyName,\n\t\tpincite,\n\t}\n}\n\n/**\n * Extracts short-form case citation metadata from a tokenized citation.\n *\n * Parses token text to extract:\n * - Volume: Volume number\n * - Reporter: Reporter abbreviation\n * - Pincite: Page reference (from \"at [page]\" pattern)\n *\n * Confidence scoring:\n * - 0.7 (short-form case citations are more ambiguous than full citations)\n *\n * @param token - Token from tokenizer containing matched text and clean positions\n * @param transformationMap - Position mapping from clean → original text\n * @returns ShortFormCaseCitation with parsed metadata and translated positions\n *\n * @example\n * ```typescript\n * const token = {\n * text: \"500 F.2d at 125\",\n * span: { cleanStart: 10, cleanEnd: 25 },\n * type: \"case\",\n * patternId: \"short-form-case\"\n * }\n * const citation = extractShortFormCase(token, transformationMap)\n * // citation = {\n * // type: \"shortFormCase\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // pincite: 125,\n * // confidence: 0.7,\n * // ...\n * // }\n * ```\n */\nexport function extractShortFormCase(\n\ttoken: Token,\n\ttransformationMap: TransformationMap,\n): ShortFormCaseCitation {\n\tconst { text, span } = token\n\n\t// Parse volume-reporter-at-page\n\t// Pattern: number space abbreviation space \"at\" space number\n\tconst shortFormRegex = /(\\d+(?:-\\d+)?)\\s+([A-Z][A-Za-z.\\s]+?(?:\\d[a-z])?)\\s+at\\s+(\\d+)/\n\tconst match = shortFormRegex.exec(text)\n\n\tif (!match) {\n\t\tthrow new Error(`Failed to parse short-form case citation: ${text}`)\n\t}\n\n\tconst rawVolume = match[1]\n\tconst volume = /^\\d+$/.test(rawVolume) ? Number.parseInt(rawVolume, 10) : rawVolume\n\tconst reporter = match[2].trim() // Remove trailing spaces\n\tconst pincite = Number.parseInt(match[3], 10)\n\n\t// Translate positions from clean → original\n\tconst originalStart =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanStart) ?? span.cleanStart\n\tconst originalEnd =\n\t\ttransformationMap.cleanToOriginal.get(span.cleanEnd) ?? span.cleanEnd\n\n\t// Confidence: 0.7 (short-form citations are more ambiguous)\n\tconst confidence = 0.7\n\n\treturn {\n\t\ttype: 'shortFormCase',\n\t\ttext,\n\t\tspan: {\n\t\t\tcleanStart: span.cleanStart,\n\t\t\tcleanEnd: span.cleanEnd,\n\t\t\toriginalStart,\n\t\t\toriginalEnd,\n\t\t},\n\t\tconfidence,\n\t\tmatchedText: text,\n\t\tprocessTimeMs: 0,\n\t\tpatternsChecked: 1,\n\t\tvolume,\n\t\treporter,\n\t\tpincite,\n\t}\n}\n","/**\n * Scope Boundary Detection\n *\n * Detects paragraph/section boundaries in text and validates whether\n * an antecedent citation is within the resolution scope.\n */\n\nimport type { Citation } from '../types/citation'\nimport type { ScopeStrategy } from './types'\n\n/**\n * Detects paragraph boundaries from text and assigns each citation to a paragraph.\n *\n * @param text - Original document text\n * @param citations - Extracted citations with position spans\n * @param boundaryPattern - Regex pattern to detect boundaries (default: /\\n\\n+/)\n * @returns Map of citation index to paragraph number (0-based)\n */\nexport function detectParagraphBoundaries(\n text: string,\n citations: Citation[],\n boundaryPattern: RegExp = /\\n\\n+/g\n): Map<number, number> {\n const paragraphMap = new Map<number, number>()\n\n // Find all paragraph boundaries (positions in text)\n const boundaries: number[] = [0] // Start of document is first boundary\n let match: RegExpExecArray | null\n\n while ((match = boundaryPattern.exec(text)) !== null) {\n // Boundary is at end of match (start of next paragraph)\n boundaries.push(match.index + match[0].length)\n }\n\n boundaries.push(text.length) // End of document\n\n // Assign each citation to a paragraph\n for (let i = 0; i < citations.length; i++) {\n const citation = citations[i]\n const citationStart = citation.span.originalStart\n\n // Find which paragraph this citation belongs to\n let paragraphNum = 0\n for (let j = 0; j < boundaries.length - 1; j++) {\n if (citationStart >= boundaries[j] && citationStart < boundaries[j + 1]) {\n paragraphNum = j\n break\n }\n }\n\n paragraphMap.set(i, paragraphNum)\n }\n\n return paragraphMap\n}\n\n/**\n * Checks if an antecedent citation is within resolution scope.\n *\n * @param antecedentIndex - Index of the antecedent citation\n * @param currentIndex - Index of current citation being resolved\n * @param paragraphMap - Map of citation index to paragraph number\n * @param strategy - Scope boundary strategy\n * @returns true if antecedent is within scope, false otherwise\n */\nexport function isWithinBoundary(\n antecedentIndex: number,\n currentIndex: number,\n paragraphMap: Map<number, number>,\n strategy: ScopeStrategy\n): boolean {\n if (strategy === 'none') {\n // No boundary restriction - can resolve across entire document\n return true\n }\n\n // Get paragraph numbers for both citations\n const antecedentParagraph = paragraphMap.get(antecedentIndex)\n const currentParagraph = paragraphMap.get(currentIndex)\n\n // If either is undefined, default to allowing resolution\n if (antecedentParagraph === undefined || currentParagraph === undefined) {\n return true\n }\n\n // For paragraph/section/footnote strategies, citations must be in same boundary\n // (In this MVP, section and footnote behave same as paragraph - future enhancement)\n return antecedentParagraph === currentParagraph\n}\n","/**\n * Levenshtein Distance\n *\n * Calculates edit distance between strings for fuzzy party name matching\n * in supra citation resolution.\n *\n * Uses dynamic programming for O(m*n) time complexity.\n */\n\n/**\n * Calculates Levenshtein distance (edit distance) between two strings.\n *\n * The edit distance is the minimum number of single-character edits\n * (insertions, deletions, substitutions) needed to change one string into the other.\n *\n * @param a - First string\n * @param b - Second string\n * @returns Number of edits required (0 = identical)\n */\nexport function levenshteinDistance(a: string, b: string): number {\n // Handle empty strings\n if (a.length === 0) return b.length\n if (b.length === 0) return a.length\n\n // Create 2D array for dynamic programming\n // dp[i][j] = edit distance between a[0...i-1] and b[0...j-1]\n const dp: number[][] = Array.from({ length: a.length + 1 }, () =>\n Array(b.length + 1).fill(0)\n )\n\n // Initialize base cases\n for (let i = 0; i <= a.length; i++) {\n dp[i][0] = i // Distance from a[0...i-1] to empty string\n }\n for (let j = 0; j <= b.length; j++) {\n dp[0][j] = j // Distance from empty string to b[0...j-1]\n }\n\n // Fill the DP table\n for (let i = 1; i <= a.length; i++) {\n for (let j = 1; j <= b.length; j++) {\n if (a[i - 1] === b[j - 1]) {\n // Characters match - no edit needed\n dp[i][j] = dp[i - 1][j - 1]\n } else {\n // Characters differ - take minimum of:\n // 1. Insert: dp[i][j-1] + 1\n // 2. Delete: dp[i-1][j] + 1\n // 3. Substitute: dp[i-1][j-1] + 1\n dp[i][j] = 1 + Math.min(\n dp[i - 1][j], // Delete from a\n dp[i][j - 1], // Insert into a\n dp[i - 1][j - 1] // Substitute\n )\n }\n }\n }\n\n return dp[a.length][b.length]\n}\n\n/**\n * Calculates normalized Levenshtein similarity (0-1 scale).\n *\n * Returns similarity score where:\n * - 1.0 = identical strings\n * - 0.0 = completely different\n *\n * Comparison is case-insensitive.\n *\n * @param a - First string\n * @param b - Second string\n * @returns Similarity score from 0 to 1\n */\nexport function normalizedLevenshteinDistance(a: string, b: string): number {\n // Normalize to lowercase for case-insensitive comparison\n const lowerA = a.toLowerCase()\n const lowerB = b.toLowerCase()\n\n // Calculate raw edit distance\n const distance = levenshteinDistance(lowerA, lowerB)\n\n // Normalize by max length\n const maxLength = Math.max(lowerA.length, lowerB.length)\n if (maxLength === 0) return 1.0 // Both empty strings\n\n // Convert distance to similarity: 1 - (distance / maxLength)\n return 1 - distance / maxLength\n}\n","/**\n * Document-Scoped Citation Resolver\n *\n * Resolves short-form citations (Id./supra/short-form case) to their full antecedent citations\n * by maintaining resolution context and enforcing scope boundaries.\n *\n * Resolution rules:\n * - Id. resolves to immediately preceding full citation (within scope)\n * - Supra resolves to full citation with matching party name (within scope)\n * - Short-form case resolves to full case with matching volume/reporter (within scope)\n */\n\nimport type {\n Citation,\n FullCaseCitation,\n IdCitation,\n SupraCitation,\n ShortFormCaseCitation,\n} from '../types/citation'\nimport { isFullCitation } from '../types/guards'\nimport type {\n ResolutionOptions,\n ResolutionResult,\n ResolvedCitation,\n ResolutionContext,\n} from './types'\nimport { detectParagraphBoundaries, isWithinBoundary } from './scopeBoundary'\nimport { normalizedLevenshteinDistance } from './levenshtein'\n\n/**\n * Document-scoped resolver that processes citations sequentially\n * and resolves short-form citations to their antecedents.\n */\nexport class DocumentResolver {\n private readonly citations: Citation[]\n private readonly text: string\n private readonly options: Required<ResolutionOptions>\n private readonly context: ResolutionContext\n\n /**\n * Creates a new DocumentResolver.\n *\n * @param citations - All citations in document (in order of appearance)\n * @param text - Original document text\n * @param options - Resolution options\n */\n constructor(\n citations: Citation[],\n text: string,\n options: ResolutionOptions = {}\n ) {\n this.citations = citations\n this.text = text\n\n // Apply defaults to options\n this.options = {\n scopeStrategy: options.scopeStrategy ?? 'paragraph',\n autoDetectParagraphs: options.autoDetectParagraphs ?? true,\n paragraphBoundaryPattern: options.paragraphBoundaryPattern ?? /\\n\\n+/g,\n fuzzyPartyMatching: options.fuzzyPartyMatching ?? true,\n partyMatchThreshold: options.partyMatchThreshold ?? 0.8,\n allowNestedResolution: options.allowNestedResolution ?? false,\n reportUnresolved: options.reportUnresolved ?? true,\n }\n\n // Initialize resolution context\n this.context = {\n citationIndex: 0,\n allCitations: citations,\n lastFullCitation: undefined,\n fullCitationHistory: new Map(),\n paragraphMap: new Map(),\n }\n\n // Detect paragraph boundaries if enabled\n if (this.options.autoDetectParagraphs) {\n this.context.paragraphMap = detectParagraphBoundaries(\n text,\n citations,\n this.options.paragraphBoundaryPattern\n )\n }\n }\n\n /**\n * Resolves all citations in the document.\n *\n * @returns Array of citations with resolution metadata\n */\n resolve(): ResolvedCitation[] {\n const resolved: ResolvedCitation[] = []\n\n for (let i = 0; i < this.citations.length; i++) {\n this.context.citationIndex = i\n const citation = this.citations[i]\n\n // Resolve based on citation type\n let resolution: ResolutionResult | undefined\n\n switch (citation.type) {\n case 'id':\n resolution = this.resolveId(citation)\n break\n case 'supra':\n resolution = this.resolveSupra(citation)\n break\n case 'shortFormCase':\n resolution = this.resolveShortFormCase(citation)\n break\n default:\n // Full citation - update context for future resolutions\n if (isFullCitation(citation)) {\n this.context.lastFullCitation = i\n this.trackFullCitation(citation, i)\n }\n break\n }\n\n // Add citation with resolution metadata\n // Type assertion is safe: runtime logic only sets resolution on short-form citations\n resolved.push({\n ...citation,\n resolution,\n } as ResolvedCitation)\n }\n\n return resolved\n }\n\n /**\n * Resolves Id. citation to immediately preceding full case citation.\n */\n private resolveId(_citation: IdCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n\n // Find most recent full case citation (Id. only resolves to case citations, not statutes/journals)\n let antecedentIndex: number | undefined\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.citations[i]\n if (candidate.type === 'case') {\n antecedentIndex = i\n break\n }\n }\n\n // Check if we have a previous case citation\n if (antecedentIndex === undefined) {\n return this.createFailureResult('No preceding full case citation found')\n }\n\n // Check scope boundary\n if (!this.isWithinScope(antecedentIndex, currentIndex)) {\n return this.createFailureResult('Antecedent citation outside scope boundary')\n }\n\n return {\n resolvedTo: antecedentIndex,\n confidence: 1.0, // Id. resolution is unambiguous when successful\n }\n }\n\n /**\n * Resolves supra citation by matching party name.\n */\n private resolveSupra(citation: SupraCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n const targetPartyName = this.normalizePartyName(citation.partyName)\n\n // Search full citation history for matching party name\n let bestMatch: { index: number; similarity: number } | undefined\n\n for (const [partyName, citationIndex] of this.context.fullCitationHistory) {\n // Check scope boundary\n if (!this.isWithinScope(citationIndex, currentIndex)) {\n continue\n }\n\n // Calculate similarity\n const similarity = normalizedLevenshteinDistance(targetPartyName, partyName)\n\n // Update best match if this is better\n if (!bestMatch || similarity > bestMatch.similarity) {\n bestMatch = { index: citationIndex, similarity }\n }\n }\n\n // Check if we found a match above threshold\n if (!bestMatch) {\n return this.createFailureResult('No full citation found in scope')\n }\n\n if (bestMatch.similarity < this.options.partyMatchThreshold) {\n return this.createFailureResult(\n `Party name similarity ${bestMatch.similarity.toFixed(2)} below threshold ${this.options.partyMatchThreshold}`\n )\n }\n\n // Return successful resolution with confidence based on similarity\n const warnings: string[] = []\n if (bestMatch.similarity < 1.0) {\n warnings.push(`Fuzzy match: similarity ${bestMatch.similarity.toFixed(2)}`)\n }\n\n return {\n resolvedTo: bestMatch.index,\n confidence: bestMatch.similarity,\n warnings: warnings.length > 0 ? warnings : undefined,\n }\n }\n\n /**\n * Resolves short-form case citation by matching volume/reporter.\n */\n private resolveShortFormCase(citation: ShortFormCaseCitation): ResolutionResult | undefined {\n const currentIndex = this.context.citationIndex\n\n // Search backwards for matching full case citation\n for (let i = currentIndex - 1; i >= 0; i--) {\n const candidate = this.citations[i]\n\n // Only match against full case citations\n if (candidate.type !== 'case') {\n continue\n }\n\n // Check if volume and reporter match\n if (\n candidate.volume === citation.volume &&\n this.normalizeReporter(candidate.reporter) === this.normalizeReporter(citation.reporter)\n ) {\n // Check scope boundary\n if (!this.isWithinScope(i, currentIndex)) {\n return this.createFailureResult('Matching citation outside scope boundary')\n }\n\n // Found a match\n return {\n resolvedTo: i,\n confidence: 0.95, // High confidence but not perfect (multiple cases could have same volume/reporter)\n }\n }\n }\n\n return this.createFailureResult('No matching full case citation found')\n }\n\n /**\n * Tracks a full citation in the resolution history.\n * Extracts party name for supra resolution.\n * Uses extracted party names (Phase 7) when available, falls back to backward search.\n */\n private trackFullCitation(citation: Citation, index: number): void {\n // Only case citations have party names for supra resolution\n if (citation.type === 'case') {\n // Phase 7: Use extracted party names when available\n // Defendant name stored first (preferred for Bluebook-style supra matching)\n if (citation.defendantNormalized) {\n this.context.fullCitationHistory.set(citation.defendantNormalized, index)\n }\n if (citation.plaintiffNormalized) {\n this.context.fullCitationHistory.set(citation.plaintiffNormalized, index)\n }\n\n // Fallback: backward search from text (pre-Phase 7 compatibility)\n if (!citation.plaintiffNormalized && !citation.defendantNormalized) {\n const partyName = this.extractPartyName(citation)\n if (partyName) {\n const normalized = this.normalizePartyName(partyName)\n this.context.fullCitationHistory.set(normalized, index)\n }\n }\n }\n }\n\n /**\n * Extracts party name from full case citation text.\n * Handles \"Party v. Party\" format by looking at text before citation span.\n */\n private extractPartyName(citation: FullCaseCitation): string | undefined {\n // Look at text before citation span to find party names\n // Case citations typically appear as: \"Smith v. Jones, 100 F.2d 10\"\n // But tokenizer only captures \"100 F.2d 10\" - we need to look backwards in text\n\n const citationStart = citation.span.originalStart\n // Look backwards up to 100 characters for party name\n const lookbackStart = Math.max(0, citationStart - 100)\n const beforeText = this.text.substring(lookbackStart, citationStart)\n\n // Match pattern: \"FirstParty v. SecondParty, \" before the citation\n // Capture the first party name (handles single-letter party names like \"A\" or \"B\")\n const vMatch = beforeText.match(/([A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*)\\s+v\\.?\\s+[A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*,\\s*$/)\n if (vMatch) {\n return this.stripSignalWords(vMatch[1].trim())\n }\n\n // Fallback: try to find any capitalized word(s) before comma\n const beforeComma = beforeText.match(/([A-Z][a-zA-Z]*(?:\\s+[A-Z][a-zA-Z]*)*),\\s*$/)\n if (beforeComma) {\n return this.stripSignalWords(beforeComma[1].trim())\n }\n return undefined\n }\n\n /**\n * Strips citation signal words that may precede party names.\n * E.g., \"In Smith\" → \"Smith\", \"See Also Jones\" → \"Jones\"\n * Preserves \"In re\" which is a case name format, not a signal word.\n */\n private stripSignalWords(name: string): string {\n const stripped = name.replace(/^(?:In(?!\\s+re\\b)|See(?:\\s+[Aa]lso)?|Compare|But(?:\\s+[Ss]ee)?|Cf\\.?|Also)\\s+/i, '').trim()\n // Only return stripped version if something remains\n return stripped.length > 0 ? stripped : name\n }\n\n /**\n * Normalizes party name for matching.\n */\n private normalizePartyName(name: string): string {\n return name\n .toLowerCase()\n .replace(/\\s+/g, ' ') // Normalize whitespace\n .trim()\n }\n\n /**\n * Normalizes reporter abbreviation for matching.\n */\n private normalizeReporter(reporter: string): string {\n return reporter\n .toLowerCase()\n .replace(/\\s+/g, '') // Remove spaces (F.2d vs F. 2d)\n .replace(/\\./g, '') // Remove periods\n }\n\n /**\n * Checks if antecedent citation is within scope boundary.\n */\n private isWithinScope(antecedentIndex: number, currentIndex: number): boolean {\n return isWithinBoundary(\n antecedentIndex,\n currentIndex,\n this.context.paragraphMap,\n this.options.scopeStrategy\n )\n }\n\n /**\n * Creates a failure result for unresolved citations.\n */\n private createFailureResult(reason: string): ResolutionResult | undefined {\n if (this.options.reportUnresolved) {\n return {\n resolvedTo: undefined,\n failureReason: reason,\n confidence: 0.0,\n }\n }\n return undefined\n }\n}\n","/**\n * Citation Resolution\n *\n * Resolves short-form citations (Id./supra/short-form case) to their full antecedents.\n *\n * @example\n * ```ts\n * import { resolveCitations } from 'eyecite-ts/resolve'\n * import { extractCitations } from 'eyecite-ts'\n *\n * const text = 'See Smith v. Jones, 500 F.2d 100 (1974). Id. at 105.'\n * const citations = extractCitations(text)\n * const resolved = resolveCitations(citations, text)\n *\n * // resolved[1] is Id. citation with resolution.resolvedTo = 0\n * console.log(resolved[1].resolution?.resolvedTo) // 0 (points to Smith v. Jones)\n * ```\n */\n\nimport type { Citation } from '../types/citation'\nimport type { ResolutionOptions, ResolvedCitation } from './types'\nimport { DocumentResolver } from './DocumentResolver'\n\n/**\n * Resolves short-form citations to their full antecedents.\n *\n * Convenience wrapper around DocumentResolver that handles common use cases.\n *\n * @param citations - Extracted citations in order of appearance\n * @param text - Original document text\n * @param options - Resolution options\n * @returns Citations with resolution metadata\n */\nexport function resolveCitations(\n citations: Citation[],\n text: string,\n options?: ResolutionOptions\n): ResolvedCitation[] {\n const resolver = new DocumentResolver(citations, text, options)\n return resolver.resolve()\n}\n\n// Re-export core types and classes\nexport { DocumentResolver } from './DocumentResolver'\nexport type {\n ResolutionOptions,\n ResolutionResult,\n ResolvedCitation,\n ScopeStrategy,\n} from './types'\n","/**\n * Parallel Citation Detection\n *\n * Detects parallel citation groups (same case in multiple reporters) using\n * comma-separated case citations sharing a closing parenthetical.\n *\n * Detection happens after tokenization and deduplication, before extraction\n * in the main extractCitations pipeline.\n *\n * @module extract/detectParallel\n */\n\nimport type { Token } from '@/tokenize/tokenizer'\n\n/**\n * Maximum characters allowed between end of comma and start of next citation.\n * Bluebook standard uses tight spacing: \"500 F.2d 123, 200 F. Supp. 456\"\n */\nconst MAX_PROXIMITY = 5\n\n/**\n * Maximum total gap (chars) between end of one citation and start of next\n * to even consider them as parallel candidates. Beyond this distance, we can\n * skip all other checks (comma, parenthetical, etc.) for performance.\n * Includes comma, spaces, and potential pincite: \", 125, \" = ~10 chars\n */\nconst MAX_GAP_FOR_PARALLEL = 20\n\n/**\n * Detect parallel citation groups from tokenized citations.\n *\n * Returns a map of primary citation index to array of secondary citation indices.\n * Parallel citations are comma-separated case citations sharing a parenthetical.\n *\n * Detection algorithm:\n * 1. Iterate tokens with lookahead (i, i+1, i+2...)\n * 2. Check if token[i] and token[i+1] are both case citations\n * 3. Check if comma separates them (within MAX_PROXIMITY chars)\n * 4. Check if both citations share a closing parenthetical (via cleaned text)\n * 5. If all conditions met, add to parallel group\n * 6. Continue for chain (i+1, i+2, i+3...) until no more matches\n *\n * @param tokens - Tokenized citations (after deduplication)\n * @param cleanedText - Cleaned text to check for commas and parentheticals\n * @returns Map of primary index to array of secondary indices\n *\n * @example\n * ```typescript\n * const tokens = [\n * { text: \"410 U.S. 113\", span: { cleanStart: 0, cleanEnd: 12 }, type: \"case\" },\n * { text: \"93 S. Ct. 705\", span: { cleanStart: 14, cleanEnd: 27 }, type: \"case\" }\n * ]\n * const cleaned = \"410 U.S. 113, 93 S. Ct. 705 (1973)\"\n * const result = detectParallelCitations(tokens, cleaned)\n * // result = Map { 0 => [1] }\n * ```\n */\nexport function detectParallelCitations(\n\ttokens: Token[],\n\tcleanedText = '',\n): Map<number, number[]> {\n\tconst parallelGroups = new Map<number, number[]>()\n\n\t// Edge cases: empty array or no text\n\tif (tokens.length === 0 || cleanedText === '') {\n\t\treturn parallelGroups\n\t}\n\n\t// Track which tokens are already in a parallel group (as secondary)\n\tconst usedAsSecondary = new Set<number>()\n\n\tfor (let i = 0; i < tokens.length; i++) {\n\t\tconst primary = tokens[i]\n\n\t\t// Skip if not a case citation\n\t\tif (primary.type !== 'case') {\n\t\t\tcontinue\n\t\t}\n\n\t\t// Skip if already used as secondary in another group\n\t\tif (usedAsSecondary.has(i)) {\n\t\t\tcontinue\n\t\t}\n\n\t\tconst secondaryIndices: number[] = []\n\n\t\t// Look ahead for potential secondary citations\n\t\t// Chain detection: \"A, B, C (year)\" where A is primary, B and C are secondaries\n\t\tfor (let j = i + 1; j < tokens.length; j++) {\n\t\t\tconst secondary = tokens[j]\n\n\t\t\t// Only case citations can be parallel\n\t\t\tif (secondary.type !== 'case') {\n\t\t\t\tbreak // Stop looking once we hit non-case citation\n\t\t\t}\n\n\t\t\t// Check proximity: comma should be right after primary (or previous secondary in chain)\n\t\t\tconst prevToken = j === i + 1 ? primary : tokens[j - 1]\n\t\t\tconst gapStart = prevToken.span.cleanEnd\n\t\t\tconst gapEnd = secondary.span.cleanStart\n\n\t\t\t// Early exit: If gap is too large, no need to check comma/parenthetical\n\t\t\t// This optimization reduces O(n²) to O(n×k) where k is avg tokens within MAX_GAP\n\t\t\tconst gapSize = gapEnd - gapStart\n\t\t\tif (gapSize > MAX_GAP_FOR_PARALLEL) {\n\t\t\t\tbreak // Too far apart to be parallel, stop looking\n\t\t\t}\n\n\t\t\t// Extract the gap text between citations\n\t\t\tconst gapText = cleanedText.substring(gapStart, gapEnd)\n\n\t\t\t// Bluebook requires comma separator for parallel citations\n\t\t\tif (!gapText.includes(',')) {\n\t\t\t\tbreak // No comma = not parallel, stop looking\n\t\t\t}\n\n\t\t\t// Check proximity: distance from comma to next citation start\n\t\t\t// MAX_PROXIMITY enforces tight spacing: \"A, B\" not \"A, B\"\n\t\t\tconst commaIndex = gapText.indexOf(',')\n\t\t\tconst distanceAfterComma = gapText.length - commaIndex - 1\n\n\t\t\tif (distanceAfterComma > MAX_PROXIMITY) {\n\t\t\t\tbreak // Too far apart, stop looking\n\t\t\t}\n\n\t\t\t// Check for shared parenthetical\n\t\t\t// Both citations must share the SAME closing parenthetical\n\t\t\t// Reject: \"A (1970), B (1971)\" - separate parens = different cases\n\t\t\t// Accept: \"A, B (1970)\" - shared paren = parallel citations\n\t\t\tconst textBetween = cleanedText.substring(primary.span.cleanEnd, secondary.span.cleanEnd)\n\t\t\tif (textBetween.includes(')')) {\n\t\t\t\tbreak // Separate parentheticals = not parallel, stop looking\n\t\t\t}\n\n\t\t\t// Check that there IS a parenthetical after the secondary citation\n\t\t\tif (!hasSharedParenthetical(cleanedText, secondary.span.cleanEnd)) {\n\t\t\t\tbreak // No shared parenthetical, stop looking\n\t\t\t}\n\n\t\t\t// All conditions met - this is a parallel citation\n\t\t\tsecondaryIndices.push(j)\n\t\t\tusedAsSecondary.add(j)\n\t\t}\n\n\t\t// If we found any secondary citations, record the group\n\t\tif (secondaryIndices.length > 0) {\n\t\t\tparallelGroups.set(i, secondaryIndices)\n\t\t}\n\t}\n\n\treturn parallelGroups\n}\n\n/**\n * Check if there's a closing parenthetical after the given position.\n *\n * This is a simple heuristic: look for \"(...)\" pattern within reasonable distance.\n * Full parenthetical parsing happens in extractCase, this just validates presence.\n *\n * @param cleanedText - Cleaned text\n * @param position - Position to start searching from\n * @returns true if closing parenthetical found\n */\nfunction hasSharedParenthetical(cleanedText: string, position: number): boolean {\n\t// Look ahead up to 200 characters for opening parenthesis\n\tconst searchText = cleanedText.substring(position, position + 200)\n\n\t// Find opening parenthesis\n\tconst openIndex = searchText.indexOf('(')\n\tif (openIndex === -1) {\n\t\treturn false\n\t}\n\n\t// Find matching closing parenthesis (simple depth tracking)\n\tlet depth = 0\n\tfor (let i = openIndex; i < searchText.length; i++) {\n\t\tif (searchText[i] === '(') {\n\t\t\tdepth++\n\t\t} else if (searchText[i] === ')') {\n\t\t\tdepth--\n\t\t\tif (depth === 0) {\n\t\t\t\t// Found matching closing parenthesis\n\t\t\t\treturn true\n\t\t\t}\n\t\t}\n\t}\n\n\treturn false\n}\n","/**\n * Main Citation Extraction Pipeline\n *\n * Orchestrates the complete citation extraction flow:\n * 1. Clean text (remove HTML, normalize Unicode)\n * 2. Tokenize (apply patterns to find candidates)\n * 3. Extract (parse metadata from tokens)\n *\n * This is the primary public API for citation extraction.\n *\n * @module extract/extractCitations\n */\n\nimport { cleanText } from '@/clean'\nimport { tokenize } from '@/tokenize'\nimport {\n\textractCase,\n\textractStatute,\n\textractJournal,\n\textractNeutral,\n\textractPublicLaw,\n\textractFederalRegister,\n\textractStatutesAtLarge,\n} from '@/extract'\nimport { extractId, extractSupra, extractShortFormCase } from './extractShortForms'\nimport {\n\tcasePatterns,\n\tstatutePatterns,\n\tjournalPatterns,\n\tneutralPatterns,\n\tshortFormPatterns,\n} from '@/patterns'\nimport { resolveCitations } from '../resolve'\nimport { detectParallelCitations } from './detectParallel'\nimport type { Citation } from '@/types/citation'\nimport type { Pattern } from '@/patterns'\nimport type { ResolutionOptions, ResolvedCitation } from '../resolve/types'\n\n/**\n * Options for customizing citation extraction behavior.\n */\nexport interface ExtractOptions {\n\t/**\n\t * Custom text cleaners (overrides defaults).\n\t *\n\t * If provided, these cleaners replace the default pipeline:\n\t * [stripHtmlTags, normalizeWhitespace, normalizeUnicode, fixSmartQuotes]\n\t *\n\t * @example\n\t * ```typescript\n\t * // Use only HTML stripping, skip Unicode normalization\n\t * const citations = extractCitations(text, {\n\t * cleaners: [stripHtmlTags]\n\t * })\n\t * ```\n\t */\n\tcleaners?: Array<(text: string) => string>\n\n\t/**\n\t * Custom regex patterns (overrides defaults).\n\t *\n\t * If provided, these patterns replace the default pattern set:\n\t * [casePatterns, statutePatterns, journalPatterns, neutralPatterns, shortFormPatterns]\n\t *\n\t * @example\n\t * ```typescript\n\t * // Extract only case citations\n\t * const citations = extractCitations(text, {\n\t * patterns: casePatterns\n\t * })\n\t * ```\n\t */\n\tpatterns?: Pattern[]\n\n\t/**\n\t * Resolve short-form citations to their full antecedents (default: false).\n\t *\n\t * If true, returns ResolvedCitation[] with resolution metadata for short-form citations\n\t * (Id., supra, short-form case). Full citations are unchanged.\n\t *\n\t * @example\n\t * ```typescript\n\t * const text = \"Smith v. Jones, 500 F.2d 100 (1974). Id. at 105.\"\n\t * const citations = extractCitations(text, { resolve: true })\n\t * // citations[1].resolution.resolvedTo === 0 (points to Smith v. Jones)\n\t * ```\n\t */\n\tresolve?: boolean\n\n\t/**\n\t * Options for citation resolution (only used if resolve: true).\n\t *\n\t * @example\n\t * ```typescript\n\t * const citations = extractCitations(text, {\n\t * resolve: true,\n\t * resolutionOptions: {\n\t * scopeStrategy: 'paragraph',\n\t * fuzzyPartyMatching: true\n\t * }\n\t * })\n\t * ```\n\t */\n\tresolutionOptions?: ResolutionOptions\n}\n\n/**\n * Extracts legal citations from text using the full parsing pipeline.\n *\n * Pipeline flow:\n * 1. **Clean:** Remove HTML tags, normalize Unicode, fix smart quotes\n * 2. **Tokenize:** Apply regex patterns to find citation candidates\n * 3. **Extract:** Parse metadata (volume, reporter, page, etc.)\n * 4. **Translate:** Map positions from cleaned text back to original text\n *\n * This function is synchronous because all stages (cleaning, tokenization,\n * extraction) are synchronous. For async operations (e.g., future reporters-db\n * lookups), use extractCitationsAsync().\n *\n * Position tracking:\n * - TransformationMap is built during cleaning\n * - Tokens contain positions in cleaned text (cleanStart/cleanEnd)\n * - Extraction translates cleaned positions → original positions\n * - Final citations have originalStart/originalEnd pointing to input text\n *\n * Warnings from cleaning layer are attached to all extracted citations.\n *\n * @param text - Raw text to extract citations from (may contain HTML, Unicode)\n * @param options - Optional customization (cleaners, patterns)\n * @returns Array of citations with parsed metadata and accurate positions\n *\n * @example\n * ```typescript\n * const text = \"See Smith v. Doe, 500 F.2d 123 (9th Cir. 2020)\"\n * const citations = extractCitations(text)\n * // citations[0] = {\n * // type: \"case\",\n * // volume: 500,\n * // reporter: \"F.2d\",\n * // page: 123,\n * // court: \"9th Cir.\",\n * // year: 2020,\n * // span: { originalStart: 18, originalEnd: 30, ... }\n * // }\n * ```\n *\n * @example\n * ```typescript\n * // Extract from HTML\n * const html = \"<p>In <b>Smith</b>, 500 F.2d 123, the court held...</p>\"\n * const citations = extractCitations(html)\n * // HTML is stripped, positions point to original HTML\n * ```\n *\n * @example\n * ```typescript\n * // Extract multiple citation types\n * const text = \"See 42 U.S.C. § 1983; Smith, 500 F.2d 123; 123 Harv. L. Rev. 456\"\n * const citations = extractCitations(text)\n * // citations[0].type === \"statute\"\n * // citations[1].type === \"case\"\n * // citations[2].type === \"journal\"\n * ```\n */\nexport function extractCitations(text: string, options: ExtractOptions & { resolve: true }): ResolvedCitation[]\nexport function extractCitations(text: string, options?: ExtractOptions): Citation[]\nexport function extractCitations(\n\ttext: string,\n\toptions?: ExtractOptions,\n): Citation[] | ResolvedCitation[] {\n\tconst startTime = performance.now()\n\n\t// Step 1: Clean text\n\tconst { cleaned, transformationMap, warnings } = cleanText(\n\t\ttext,\n\t\toptions?.cleaners,\n\t)\n\n\t// Step 2: Tokenize (synchronous)\n\t// Note: Pattern order matters for deduplication - more specific patterns first\n\tconst allPatterns = options?.patterns || [\n\t\t...neutralPatterns, // Most specific (year-based format)\n\t\t...shortFormPatterns, // Short-form (requires \" at \" keyword)\n\t\t...casePatterns, // Case citations (reporter-specific)\n\t\t...statutePatterns, // Statutes (code-specific)\n\t\t...journalPatterns, // Least specific (broad pattern)\n\t]\n\tconst tokens = tokenize(cleaned, allPatterns)\n\n\t// Step 3: Deduplicate overlapping tokens\n\t// Multiple patterns may match the same text (e.g., \"500 F.2d 123\" matches both federal-reporter and state-reporter)\n\t// Keep only the most specific match for each position\n\tconst deduplicatedTokens: typeof tokens = []\n\tconst seenPositions = new Set<number | string>()\n\n\t// Performance optimization: Use bitpacking for typical documents (<65K chars)\n\t// For larger documents, fall back to string keys\n\tconst useBitpacking = cleaned.length < 65536\n\n\tfor (const token of tokens) {\n\t\tconst posKey = useBitpacking\n\t\t\t? (token.span.cleanStart << 16) | token.span.cleanEnd\n\t\t\t: `${token.span.cleanStart}-${token.span.cleanEnd}`\n\t\tif (!seenPositions.has(posKey)) {\n\t\t\tseenPositions.add(posKey)\n\t\t\tdeduplicatedTokens.push(token)\n\t\t}\n\t}\n\n\t// Step 3.5: Detect parallel citation groups\n\t// Map of primary token index -> array of secondary token indices\n\tconst parallelGroups = detectParallelCitations(deduplicatedTokens, cleaned)\n\n\t// Step 4: Extract citations from deduplicated tokens\n\tconst citations: Citation[] = []\n\tfor (let i = 0; i < deduplicatedTokens.length; i++) {\n\t\tconst token = deduplicatedTokens[i]\n\t\tlet citation: Citation\n\n\t\tswitch (token.type) {\n\t\t\tcase 'case':\n\t\t\t\t// Check pattern ID to distinguish short-form from full citations\n\t\t\t\tif (token.patternId === 'id' || token.patternId === 'ibid') {\n\t\t\t\t\tcitation = extractId(token, transformationMap)\n\t\t\t\t} else if (token.patternId === 'supra') {\n\t\t\t\t\tcitation = extractSupra(token, transformationMap)\n\t\t\t\t} else if (token.patternId === 'shortFormCase') {\n\t\t\t\t\tcitation = extractShortFormCase(token, transformationMap)\n\t\t\t\t} else {\n\t\t\t\t\tcitation = extractCase(token, transformationMap, cleaned)\n\t\t\t\t}\n\t\t\t\tbreak\n\t\t\tcase 'statute':\n\t\t\t\tcitation = extractStatute(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'journal':\n\t\t\t\tcitation = extractJournal(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'neutral':\n\t\t\t\tcitation = extractNeutral(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'publicLaw':\n\t\t\t\tcitation = extractPublicLaw(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'federalRegister':\n\t\t\t\tcitation = extractFederalRegister(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tcase 'statutesAtLarge':\n\t\t\t\tcitation = extractStatutesAtLarge(token, transformationMap)\n\t\t\t\tbreak\n\t\t\tdefault:\n\t\t\t\t// Unknown type - skip\n\t\t\t\tcontinue\n\t\t}\n\n\t\t// Attach cleaning warnings to citation if any\n\t\tif (warnings.length > 0) {\n\t\t\tcitation.warnings = [...(citation.warnings || []), ...warnings]\n\t\t}\n\n\t\t// Update processing time\n\t\tcitation.processTimeMs = performance.now() - startTime\n\n\t\t// Populate parallel citation metadata (Phase 8)\n\t\tif (citation.type === 'case') {\n\t\t\t// Check if this citation is part of a parallel group\n\t\t\tconst isPrimary = parallelGroups.has(i)\n\t\t\tconst isSecondary = Array.from(parallelGroups.values()).some(secondaries => secondaries.includes(i))\n\n\t\t\tif (isPrimary || isSecondary) {\n\t\t\t\t// Find the primary citation for this group\n\t\t\t\tlet primaryIndex = i\n\t\t\t\tif (isSecondary) {\n\t\t\t\t\t// Find which group this is a secondary of\n\t\t\t\t\tfor (const [primary, secondaries] of parallelGroups.entries()) {\n\t\t\t\t\t\tif (secondaries.includes(i)) {\n\t\t\t\t\t\t\tprimaryIndex = primary\n\t\t\t\t\t\t\tbreak\n\t\t\t\t\t\t}\n\t\t\t\t\t}\n\t\t\t\t}\n\n\t\t\t\t// Get the primary token to build groupId\n\t\t\t\tconst primaryToken = deduplicatedTokens[primaryIndex]\n\t\t\t\t// Extract volume, reporter, page from primary token text\n\t\t\t\t// Match: \"volume reporter page\" where reporter is everything except last number\n\t\t\t\tconst match = /^(\\S+)\\s+(.+)\\s+(\\d+)$/.exec(primaryToken.text)\n\t\t\t\tif (match) {\n\t\t\t\t\tconst [, volume, reporter, page] = match\n\t\t\t\t\tcitation.groupId = `${volume}-${reporter.replace(/\\s+/g, '.')}-${page}`\n\n\t\t\t\t\t// Only primary citation gets parallelCitations array\n\t\t\t\t\tif (isPrimary) {\n\t\t\t\t\t\tconst secondaryIndices = parallelGroups.get(i)!\n\t\t\t\t\t\tcitation.parallelCitations = secondaryIndices.map(secIdx => {\n\t\t\t\t\t\t\tconst secToken = deduplicatedTokens[secIdx]\n\t\t\t\t\t\t\tconst secMatch = /^(\\S+)\\s+(.+)\\s+(\\d+)$/.exec(secToken.text)\n\t\t\t\t\t\t\tif (secMatch) {\n\t\t\t\t\t\t\t\tconst [, secVol, secRep, secPage] = secMatch\n\t\t\t\t\t\t\t\treturn {\n\t\t\t\t\t\t\t\t\tvolume: /^\\d+$/.test(secVol) ? Number.parseInt(secVol, 10) : secVol,\n\t\t\t\t\t\t\t\t\treporter: secRep,\n\t\t\t\t\t\t\t\t\tpage: Number.parseInt(secPage, 10),\n\t\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\t}\n\t\t\t\t\t\t\treturn { volume: 0, reporter: '', page: 0 } // Fallback (shouldn't happen)\n\t\t\t\t\t\t})\n\t\t\t\t\t}\n\t\t\t\t}\n\t\t\t}\n\t\t}\n\n\t\tcitations.push(citation)\n\t}\n\n\t// Step 5: Resolve short-form citations if requested\n\tif (options?.resolve) {\n\t\treturn resolveCitations(citations, text, options.resolutionOptions)\n\t}\n\n\treturn citations\n}\n\n/**\n * Asynchronous version of extractCitations().\n *\n * Currently wraps the synchronous extractCitations() function. This API\n * exists for future extensibility when async operations are added:\n * - Async reporters-db lookups (Phase 3)\n * - Async resolution/annotation services\n * - Web Workers for parallel processing\n *\n * For now, this function immediately resolves with the same results as\n * the synchronous version.\n *\n * @param text - Raw text to extract citations from\n * @param options - Optional customization (cleaners, patterns, resolve)\n * @returns Promise resolving to array of citations (or ResolvedCitation[] if resolve: true)\n *\n * @example\n * ```typescript\n * const citations = await extractCitationsAsync(text, { resolve: true })\n * // Returns ResolvedCitation[] with resolution metadata\n * ```\n */\nexport async function extractCitationsAsync(text: string, options: ExtractOptions & { resolve: true }): Promise<ResolvedCitation[]>\nexport async function extractCitationsAsync(text: string, options?: ExtractOptions): Promise<Citation[]>\nexport async function extractCitationsAsync(\n\ttext: string,\n\toptions?: ExtractOptions,\n): Promise<Citation[] | ResolvedCitation[]> {\n\t// Async wrapper for future extensibility (e.g., async reporters-db lookup)\n\t// For MVP, wraps synchronous extractCitations\n\treturn extractCitations(text, options)\n}\n"],"mappings":"mEAKA,SAAgB,EAAe,EAA8C,CAC3E,OAAO,EAAS,OAAS,QACpB,EAAS,OAAS,WAClB,EAAS,OAAS,WAClB,EAAS,OAAS,WAClB,EAAS,OAAS,aAClB,EAAS,OAAS,mBAClB,EAAS,OAAS,kBAMzB,SAAgB,EAAoB,EAAmD,CACrF,OAAO,EAAS,OAAS,MACpB,EAAS,OAAS,SAClB,EAAS,OAAS,gBAMzB,SAAgB,EAAe,EAAkD,CAC/E,OAAO,EAAS,OAAS,OAO3B,SAAgB,EACd,EACA,EAC+B,CAC/B,OAAO,EAAS,OAAS,EAmB3B,SAAgB,EAAkB,EAAiB,CACjD,MAAU,MAAM,qBAAqB,IAAI,CC7C3C,SAAgB,EAAc,EAAsB,CACnD,OAAO,EAAK,QAAQ,WAAY,GAAG,CAUpC,SAAgB,EAAoB,EAAsB,CACzD,OAAO,EAAK,QAAQ,aAAc,IAAI,CAAC,QAAQ,SAAU,IAAI,CAU9D,SAAgB,EAAiB,EAAsB,CACtD,OAAO,EAAK,UAAU,OAAO,CAU9B,SAAgB,EAAe,EAAsB,CACpD,OAAO,EACL,QAAQ,kBAAmB,IAAI,CAC/B,QAAQ,kBAAmB,IAAI,CCXlC,SAAgB,EACf,EACA,EAA4C,CAC3C,EACA,EACA,EACA,EACA,CACiB,CAElB,IAAI,EAAc,EACd,EAAkB,IAAI,IACtB,EAAkB,IAAI,IAG1B,IAAK,IAAI,EAAI,EAAG,GAAK,EAAS,OAAQ,IACrC,EAAgB,IAAI,EAAG,EAAE,CACzB,EAAgB,IAAI,EAAG,EAAE,CAI1B,IAAK,IAAM,KAAW,EAAU,CAC/B,IAAM,EAAa,EACb,EAAY,EAAQ,EAAY,CAEtC,GAAI,IAAe,EAAW,CAE7B,GAAM,CAAE,qBAAoB,sBAAuB,EAClD,EACA,EACA,EACA,EACA,CAED,EAAkB,EAClB,EAAkB,EAClB,EAAc,GAShB,MAAO,CACN,QAAS,EACT,kBAP4C,CAC5C,kBACA,kBACA,CAKA,SAAU,EAAE,CACZ,CAeF,SAAS,EACR,EACA,EACA,EACA,EAIC,CACD,IAAM,EAAqB,IAAI,IACzB,EAAqB,IAAI,IAE3B,EAAY,EACZ,EAAW,EAGf,KAAO,GAAa,EAAW,QAAU,GAAY,EAAU,QAAQ,CAEtE,GAAI,GAAa,EAAW,QAAU,GAAY,EAAU,OAAQ,CACnE,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,MAID,GAAI,GAAa,EAAW,OAAQ,CACnC,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,IACA,SAID,GAAI,GAAY,EAAU,OAAQ,CACjC,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,SAID,GAAI,EAAW,KAAe,EAAU,GAAW,CAClD,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,QACM,CAGN,IAAI,EAAa,GAIjB,IAAK,IAAI,EAAY,EAAG,GAAa,IAChC,IAAY,GAAa,EAAW,QADU,IAGlD,GAAI,EAAW,EAAY,KAAe,EAAU,GAAW,CAE9D,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,IAAK,CACnC,IAAM,EACL,EAAmB,IAAI,EAAY,EAAE,EAAI,EAAY,EACtD,EAAmB,IAAI,EAAa,EAAS,CAE9C,GAAa,EACb,EAAa,GACb,MAIF,GAAI,EAAY,SAGhB,IAAK,IAAI,EAAY,EAAG,GAAa,IAChC,IAAW,GAAa,EAAU,QADY,IAGlD,GAAI,EAAW,KAAe,EAAU,EAAW,GAAY,CAE9D,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,IAC9B,EAAmB,IAAI,EAAW,EAAG,EAAY,CAElD,GAAY,EACZ,EAAa,GACb,MAIF,GAAI,EAAY,SAGhB,IAAM,EAAc,EAAmB,IAAI,EAAU,EAAI,EACzD,EAAmB,IAAI,EAAU,EAAY,CAC7C,EAAmB,IAAI,EAAa,EAAS,CAC7C,IACA,KAIF,MAAO,CAAE,qBAAoB,qBAAoB,CCpLlD,MAAa,EAA0B,CACrC,CACE,GAAI,mBACJ,MAAO,yJACP,YAAa,0DACb,KAAM,OACP,CACD,CACE,GAAI,gBACJ,MAAO,wGACP,YAAa,+BACb,KAAM,OACP,CACD,CACE,GAAI,iBACJ,MAAO,4GACP,YAAa,6EACb,KAAM,OACP,CACF,CC3BY,EAA6B,CACxC,CACE,GAAI,MACJ,MAAO,gDACP,YAAa,iDACb,KAAM,UACP,CACD,CACE,GAAI,aACJ,MAAO,iEACP,YAAa,sEACb,KAAM,UACP,CACF,CCbY,EAA6B,CACxC,CACE,GAAI,aACJ,MAAO,oDACP,YAAa,iGACb,KAAM,UACP,CACF,CCPY,EAA6B,CACxC,CACE,GAAI,UACJ,MAAO,4BACP,YAAa,6CACb,KAAM,UACP,CACD,CACE,GAAI,QACJ,MAAO,wCACP,YAAa,sDACb,KAAM,UACP,CACD,CACE,GAAI,aACJ,MAAO,sCACP,YAAa,oDACb,KAAM,YACP,CACD,CACE,GAAI,mBACJ,MAAO,8CACP,YAAa,0DACb,KAAM,kBACP,CACD,CACE,GAAI,oBACJ,MAAO,uCACP,YAAa,sDACb,KAAM,kBACP,CACD,CACE,GAAI,qBACJ,MAAO,kEACP,YAAa,2EACb,KAAM,UACP,CACF,CCRY,EAA+B,CAC1C,CACE,GAAI,KACJ,MAhC8B,+BAiC9B,YAAa,8CACb,KAAM,OACP,CACD,CACE,GAAI,OACJ,MAnCgC,iCAoChC,YAAa,oDACb,KAAM,OACP,CACD,CACE,GAAI,QACJ,MAjCiC,4FAkCjC,YAAa,mEACb,KAAM,OACP,CACD,CACE,GAAI,gBACJ,MAhC2C,sEAiC3C,YAAa,sDACb,KAAM,OACP,CACF,CCcD,SAAgB,EACd,EACA,EAAsB,CACpB,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACJ,CACQ,CACT,IAAM,EAAkB,EAAE,CAE1B,IAAK,IAAM,KAAW,EACpB,GAAI,CAEF,IAAM,EAAU,EAAY,SAAS,EAAQ,MAAM,CAEnD,IAAK,IAAM,KAAS,EAElB,EAAO,KAAK,CACV,KAAM,EAAM,GACZ,KAAM,CACJ,WAAY,EAAM,MAClB,SAAU,EAAM,MAAS,EAAM,GAAG,OACnC,CACD,KAAM,EAAQ,KACd,UAAW,EAAQ,GACpB,CAAC,OAEG,EAAO,CAEd,QAAQ,KACN,WAAW,EAAQ,GAAG,yBACtB,aAAiB,MAAQ,EAAM,QAAU,OAAO,EAAM,CACvD,CAOL,OAFA,EAAO,MAAM,EAAG,IAAM,EAAE,KAAK,WAAa,EAAE,KAAK,WAAW,CAErD,ECxFT,MAAM,EAAoC,CACxC,IAAK,EACL,QAAS,EACT,IAAK,EACL,SAAU,EACV,IAAK,EACL,MAAO,EACP,IAAK,EACL,MAAO,EACP,IAAK,EACL,IAAK,EACL,KAAM,EACN,IAAK,EACL,KAAM,EACN,IAAK,EACL,OAAQ,EACR,IAAK,EACL,KAAM,EACN,UAAW,EACX,IAAK,GACL,QAAS,GACT,IAAK,GACL,SAAU,GACV,IAAK,GACL,SAAU,GACX,CAgBD,SAAgB,EAAW,EAA0B,CAGnD,IAAM,EAAQ,EADK,EAAS,aAAa,CAAC,QAAQ,MAAO,GAAG,EAG5D,GAAI,IAAU,IAAA,GACZ,MAAU,MAAM,uBAAuB,IAAW,CAGpD,OAAO,EAiBT,SAAgB,EAAU,EAA4B,CACpD,GAAM,CAAE,OAAM,QAAO,OAAQ,EAgB7B,OAdI,IAAU,IAAA,IAAa,IAAQ,IAAA,GAI1B,GAAG,EAAK,GAFE,OAAO,EAAM,CAAC,SAAS,EAAG,IAAI,CAEpB,GADZ,OAAO,EAAI,CAAC,SAAS,EAAG,IAAI,GAIzC,IAAU,IAAA,GAOP,OAAO,EAAK,CAJV,GAAG,EAAK,GADE,OAAO,EAAM,CAAC,SAAS,EAAG,IAAI,GA4BnD,SAAgB,EAAU,EAA6C,CAErE,IAAM,EAAY,EAAQ,MAAM,yFAAyF,CACzH,GAAI,EAAW,CACb,IAAM,EAAQ,EAAW,EAAU,GAAG,CAChC,EAAM,OAAO,SAAS,EAAU,GAAI,GAAG,CAEvC,EAAS,CAAE,KADJ,OAAO,SAAS,EAAU,GAAI,GAAG,CACvB,QAAO,MAAK,CACnC,MAAO,CAAE,IAAK,EAAU,EAAO,CAAE,SAAQ,CAI3C,IAAM,EAAY,EAAQ,MAAM,uHAAuH,CACvJ,GAAI,EAAW,CACb,IAAM,EAAQ,EAAW,EAAU,GAAG,CAChC,EAAM,OAAO,SAAS,EAAU,GAAI,GAAG,CAEvC,EAAS,CAAE,KADJ,OAAO,SAAS,EAAU,GAAI,GAAG,CACvB,QAAO,MAAK,CACnC,MAAO,CAAE,IAAK,EAAU,EAAO,CAAE,SAAQ,CAI3C,IAAM,EAAe,EAAQ,MAAM,oCAAoC,CACvE,GAAI,EAAc,CAChB,IAAM,EAAQ,OAAO,SAAS,EAAa,GAAI,GAAG,CAC5C,EAAM,OAAO,SAAS,EAAa,GAAI,GAAG,CAE1C,EAAS,CAAE,KADJ,OAAO,SAAS,EAAa,GAAI,GAAG,CAC1B,QAAO,MAAK,CACnC,MAAO,CAAE,IAAK,EAAU,EAAO,CAAE,SAAQ,CAI3C,IAAM,EAAY,EAAQ,MAAM,cAAc,CAC9C,GAAI,EAAW,CAEb,IAAM,EAAS,CAAE,KADJ,OAAO,SAAS,EAAU,GAAI,GAAG,CACvB,CACvB,MAAO,CAAE,IAAK,EAAU,EAAO,CAAE,SAAQ,EC7J7C,SAAS,EAAY,EAA8B,CAClD,IAAM,EAAM,OAAO,SAAS,EAAK,GAAG,CACpC,OAAO,OAAO,EAAI,GAAK,EAAM,EAAM,EAIpC,MAAM,EAAgB,oJAOhB,EAA6B,0DAG7B,EAAmB,aAGnB,EAAgB,YAGhB,GAAc,cAGd,GAAwB,8BAGxB,EAA0B,aAG1B,EAA4B,wCAG5B,EAAwB,0CAGxB,EAA0B,WAG1B,EAAoB,kFAGpB,EAA0B,+IAUhC,SAAS,EAAmB,EAAqC,CAEhE,IAAI,EAAQ,EAAQ,QAAQ,iCAAkC,GAAG,CAAC,MAAM,CAQxE,MANA,GAAQ,EAAM,QAAQ,eAAgB,GAAG,CAAC,MAAM,CAEhD,EAAQ,EAAM,QAAQ,2BAA4B,GAAG,CAAC,MAAM,CAC5D,EAAQ,EAAM,QAAY,OAAO,OAAO,EAAc,OAAO,OAAQ,IAAI,CAAE,GAAG,CAAC,MAAM,CAErF,EAAQ,EAAM,QAAQ,QAAS,GAAG,CAAC,MAAM,CAClC,GAAS,WAAW,KAAK,EAAM,CAAG,EAAQ,IAAA,GAkBlD,SAAS,EACR,EACA,EACA,EAAc,IACwC,CACtD,IAAM,EAAc,KAAK,IAAI,EAAG,EAAY,EAAY,CACpD,EAAgB,EAAY,UAAU,EAAa,EAAU,CAC7D,EAAsB,EAKtB,EAAoB,GACpB,EACJ,MAAQ,EAAQ,EAAwB,KAAK,EAAc,IAAM,MAChE,EAAoB,EAAM,MAAQ,EAAM,GAAG,OAGxC,IAAsB,KACzB,EAAgB,EAAc,UAAU,EAAkB,CAC1D,EAAsB,EAAc,GAKrC,IAAM,EAAS,EAAkB,KAAK,EAAc,CACpD,GAAI,GAEC,CAAC,EAAO,GAAG,SAAS,IAAI,CAG3B,MAAO,CAAE,SAFQ,GAAG,EAAO,GAAG,MAAM,CAAC,MAAM,EAAO,GAAG,MAAM,GAExC,UADD,EAAsB,EAAO,MACjB,CAKhC,IAAM,EAAY,EAAwB,KAAK,EAAc,CAC7D,GAAI,GAEC,CAAC,EAAU,GAAG,SAAS,IAAI,CAG9B,MAAO,CAAE,SAFQ,GAAG,EAAU,GAAG,GAAG,EAAU,GAAG,MAAM,GAEpC,UADD,EAAsB,EAAU,MACpB,CAsBjC,SAAS,GACR,EACA,EACA,EAAe,IACN,CACT,IAAI,EAAM,EACJ,EAAW,KAAK,IAAI,EAAY,OAAQ,EAAc,EAAa,CACrE,EAAQ,EACR,EAAgB,GAEpB,KAAO,EAAM,GAAU,CACtB,IAAM,EAAO,EAAY,GAEzB,GAAI,IAAS,IACZ,IACA,EAAgB,GAChB,YACU,IAAS,IAKnB,IAJA,IACA,IAGI,IAAU,EAAG,CAEhB,IAAI,EAAU,EACd,KAAO,EAAU,GAAY,KAAK,KAAK,EAAY,GAAS,EAC3D,IAID,GAAI,EAAY,KAAa,IAAK,CACjC,EAAM,EACN,SAID,IAAM,EAAgB,EAAY,UAAU,EAAS,EAAS,CAG9D,GADC,kEACgB,KAAK,EAAc,CAAE,CAErC,EAAM,EACN,SAID,OAAO,QAGR,IAMF,OAAO,EAAgB,EAAM,EAsB9B,SAAS,EAAmB,EAK1B,CACD,IAAM,EAKF,EAAE,CAGA,EAAa,EAAU,EAAQ,CACjC,IACH,EAAO,KAAO,EACd,EAAO,KAAO,EAAW,OAAO,MAIjC,IAAM,EAAc,EAAmB,EAAQ,CAY/C,OAXI,IACH,EAAO,MAAQ,GAIZ,eAAe,KAAK,EAAQ,CAC/B,EAAO,YAAc,UACX,kBAAkB,KAAK,EAAQ,GACzC,EAAO,YAAc,cAGf,EAwBR,SAAS,EAAmB,EAAsB,CACjD,IAAI,EAAa,EAGjB,EAAa,EAAW,QAAQ,iBAAkB,GAAG,CAGrD,EAAa,EAAW,QAAQ,mBAAoB,GAAG,CAGvD,EAAa,EAAW,QAAQ,eAAgB,GAAG,CAInD,IAAI,EAAO,GACX,KAAO,IAAS,GACf,EAAO,EACP,EAAa,EAAW,QAAQ,+CAAgD,GAAG,CAUpF,MANA,GAAa,EAAW,QAAQ,kBAAmB,GAAG,CAGtD,EAAa,EAAW,QAAQ,OAAQ,IAAI,CAGrC,EAAW,MAAM,CAAC,aAAa,CAsBvC,SAAS,EAAkB,EAMzB,CAcD,IAAK,IAAM,IAZgB,CAC1B,QACA,WACA,YACA,gBACA,wBACA,iBACA,cACA,YACA,CAGwC,CAExC,IAAM,EADkB,OAAO,KAAK,EAAO,YAAa,IAAI,CAClC,KAAK,EAAS,CACxC,GAAI,EAAO,CACV,IAAM,EAAgB,EAAM,GACtB,EAAU,EAAM,GAGtB,GAAI,cAAc,KAAK,EAAQ,CAAE,CAGhC,IAAM,EAAS,yBAAyB,KAAK,EAAS,CACtD,GAAI,EAAQ,CACX,IAAM,EAAY,EAAO,GAAG,MAAM,CAC5B,EAAY,EAAO,GAAG,MAAM,CAClC,MAAO,CACN,YACA,oBAAqB,EAAmB,EAAU,CAClD,YACA,oBAAqB,EAAmB,EAAU,CAClD,OAIF,MAAO,CACN,UAAW,EACX,oBAAqB,EAAmB,EAAQ,CAChD,iBAAkB,EAClB,EAOJ,IAAM,EADS,yBACO,KAAK,EAAS,CACpC,GAAI,EAAQ,CACX,IAAI,EAAY,EAAO,GAAG,MAAM,CAC1B,EAAY,EAAO,GAAG,MAAM,CAMlC,MAFA,GAAY,EAAU,QAAQ,iFAAkF,GAAG,CAAC,MAAM,CAEnH,CACN,UAAW,GAAa,EAAO,GAAG,MAAM,CACxC,oBAAqB,EAAmB,GAAa,EAAO,GAAG,MAAM,CAAC,CACtE,YACA,oBAAqB,EAAmB,EAAU,CAClD,CAIF,MAAO,EAAE,CAsDV,SAAgB,EACf,EACA,EACA,EACmB,CACnB,GAAM,CAAE,OAAM,QAAS,EAKjB,EAAQ,EAA2B,KAAK,EAAK,CAEnD,GAAI,CAAC,EAEJ,MAAU,MAAM,kCAAkC,IAAO,CAG1D,IAAM,EAAS,EAAY,EAAM,GAAG,CAC9B,EAAW,EAAM,GAAG,MAAM,CAG1B,EAAU,EAAM,GAChB,EAAc,EAAiB,KAAK,EAAQ,CAC5C,EAAO,EAAc,IAAA,GAAY,OAAO,SAAS,EAAS,GAAG,CAC7D,EAAe,EAAc,GAAO,IAAA,GAIpC,EAAe,EAAc,KAAK,EAAK,CACzC,EAAU,EAAe,OAAO,SAAS,EAAa,GAAI,GAAG,CAAG,IAAA,GAGhE,EACA,EACA,EACA,EACA,EACA,EAGA,EAEE,EAAa,GAAY,KAAK,EAAK,CACzC,GAAI,EAAY,CACf,EAAuB,EAAW,GAElC,IAAM,EAAc,EAAmB,EAAqB,CAC5D,EAAO,EAAY,KACnB,EAAQ,EAAY,MACpB,EAAO,EAAY,KACnB,EAAc,EAAY,YAM3B,GAAI,GAAe,CAAC,EAAsB,CACzC,IAAM,EAAa,EAAY,UAAU,EAAK,SAAS,CACjD,EAAiB,GAAsB,KAAK,EAAW,CAC7D,GAAI,EAAgB,CACnB,EAAuB,EAAe,GAEtC,IAAM,EAAc,EAAmB,EAAqB,CAO5D,GANA,EAAO,EAAY,KACnB,EAAQ,EAAY,MACpB,EAAO,EAAY,KACnB,EAAc,EAAY,YAGtB,IAAY,IAAA,GAAW,CAC1B,IAAM,EAAiB,EAAwB,KAAK,EAAW,CAC3D,IACH,EAAU,OAAO,SAAS,EAAe,GAAI,GAAG,IAOpD,GAAI,GAAe,CAAC,EAAa,CAChC,IAAM,EAAa,EAAY,UAAU,EAAK,SAAS,CAEjD,EAAe,EAA0B,KAAK,EAAW,CAC3D,IACH,EAAc,EAAa,GAAG,aAAa,EAU7C,GALI,CAAC,GAAS,EAAsB,KAAK,EAAS,GACjD,EAAQ,UAIL,EAAa,CAChB,IAAM,EAAiB,EAAgB,EAAa,EAAK,WAAW,CACpE,GAAI,EAAgB,CACnB,EAAW,EAAe,SAG1B,IAAM,EAAW,GAAqB,EAAa,EAAK,SAAS,CAC3D,EAAiB,EAAe,UAChC,EAAe,EAAW,EAAK,SAAW,EAAW,EAAK,SAQhE,EAAW,CACV,WAAY,EACZ,SAAU,EACV,cAPA,EAAkB,gBAAgB,IAAI,EAAe,EAAI,EAQzD,YANA,EAAkB,gBAAgB,IAAI,EAAa,EAAI,EAOvD,EAKH,IAAI,EACA,EACA,EACA,EACA,EAEJ,GAAI,EAAU,CACb,IAAM,EAAc,EAAkB,EAAS,CAC/C,EAAY,EAAY,UACxB,EAAsB,EAAY,oBAClC,EAAY,EAAY,UACxB,EAAsB,EAAY,oBAClC,EAAmB,EAAY,iBAIhC,IAAM,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAG1D,EAAa,GAqCjB,GAlCwB,4JA2BvB,CAEmB,KAAM,GAAM,EAAS,SAAS,EAAE,CAAC,GACpD,GAAc,IAIX,IAAS,IAAA,GAAW,CACvB,IAAM,EAAc,IAAI,MAAM,CAAC,aAAa,CACxC,GAAQ,IACX,GAAc,IAYhB,MAPA,GAAa,KAAK,IAAI,EAAY,EAAI,CAGlC,IACH,EAAa,IAGP,CACN,KAAM,OACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,aACA,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,WACA,OACA,UACA,QACA,OACA,eACA,OACA,WACA,WACA,cACA,YACA,sBACA,YACA,sBACA,mBACA,CC3oBF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,yDACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAQ,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GACnD,EAAO,EAAM,GAAG,MAAM,CACtB,EAAU,EAAM,GAGhB,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAG1D,EAAa,GAkBjB,MAfmB,CAClB,SACA,SACA,iBACA,kBACA,0BACA,8BACA,CAEc,KAAM,GAAM,EAAK,SAAS,EAAE,CAAC,GAC3C,GAAc,IAGf,EAAa,KAAK,IAAI,EAAY,EAAI,CAE/B,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,aACA,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,QACA,OACA,UACA,CC1DF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,4CACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAY,EAAM,GAClB,EAAS,QAAQ,KAAK,EAAU,CAAG,OAAO,SAAS,EAAW,GAAG,CAAG,EACpE,EAAU,EAAM,GAAG,MAAM,CACzB,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAIpC,EADe,YACa,KAAK,EAAK,CACtC,EAAU,EAAe,OAAO,SAAS,EAAa,GAAI,GAAG,CAAG,IAAA,GAGhE,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,UACA,aAAc,EACd,OACA,UACA,CCxDF,SAAgB,EACf,EACA,EACkB,CAClB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADe,+CACM,KAAK,EAAK,CAErC,GAAI,CAAC,EACJ,MAAU,MAAM,qCAAqC,IAAO,CAG7D,IAAM,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CACpC,EAAQ,EAAM,GACd,EAAiB,EAAM,GAGvB,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,UACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,EAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,OACA,QACA,iBACA,CC3CF,SAAgB,EACf,EACA,EACoB,CACpB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADiB,wCACM,KAAK,EAAK,CAEvC,GAAI,CAAC,EACJ,MAAU,MAAM,wCAAwC,IAAO,CAGhE,IAAM,EAAW,OAAO,SAAS,EAAM,GAAI,GAAG,CACxC,EAAY,OAAO,SAAS,EAAM,GAAI,GAAG,CAGzC,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,YACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,WACA,YACA,CC5CF,SAAgB,EACf,EACA,EAC0B,CAC1B,GAAM,CAAE,OAAM,QAAS,EAKjB,EADuB,0CACM,KAAK,EAAK,CAE7C,GAAI,CAAC,EACJ,MAAU,MAAM,8CAA8C,IAAO,CAGtE,IAAM,EAAY,EAAM,GAClB,EAAS,QAAQ,KAAK,EAAU,CAAG,OAAO,SAAS,EAAW,GAAG,CAAG,EACpE,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAKpC,EADY,wBACU,KAAK,EAAK,CAChC,EAAO,EAAY,OAAO,SAAS,EAAU,GAAI,GAAG,CAAG,IAAA,GAGvD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,kBACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,OACA,OACA,CChFF,SAAgB,EACf,EACA,EAC0B,CAC1B,GAAM,CAAE,OAAM,QAAS,EAIjB,EADY,mCACM,KAAK,EAAK,CAElC,GAAI,CAAC,EACJ,MAAU,MAAM,+CAA+C,IAAO,CAGvE,IAAM,EAAY,EAAM,GAClB,EAAS,QAAQ,KAAK,EAAU,CAAG,OAAO,SAAS,EAAW,GAAG,CAAG,EACpE,EAAO,OAAO,SAAS,EAAM,GAAI,GAAG,CAIpC,EADY,wBACU,KAAK,EAAK,CAChC,EAAO,EAAY,OAAO,SAAS,EAAU,GAAI,GAAG,CAAG,IAAA,GAGvD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,kBACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,OACA,OACA,CCrBF,SAAgB,EACf,EACA,EACa,CACb,GAAM,CAAE,OAAM,QAAS,EAKjB,EADU,oCACM,KAAK,EAAK,CAEhC,GAAI,CAAC,EACJ,MAAU,MAAM,iCAAiC,IAAO,CAIzD,IAAM,EAAU,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GAGrD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,KACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,EAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,UACA,CAmCF,SAAgB,EACf,EACA,EACgB,CAChB,GAAM,CAAE,OAAM,QAAS,EAMjB,EADa,2FACM,KAAK,EAAK,CAEnC,GAAI,CAAC,EACJ,MAAU,MAAM,mCAAmC,IAAO,CAG3D,IAAM,EAAY,EAAM,GAClB,EAAU,EAAM,GAAK,OAAO,SAAS,EAAM,GAAI,GAAG,CAAG,IAAA,GAGrD,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,QACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,YACA,UACA,CAqCF,SAAgB,EACf,EACA,EACwB,CACxB,GAAM,CAAE,OAAM,QAAS,EAKjB,EADiB,iEACM,KAAK,EAAK,CAEvC,GAAI,CAAC,EACJ,MAAU,MAAM,6CAA6C,IAAO,CAGrE,IAAM,EAAY,EAAM,GAClB,EAAS,QAAQ,KAAK,EAAU,CAAG,OAAO,SAAS,EAAW,GAAG,CAAG,EACpE,EAAW,EAAM,GAAG,MAAM,CAC1B,EAAU,OAAO,SAAS,EAAM,GAAI,GAAG,CAGvC,EACL,EAAkB,gBAAgB,IAAI,EAAK,WAAW,EAAI,EAAK,WAC1D,EACL,EAAkB,gBAAgB,IAAI,EAAK,SAAS,EAAI,EAAK,SAK9D,MAAO,CACN,KAAM,gBACN,OACA,KAAM,CACL,WAAY,EAAK,WACjB,SAAU,EAAK,SACf,gBACA,cACA,CACD,WAXkB,GAYlB,YAAa,EACb,cAAe,EACf,gBAAiB,EACjB,SACA,WACA,UACA,CClOF,SAAgB,EACd,EACA,EACA,EAA0B,SACL,CACrB,IAAM,EAAe,IAAI,IAGnB,EAAuB,CAAC,EAAE,CAC5B,EAEJ,MAAQ,EAAQ,EAAgB,KAAK,EAAK,IAAM,MAE9C,EAAW,KAAK,EAAM,MAAQ,EAAM,GAAG,OAAO,CAGhD,EAAW,KAAK,EAAK,OAAO,CAG5B,IAAK,IAAI,EAAI,EAAG,EAAI,EAAU,OAAQ,IAAK,CAEzC,IAAM,EADW,EAAU,GACI,KAAK,cAGhC,EAAe,EACnB,IAAK,IAAI,EAAI,EAAG,EAAI,EAAW,OAAS,EAAG,IACzC,GAAI,GAAiB,EAAW,IAAM,EAAgB,EAAW,EAAI,GAAI,CACvE,EAAe,EACf,MAIJ,EAAa,IAAI,EAAG,EAAa,CAGnC,OAAO,EAYT,SAAgB,EACd,EACA,EACA,EACA,EACS,CACT,GAAI,IAAa,OAEf,MAAO,GAIT,IAAM,EAAsB,EAAa,IAAI,EAAgB,CACvD,EAAmB,EAAa,IAAI,EAAa,CASvD,OANI,IAAwB,IAAA,IAAa,IAAqB,IAAA,GACrD,GAKF,IAAwB,ECpEjC,SAAgB,EAAoB,EAAW,EAAmB,CAEhE,GAAI,EAAE,SAAW,EAAG,OAAO,EAAE,OAC7B,GAAI,EAAE,SAAW,EAAG,OAAO,EAAE,OAI7B,IAAM,EAAiB,MAAM,KAAK,CAAE,OAAQ,EAAE,OAAS,EAAG,KACxD,MAAM,EAAE,OAAS,EAAE,CAAC,KAAK,EAAE,CAC5B,CAGD,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,EAAG,GAAG,GAAK,EAEb,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,EAAG,GAAG,GAAK,EAIb,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IAC7B,IAAK,IAAI,EAAI,EAAG,GAAK,EAAE,OAAQ,IACzB,EAAE,EAAI,KAAO,EAAE,EAAI,GAErB,EAAG,GAAG,GAAK,EAAG,EAAI,GAAG,EAAI,GAMzB,EAAG,GAAG,GAAK,EAAI,KAAK,IAClB,EAAG,EAAI,GAAG,GACV,EAAG,GAAG,EAAI,GACV,EAAG,EAAI,GAAG,EAAI,GACf,CAKP,OAAO,EAAG,EAAE,QAAQ,EAAE,QAgBxB,SAAgB,EAA8B,EAAW,EAAmB,CAE1E,IAAM,EAAS,EAAE,aAAa,CACxB,EAAS,EAAE,aAAa,CAGxB,EAAW,EAAoB,EAAQ,EAAO,CAG9C,EAAY,KAAK,IAAI,EAAO,OAAQ,EAAO,OAAO,CAIxD,OAHI,IAAc,EAAU,EAGrB,EAAI,EAAW,ECtDxB,IAAa,EAAb,KAA8B,CAa5B,YACE,EACA,EACA,EAA6B,EAAE,CAC/B,CACA,KAAK,UAAY,EACjB,KAAK,KAAO,EAGZ,KAAK,QAAU,CACb,cAAe,EAAQ,eAAiB,YACxC,qBAAsB,EAAQ,sBAAwB,GACtD,yBAA0B,EAAQ,0BAA4B,SAC9D,mBAAoB,EAAQ,oBAAsB,GAClD,oBAAqB,EAAQ,qBAAuB,GACpD,sBAAuB,EAAQ,uBAAyB,GACxD,iBAAkB,EAAQ,kBAAoB,GAC/C,CAGD,KAAK,QAAU,CACb,cAAe,EACf,aAAc,EACd,iBAAkB,IAAA,GAClB,oBAAqB,IAAI,IACzB,aAAc,IAAI,IACnB,CAGG,KAAK,QAAQ,uBACf,KAAK,QAAQ,aAAe,EAC1B,EACA,EACA,KAAK,QAAQ,yBACd,EASL,SAA8B,CAC5B,IAAM,EAA+B,EAAE,CAEvC,IAAK,IAAI,EAAI,EAAG,EAAI,KAAK,UAAU,OAAQ,IAAK,CAC9C,KAAK,QAAQ,cAAgB,EAC7B,IAAM,EAAW,KAAK,UAAU,GAG5B,EAEJ,OAAQ,EAAS,KAAjB,CACE,IAAK,KACH,EAAa,KAAK,UAAU,EAAS,CACrC,MACF,IAAK,QACH,EAAa,KAAK,aAAa,EAAS,CACxC,MACF,IAAK,gBACH,EAAa,KAAK,qBAAqB,EAAS,CAChD,MACF,QAEM,EAAe,EAAS,GAC1B,KAAK,QAAQ,iBAAmB,EAChC,KAAK,kBAAkB,EAAU,EAAE,EAErC,MAKJ,EAAS,KAAK,CACZ,GAAG,EACH,aACD,CAAqB,CAGxB,OAAO,EAMT,UAAkB,EAAqD,CACrE,IAAM,EAAe,KAAK,QAAQ,cAG9B,EACJ,IAAK,IAAI,EAAI,EAAe,EAAG,GAAK,EAAG,IAErC,GADkB,KAAK,UAAU,GACnB,OAAS,OAAQ,CAC7B,EAAkB,EAClB,MAcJ,OATI,IAAoB,IAAA,GACf,KAAK,oBAAoB,wCAAwC,CAIrE,KAAK,cAAc,EAAiB,EAAa,CAI/C,CACL,WAAY,EACZ,WAAY,EACb,CANQ,KAAK,oBAAoB,6CAA6C,CAYjF,aAAqB,EAAuD,CAC1E,IAAM,EAAe,KAAK,QAAQ,cAC5B,EAAkB,KAAK,mBAAmB,EAAS,UAAU,CAG/D,EAEJ,IAAK,GAAM,CAAC,EAAW,KAAkB,KAAK,QAAQ,oBAAqB,CAEzE,GAAI,CAAC,KAAK,cAAc,EAAe,EAAa,CAClD,SAIF,IAAM,EAAa,EAA8B,EAAiB,EAAU,EAGxE,CAAC,GAAa,EAAa,EAAU,cACvC,EAAY,CAAE,MAAO,EAAe,aAAY,EAKpD,GAAI,CAAC,EACH,OAAO,KAAK,oBAAoB,kCAAkC,CAGpE,GAAI,EAAU,WAAa,KAAK,QAAQ,oBACtC,OAAO,KAAK,oBACV,yBAAyB,EAAU,WAAW,QAAQ,EAAE,CAAC,mBAAmB,KAAK,QAAQ,sBAC1F,CAIH,IAAM,EAAqB,EAAE,CAK7B,OAJI,EAAU,WAAa,GACzB,EAAS,KAAK,2BAA2B,EAAU,WAAW,QAAQ,EAAE,GAAG,CAGtE,CACL,WAAY,EAAU,MACtB,WAAY,EAAU,WACtB,SAAU,EAAS,OAAS,EAAI,EAAW,IAAA,GAC5C,CAMH,qBAA6B,EAA+D,CAC1F,IAAM,EAAe,KAAK,QAAQ,cAGlC,IAAK,IAAI,EAAI,EAAe,EAAG,GAAK,EAAG,IAAK,CAC1C,IAAM,EAAY,KAAK,UAAU,GAG7B,KAAU,OAAS,QAMrB,EAAU,SAAW,EAAS,QAC9B,KAAK,kBAAkB,EAAU,SAAS,GAAK,KAAK,kBAAkB,EAAS,SAAS,CAQxF,OALK,KAAK,cAAc,EAAG,EAAa,CAKjC,CACL,WAAY,EACZ,WAAY,IACb,CAPQ,KAAK,oBAAoB,2CAA2C,CAWjF,OAAO,KAAK,oBAAoB,uCAAuC,CAQzE,kBAA0B,EAAoB,EAAqB,CAEjE,GAAI,EAAS,OAAS,SAGhB,EAAS,qBACX,KAAK,QAAQ,oBAAoB,IAAI,EAAS,oBAAqB,EAAM,CAEvE,EAAS,qBACX,KAAK,QAAQ,oBAAoB,IAAI,EAAS,oBAAqB,EAAM,CAIvE,CAAC,EAAS,qBAAuB,CAAC,EAAS,qBAAqB,CAClE,IAAM,EAAY,KAAK,iBAAiB,EAAS,CACjD,GAAI,EAAW,CACb,IAAM,EAAa,KAAK,mBAAmB,EAAU,CACrD,KAAK,QAAQ,oBAAoB,IAAI,EAAY,EAAM,GAU/D,iBAAyB,EAAgD,CAKvE,IAAM,EAAgB,EAAS,KAAK,cAE9B,EAAgB,KAAK,IAAI,EAAG,EAAgB,IAAI,CAChD,EAAa,KAAK,KAAK,UAAU,EAAe,EAAc,CAI9D,EAAS,EAAW,MAAM,4FAA4F,CAC5H,GAAI,EACF,OAAO,KAAK,iBAAiB,EAAO,GAAG,MAAM,CAAC,CAIhD,IAAM,EAAc,EAAW,MAAM,8CAA8C,CACnF,GAAI,EACF,OAAO,KAAK,iBAAiB,EAAY,GAAG,MAAM,CAAC,CAUvD,iBAAyB,EAAsB,CAC7C,IAAM,EAAW,EAAK,QAAQ,iFAAkF,GAAG,CAAC,MAAM,CAE1H,OAAO,EAAS,OAAS,EAAI,EAAW,EAM1C,mBAA2B,EAAsB,CAC/C,OAAO,EACJ,aAAa,CACb,QAAQ,OAAQ,IAAI,CACpB,MAAM,CAMX,kBAA0B,EAA0B,CAClD,OAAO,EACJ,aAAa,CACb,QAAQ,OAAQ,GAAG,CACnB,QAAQ,MAAO,GAAG,CAMvB,cAAsB,EAAyB,EAA+B,CAC5E,OAAO,EACL,EACA,EACA,KAAK,QAAQ,aACb,KAAK,QAAQ,cACd,CAMH,oBAA4B,EAA8C,CACxE,GAAI,KAAK,QAAQ,iBACf,MAAO,CACL,WAAY,IAAA,GACZ,cAAe,EACf,WAAY,EACb,GClUP,SAAgB,EACd,EACA,EACA,EACoB,CAEpB,OADiB,IAAI,EAAiB,EAAW,EAAM,EAAQ,CAC/C,SAAS,CCkB3B,SAAgB,GACf,EACA,EAAc,GACU,CACxB,IAAM,EAAiB,IAAI,IAG3B,GAAI,EAAO,SAAW,GAAK,IAAgB,GAC1C,OAAO,EAIR,IAAM,EAAkB,IAAI,IAE5B,IAAK,IAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CACvC,IAAM,EAAU,EAAO,GAQvB,GALI,EAAQ,OAAS,QAKjB,EAAgB,IAAI,EAAE,CACzB,SAGD,IAAM,EAA6B,EAAE,CAIrC,IAAK,IAAI,EAAI,EAAI,EAAG,EAAI,EAAO,OAAQ,IAAK,CAC3C,IAAM,EAAY,EAAO,GAGzB,GAAI,EAAU,OAAS,OACtB,MAKD,IAAM,GADY,IAAM,EAAI,EAAI,EAAU,EAAO,EAAI,IAC1B,KAAK,SAC1B,EAAS,EAAU,KAAK,WAK9B,GADgB,EAAS,EACX,GACb,MAID,IAAM,EAAU,EAAY,UAAU,EAAU,EAAO,CAGvD,GAAI,CAAC,EAAQ,SAAS,IAAI,CACzB,MAKD,IAAM,EAAa,EAAQ,QAAQ,IAAI,CAiBvC,GAhB2B,EAAQ,OAAS,EAAa,EAEhC,GAQL,EAAY,UAAU,EAAQ,KAAK,SAAU,EAAU,KAAK,SAAS,CACzE,SAAS,IAAI,EAKzB,CAAC,GAAuB,EAAa,EAAU,KAAK,SAAS,CAChE,MAID,EAAiB,KAAK,EAAE,CACxB,EAAgB,IAAI,EAAE,CAInB,EAAiB,OAAS,GAC7B,EAAe,IAAI,EAAG,EAAiB,CAIzC,OAAO,EAaR,SAAS,GAAuB,EAAqB,EAA2B,CAE/E,IAAM,EAAa,EAAY,UAAU,EAAU,EAAW,IAAI,CAG5D,EAAY,EAAW,QAAQ,IAAI,CACzC,GAAI,IAAc,GACjB,MAAO,GAIR,IAAI,EAAQ,EACZ,IAAK,IAAI,EAAI,EAAW,EAAI,EAAW,OAAQ,IAC9C,GAAI,EAAW,KAAO,IACrB,YACU,EAAW,KAAO,MAC5B,IACI,IAAU,GAEb,MAAO,GAKV,MAAO,GCrBR,SAAgB,EACf,EACA,EACkC,CAClC,IAAM,EAAY,YAAY,KAAK,CAG7B,CAAE,UAAS,oBAAmB,YAAa,EAChD,EACA,GAAS,SACT,CAWK,EAAS,EAAS,EAPJ,GAAS,UAAY,CACxC,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,GAAG,EACH,CAC4C,CAKvC,EAAoC,EAAE,CACtC,EAAgB,IAAI,IAIpB,EAAgB,EAAQ,OAAS,MAEvC,IAAK,IAAM,KAAS,EAAQ,CAC3B,IAAM,EAAS,EACX,EAAM,KAAK,YAAc,GAAM,EAAM,KAAK,SAC3C,GAAG,EAAM,KAAK,WAAW,GAAG,EAAM,KAAK,WACrC,EAAc,IAAI,EAAO,GAC7B,EAAc,IAAI,EAAO,CACzB,EAAmB,KAAK,EAAM,EAMhC,IAAM,EAAiB,GAAwB,EAAoB,EAAQ,CAGrE,EAAwB,EAAE,CAChC,IAAK,IAAI,EAAI,EAAG,EAAI,EAAmB,OAAQ,IAAK,CACnD,IAAM,EAAQ,EAAmB,GAC7B,EAEJ,OAAQ,EAAM,KAAd,CACC,IAAK,OAEJ,AAOC,EAPG,EAAM,YAAc,MAAQ,EAAM,YAAc,OACxC,EAAU,EAAO,EAAkB,CACpC,EAAM,YAAc,QACnB,EAAa,EAAO,EAAkB,CACvC,EAAM,YAAc,gBACnB,EAAqB,EAAO,EAAkB,CAE9C,EAAY,EAAO,EAAmB,EAAQ,CAE1D,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,UACJ,EAAW,EAAe,EAAO,EAAkB,CACnD,MACD,IAAK,YACJ,EAAW,EAAiB,EAAO,EAAkB,CACrD,MACD,IAAK,kBACJ,EAAW,EAAuB,EAAO,EAAkB,CAC3D,MACD,IAAK,kBACJ,EAAW,EAAuB,EAAO,EAAkB,CAC3D,MACD,QAEC,SAYF,GARI,EAAS,OAAS,IACrB,EAAS,SAAW,CAAC,GAAI,EAAS,UAAY,EAAE,CAAG,GAAG,EAAS,EAIhE,EAAS,cAAgB,YAAY,KAAK,CAAG,EAGzC,EAAS,OAAS,OAAQ,CAE7B,IAAM,EAAY,EAAe,IAAI,EAAE,CACjC,EAAc,MAAM,KAAK,EAAe,QAAQ,CAAC,CAAC,KAAK,GAAe,EAAY,SAAS,EAAE,CAAC,CAEpG,GAAI,GAAa,EAAa,CAE7B,IAAI,EAAe,EACnB,GAAI,OAEE,GAAM,CAAC,EAAS,KAAgB,EAAe,SAAS,CAC5D,GAAI,EAAY,SAAS,EAAE,CAAE,CAC5B,EAAe,EACf,OAMH,IAAM,EAAe,EAAmB,GAGlC,EAAQ,yBAAyB,KAAK,EAAa,KAAK,CAC9D,GAAI,EAAO,CACV,GAAM,EAAG,EAAQ,EAAU,GAAQ,EAInC,GAHA,EAAS,QAAU,GAAG,EAAO,GAAG,EAAS,QAAQ,OAAQ,IAAI,CAAC,GAAG,IAG7D,EAAW,CACd,IAAM,EAAmB,EAAe,IAAI,EAAE,CAC9C,EAAS,kBAAoB,EAAiB,IAAI,GAAU,CAC3D,IAAM,EAAW,EAAmB,GAC9B,EAAW,yBAAyB,KAAK,EAAS,KAAK,CAC7D,GAAI,EAAU,CACb,GAAM,EAAG,EAAQ,EAAQ,GAAW,EACpC,MAAO,CACN,OAAQ,QAAQ,KAAK,EAAO,CAAG,OAAO,SAAS,EAAQ,GAAG,CAAG,EAC7D,SAAU,EACV,KAAM,OAAO,SAAS,EAAS,GAAG,CAClC,CAEF,MAAO,CAAE,OAAQ,EAAG,SAAU,GAAI,KAAM,EAAG,EAC1C,IAMN,EAAU,KAAK,EAAS,CAQzB,OAJI,GAAS,QACL,EAAiB,EAAW,EAAM,EAAQ,kBAAkB,CAG7D,EA2BR,eAAsB,GACrB,EACA,EAC2C,CAG3C,OAAO,EAAiB,EAAM,EAAQ"}
|