flappa-doormal 2.2.1 → 2.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -187,8 +187,8 @@ interface SegmentationOptions {
 ```typescript
 segmentPages(pages, {
     rules: [
-        { lineStartsWith: ['{{basmalah}}'], split: 'at' },
-        { lineStartsWith: ['{{bab}}'], split: 'at', meta: { type: 'chapter' } },
+        { lineStartsWith: ['{{basmalah}}'] }, // split defaults to 'at'
+        { lineStartsWith: ['{{bab}}'], meta: { type: 'chapter' } },
     ],
     maxPages: 2,
     breakpoints: ['{{tarqim}}\\s*', '\\n', ''], // Try: punctuation → newline → page boundary
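The AGENTS.md change leans on the new default. As a rough sketch (assuming `SplitRule` is exported from the package root, which this diff does not show directly), the two rule styles are equivalent in 2.2.2:

```typescript
import type { SplitRule } from 'flappa-doormal';

// Explicit form (2.2.1 style) — still valid in 2.2.2.
const explicit: SplitRule[] = [
    { lineStartsWith: ['{{basmalah}}'], split: 'at' },
    { lineStartsWith: ['{{bab}}'], split: 'at', meta: { type: 'chapter' } },
];

// Shorthand form (2.2.2) — omitting `split` now means split: 'at'.
const shorthand: SplitRule[] = [
    { lineStartsWith: ['{{basmalah}}'] },
    { lineStartsWith: ['{{bab}}'], meta: { type: 'chapter' } },
];
```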
package/README.md CHANGED
@@ -510,7 +510,7 @@ type SplitRule = {
   regex?: string;
 
   // Split behavior
-  split: 'at' | 'after';
+  split?: 'at' | 'after'; // Default: 'at'
   occurrence?: 'first' | 'last' | 'all';
   maxSpan?: number;
   fuzzy?: boolean;
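For consumers of the documented `SplitRule` type, the practical effect is that a rule without a `split` field now type-checks. A minimal sketch, reusing the markdown-header example from the typings below:

```typescript
import type { SplitRule } from 'flappa-doormal';

// Rejected by the 2.2.1 typings (missing required `split`);
// accepted in 2.2.2 and treated as split: 'at'.
const headerRule: SplitRule = {
    lineStartsWith: ['## ', '### '],
    meta: { type: 'section' },
};
```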
package/dist/index.d.mts CHANGED
@@ -229,8 +229,9 @@ type SplitBehavior = {
    * Where to split relative to the match.
    * - `'at'`: New segment starts at the match position
    * - `'after'`: New segment starts after the match ends
+   * @default 'at'
    */
-  split: 'at' | 'after';
+  split?: 'at' | 'after';
   /**
    * Which occurrence(s) to split on.
    * - `'all'`: Split at every match (default)
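For readers skimming the typings diff, the `'at'`/`'after'` distinction documented above can be summarised with a purely illustrative sketch (the example line and rules are hypothetical):

```typescript
// Given a line "٦٦٩٦ - حدثنا ..." and a match covering the "٦٦٩٦ - " marker:
//   split: 'at'    → the new segment begins at the match start (marker included)
//   split: 'after' → the new segment begins after the match ends ("حدثنا ...")
// With 2.2.2, omitting `split` behaves like 'at'.
const atRule = { lineStartsWith: ['{{raqms}} {{dash}} '] };                    // default: 'at'
const afterRule = { lineStartsWith: ['{{raqms}} {{dash}} '], split: 'after' }; // explicit
```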
@@ -366,14 +367,13 @@ type RuleConstraints = {
  * Each rule must specify:
  * - **Pattern** (exactly one): `regex`, `template`, `lineStartsWith`,
  *   `lineStartsAfter`, or `lineEndsWith`
- * - **Split behavior**: `split` (required), `occurrence`, `maxSpan`, `fuzzy`
+ * - **Split behavior**: `split` (optional, defaults to `'at'`), `occurrence`, `maxSpan`, `fuzzy`
  * - **Constraints** (optional): `min`, `max`, `meta`
  *
  * @example
- * // Basic rule: split at markdown headers
+ * // Basic rule: split at markdown headers (split defaults to 'at')
  * const rule: SplitRule = {
  *   lineStartsWith: ['## ', '### '],
- *   split: 'at',
  *   meta: { type: 'section' }
  * };
  *
@@ -381,7 +381,6 @@ type RuleConstraints = {
  * // Advanced rule: extract hadith numbers with fuzzy Arabic matching
  * const rule: SplitRule = {
  *   lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '],
- *   split: 'at',
  *   fuzzy: true,
  *   min: 5,
  *   max: 500,
@@ -1 +1 @@
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,cD1aC,WC0aqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;AC2JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA,KA/VK,YAAA,GA+VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AAiHA;;;;AC2JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AAhlBgB;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC2JA;;;;;;;;ACj0BA;AAcA;;;;ACgDa,cFmwBA,YE1vBZ,EAAA,CAAA,KAAA,EF0vBmC,IE1vBnC,EAAA,EAAA,OAAA,EF0vBoD,mBE1vBpD,EAAA,GF0vB0E,OE1vB1E,EAAA;;;;AJXD;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC2JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;cApea;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;AJteb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cIrlBN,mBJqlBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIrlBgC,eJqlBhC,EAAA;AAiBnB;;;;AC2JA;;;;;;;;ACj0BA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAgFZ,GAAA,iBAhFsG;EAqG1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC5hBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAAA,IAfgE,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAAA,iBAZ4B;;;YAAf"}
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EY,cDzaC,WCyaqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;AC4JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;;AC1iBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA,KA9VK,YAAA,GA8VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AAiHA;;;;AC4JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAmHA;AAuBA;AAqBA;AAgBA;;;;AC1iBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwCC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AA/kBgB;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC4JA;;;;;;;;ACj0BA;AAcA;;;;ACgDa,cFmwBA,YE1vBZ,EAAA,CAAA,KAAA,EF0vBmC,IE1vBnC,EAAA,EAAA,OAAA,EF0vBoD,mBE1vBpD,EAAA,GF0vB0E,OE1vB1E,EAAA;;;;AJXD;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC4JA;;;;;;;;ACj0BA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;cAlfa;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAmH1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;;AJpfb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cIplBN,mBJolBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIplBgC,eJolBhC,EAAA;AAiBnB;;;;AC4JA;;;;;;;;ACj0BA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBA8FZ,GAAA,iBA9FsG;EAmH1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC1iBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}
package/dist/index.mjs CHANGED
@@ -716,13 +716,21 @@ const escapeTemplateBrackets = (pattern) => {
   */
  const BASE_TOKENS = {
    bab: "باب",
-   basmalah: "بسم الله|﷽",
+   basmalah: ["بسم الله", "﷽"].join("|"),
    bullet: "[•*°]",
    dash: "[-–—ـ]",
-   fasl: "فصل|مسألة",
+   fasl: ["مسألة", "فصل"].join("|"),
    harf: "[أ-ي]",
    kitab: "كتاب",
-   naql: "حدثنا|أخبرنا|حدثني|وحدثنا|أنبأنا|سمعت",
+   naql: [
+     "حدثني",
+     "وأخبرنا",
+     "حدثنا",
+     "سمعت",
+     "أنبأنا",
+     "وحدثنا",
+     "أخبرنا"
+   ].join("|"),
    raqm: "[\\u0660-\\u0669]",
    raqms: "[\\u0660-\\u0669]+",
    tarqim: "[.!?؟؛]"
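The token alternations are now built from arrays with reordered alternatives, and `naql` gains one new phrase (وأخبرنا). A quick standalone check (not package code) of what the array form evaluates to:

```typescript
// join('|') yields the same kind of alternation string the old literal contained.
const naql = ['حدثني', 'وأخبرنا', 'حدثنا', 'سمعت', 'أنبأنا', 'وحدثنا', 'أخبرنا'].join('|');

console.log(naql);                                       // 'حدثني|وأخبرنا|حدثنا|سمعت|أنبأنا|وحدثنا|أخبرنا'
console.log(new RegExp(naql, 'u').test('وأخبرنا مالك')); // true — the newly added alternative matches
```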
@@ -858,6 +866,16 @@ const containsTokens = (query) => {
   */
  const expandTokensWithCaptures = (query, fuzzyTransform) => {
    const captureNames = [];
+   const captureNameCounts = /* @__PURE__ */ new Map();
+   /**
+    * Gets a unique capture name, appending _2, _3, etc. for duplicates.
+    * This prevents invalid regex with duplicate named groups.
+    */
+   const getUniqueCaptureName = (baseName) => {
+     const count = captureNameCounts.get(baseName) ?? 0;
+     captureNameCounts.set(baseName, count + 1);
+     return count === 0 ? baseName : `${baseName}_${count + 1}`;
+   };
    const segments = [];
    let lastIndex = 0;
    TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
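The new helper only matters when a template reuses a capture name. A standalone sketch of the same counting logic (reimplemented here for illustration, not imported from the package):

```typescript
// Mirrors getUniqueCaptureName from the diff above: the first use keeps the name,
// repeats get numeric suffixes so the generated regex never declares the same
// named group twice.
const counts = new Map<string, number>();
const uniqueName = (base: string): string => {
    const count = counts.get(base) ?? 0;
    counts.set(base, count + 1);
    return count === 0 ? base : `${base}_${count + 1}`;
};

console.log(uniqueName('num')); // 'num'   → (?<num>...)
console.log(uniqueName('num')); // 'num_2' → (?<num_2>...)
console.log(uniqueName('num')); // 'num_3' → (?<num_3>...)
```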
@@ -887,15 +905,17 @@ const expandTokensWithCaptures = (query, fuzzyTransform) => {
      if (!tokenMatch) return segment.value;
      const [, tokenName, captureName] = tokenMatch;
      if (!tokenName && captureName) {
-       captureNames.push(captureName);
-       return `(?<${captureName}>.+)`;
+       const uniqueName = getUniqueCaptureName(captureName);
+       captureNames.push(uniqueName);
+       return `(?<${uniqueName}>.+)`;
      }
      let tokenPattern = TOKEN_PATTERNS[tokenName];
      if (!tokenPattern) return segment.value;
      if (fuzzyTransform) tokenPattern = tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/.test(part) ? fuzzyTransform(part) : part).join("|");
      if (captureName) {
-       captureNames.push(captureName);
-       return `(?<${captureName}>${tokenPattern})`;
+       const uniqueName = getUniqueCaptureName(captureName);
+       captureNames.push(uniqueName);
+       return `(?<${uniqueName}>${tokenPattern})`;
      }
      return tokenPattern;
    });
@@ -1514,7 +1534,7 @@ const segmentPages = (pages, options) => {
      splitPoints.push({
        capturedContent: isLineStartsAfter ? void 0 : m.captured,
        contentStartOffset: isLineStartsAfter ? markerLength : void 0,
-       index: rule.split === "at" ? m.start : m.end,
+       index: (rule.split ?? "at") === "at" ? m.start : m.end,
        meta: rule.meta,
        namedCaptures: m.namedCaptures
      });
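This is the runtime half of the optional-`split` change. Previously `rule.split === "at"` with an undefined `split` would have fallen through to the `m.end` ('after') branch; the nullish coalescing makes the omitted case behave as 'at'. Reduced to a toy function for illustration (offsets are made up):

```typescript
// Illustrative only — same branch logic as the diff above.
const splitIndexFor = (split: 'at' | 'after' | undefined, start: number, end: number): number =>
    (split ?? 'at') === 'at' ? start : end;

console.log(splitIndexFor(undefined, 10, 17)); // 10 — omitted split now means 'at'
console.log(splitIndexFor('after', 10, 17));   // 17
console.log(splitIndexFor('at', 10, 17));      // 10
```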
@@ -1 +1 @@
- {"version":3,"file":"index.mjs","names":["EQUIV_GROUPS: string[][]","seg: Segment","processPattern","first: { index: number; length: number } | undefined","last: { index: number; length: number } | undefined","namedCaptures: Record<string, string>","BASE_TOKENS: Record<string, string>","COMPOSITE_TOKENS: Record<string, string>","TOKEN_PATTERNS: Record<string, string>","captureNames: string[]","segments: Array<{ type: 'token' | 'text'; value: string }>","match: RegExpExecArray | null","s: {\n lineStartsWith?: string[];\n lineStartsAfter?: string[];\n lineEndsWith?: string[];\n template?: string;\n regex?: string;\n }","allCaptureNames: string[]","boundaries: PageBoundary[]","pageBreaks: number[]","parts: string[]","matches: MatchResult[]","result: MatchResult","result: number[]","pageIds","expandedBreakpoints","cumulativeOffsets","cumulativeOffsets: number[]","result: Segment[]","breakpointCtx: BreakpointContext","splitPoints: SplitPoint[]","initialSeg: Segment","createSegment","seg: Segment","segments: Segment[]","TOKEN_PRIORITY_ORDER: string[]","results: DetectedPattern[]","coveredRanges: Array<[number, number]>","match: RegExpExecArray | null"],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/breakpoint-utils.ts","../src/segmentation/match-utils.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/segmentation/segmenter.ts","../src/pattern-detection.ts"],"sourcesContent":["/**\n * Fuzzy matching utilities for Arabic text.\n *\n * Provides diacritic-insensitive and character-equivalence matching for Arabic text.\n * This allows matching text regardless of:\n * - Diacritical marks (harakat/tashkeel): فَتْحَة، ضَمَّة، كَسْرَة، سُكُون، شَدَّة، تَنْوين\n * - Character equivalences: ا↔آ↔أ↔إ, ة↔ه, ى↔ي\n *\n * @module fuzzy\n *\n * @example\n * // Make a pattern diacritic-insensitive\n * const pattern = makeDiacriticInsensitive('حدثنا');\n * new RegExp(pattern, 'u').test('حَدَّثَنَا') // → true\n */\n\n/**\n * Character class matching all Arabic diacritics (Tashkeel/Harakat).\n *\n * Includes the following diacritical marks:\n * - U+064B: ً (fathatan - double fatha)\n * - U+064C: ٌ (dammatan - double damma)\n * - U+064D: ٍ (kasratan - double kasra)\n * - U+064E: َ (fatha - short a)\n * - U+064F: ُ (damma - short u)\n * - U+0650: ِ (kasra - short i)\n * - U+0651: ّ (shadda - gemination)\n * - U+0652: ْ (sukun - no vowel)\n *\n * @internal\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters.\n *\n * Characters within the same group are considered equivalent for matching purposes.\n * This handles common variations in Arabic text where different characters are\n * used interchangeably or have the same underlying meaning.\n *\n * Equivalence groups:\n * - Alef variants: ا (bare), آ (with madda), أ (with hamza above), إ (with hamza below)\n * - Ta marbuta and Ha: ة ↔ ه (often interchangeable at word endings)\n * - Alef maqsura and Ya: ى ↔ ي (often interchangeable at word endings)\n *\n * @internal\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/**\n * Escapes a string for safe inclusion in a regular expression.\n *\n * Escapes all regex metacharacters: `.*+?^${}()|[\\]\\\\`\n *\n * @param s - Any string to escape\n * @returns String with regex metacharacters escaped\n *\n * @example\n * escapeRegex('hello.world') // → 
'hello\\\\.world'\n * escapeRegex('[test]') // → '\\\\[test\\\\]'\n * escapeRegex('a+b*c?') // → 'a\\\\+b\\\\*c\\\\?'\n */\nexport const escapeRegex = (s: string): string => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/**\n * Returns a regex character class for all equivalents of a given character.\n *\n * If the character belongs to one of the predefined equivalence groups\n * (e.g., ا/آ/أ/إ), the returned class will match any member of that group.\n * Otherwise, the original character is simply escaped for safe regex inclusion.\n *\n * @param ch - A single character to expand into its equivalence class\n * @returns A RegExp-safe string representing the character and its equivalents\n *\n * @example\n * getEquivClass('ا') // → '[اآأإ]' (matches any alef variant)\n * getEquivClass('ب') // → 'ب' (no equivalents, just escaped)\n * getEquivClass('.') // → '\\\\.' (regex metachar escaped)\n *\n * @internal\n */\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeRegex(ch);\n};\n\n/**\n * Performs light normalization on Arabic text for consistent matching.\n *\n * Normalization steps:\n * 1. NFC normalization (canonical decomposition then composition)\n * 2. Remove Zero-Width Joiner (U+200D) and Zero-Width Non-Joiner (U+200C)\n * 3. Collapse multiple whitespace characters to single space\n * 4. Trim leading and trailing whitespace\n *\n * This normalization preserves diacritics and letter forms while removing\n * invisible characters that could interfere with matching.\n *\n * @param str - Arabic text to normalize\n * @returns Normalized string\n *\n * @example\n * normalizeArabicLight('حَدَّثَنَا') // → 'حَدَّثَنَا' (diacritics preserved)\n * normalizeArabicLight('بسم الله') // → 'بسم الله' (spaces collapsed)\n * normalizeArabicLight(' text ') // → 'text' (trimmed)\n *\n * @internal\n */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n *\n * Transforms input text into a regex pattern that matches the text regardless\n * of diacritical marks (harakat) and character variations. Each character in\n * the input is:\n * 1. Expanded to its equivalence class (if applicable)\n * 2. 
Followed by an optional diacritics matcher\n *\n * This allows matching:\n * - `حدثنا` with `حَدَّثَنَا` (with full diacritics)\n * - `الإيمان` with `الايمان` (alef variants)\n * - `صلاة` with `صلاه` (ta marbuta ↔ ha)\n *\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics\n *\n * @example\n * const pattern = makeDiacriticInsensitive('حدثنا');\n * // Each char gets equivalence class + optional diacritics\n * // Result matches: حدثنا, حَدَّثَنَا, حَدَثَنَا, etc.\n *\n * @example\n * const pattern = makeDiacriticInsensitive('باب');\n * new RegExp(pattern, 'u').test('بَابٌ') // → true\n * new RegExp(pattern, 'u').test('باب') // → true\n *\n * @example\n * // Using with split rules\n * {\n * lineStartsWith: ['باب'],\n * split: 'at',\n * fuzzy: true // Applies makeDiacriticInsensitive internally\n * }\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","/**\n * Utility functions for breakpoint processing in the segmentation engine.\n *\n * These functions handle breakpoint normalization, page exclusion checking,\n * and segment creation. Extracted for independent testing and reuse.\n *\n * @module breakpoint-utils\n */\n\nimport type { Breakpoint, BreakpointRule, PageRange, Segment } from './types.js';\n\n/**\n * Normalizes a breakpoint to the object form.\n * Strings are converted to { pattern: str } with no constraints.\n *\n * @param bp - Breakpoint as string or object\n * @returns Normalized BreakpointRule object\n *\n * @example\n * normalizeBreakpoint('\\\\n\\\\n')\n * // → { pattern: '\\\\n\\\\n' }\n *\n * normalizeBreakpoint({ pattern: '\\\\n', min: 10 })\n * // → { pattern: '\\\\n', min: 10 }\n */\nexport const normalizeBreakpoint = (bp: Breakpoint): BreakpointRule => (typeof bp === 'string' ? 
{ pattern: bp } : bp);\n\n/**\n * Checks if a page ID is in an excluded list (single pages or ranges).\n *\n * @param pageId - Page ID to check\n * @param excludeList - List of page IDs or [from, to] ranges to exclude\n * @returns True if page is excluded\n *\n * @example\n * isPageExcluded(5, [1, 5, 10])\n * // → true\n *\n * isPageExcluded(5, [[3, 7]])\n * // → true\n *\n * isPageExcluded(5, [[10, 20]])\n * // → false\n */\nexport const isPageExcluded = (pageId: number, excludeList: PageRange[] | undefined): boolean => {\n if (!excludeList || excludeList.length === 0) {\n return false;\n }\n for (const item of excludeList) {\n if (typeof item === 'number') {\n if (pageId === item) {\n return true;\n }\n } else {\n const [from, to] = item;\n if (pageId >= from && pageId <= to) {\n return true;\n }\n }\n }\n return false;\n};\n\n/**\n * Checks if a page ID is within a breakpoint's min/max range and not excluded.\n *\n * @param pageId - Page ID to check\n * @param rule - Breakpoint rule with optional min/max/exclude constraints\n * @returns True if page is within valid range\n *\n * @example\n * isInBreakpointRange(50, { pattern: '\\\\n', min: 10, max: 100 })\n * // → true\n *\n * isInBreakpointRange(5, { pattern: '\\\\n', min: 10 })\n * // → false (below min)\n */\nexport const isInBreakpointRange = (pageId: number, rule: BreakpointRule): boolean => {\n if (rule.min !== undefined && pageId < rule.min) {\n return false;\n }\n if (rule.max !== undefined && pageId > rule.max) {\n return false;\n }\n return !isPageExcluded(pageId, rule.exclude);\n};\n\n/**\n * Builds an exclude set from a PageRange array for O(1) lookups.\n *\n * @param excludeList - List of page IDs or [from, to] ranges\n * @returns Set of all excluded page IDs\n *\n * @remarks\n * This expands ranges into explicit page IDs for fast membership checks. For typical\n * book-scale inputs (thousands of pages), this is small and keeps downstream logic\n * simple and fast. 
If you expect extremely large ranges (e.g., millions of pages),\n * consider avoiding broad excludes or introducing a range-based membership structure.\n *\n * @example\n * buildExcludeSet([1, 5, [10, 12]])\n * // → Set { 1, 5, 10, 11, 12 }\n */\nexport const buildExcludeSet = (excludeList: PageRange[] | undefined): Set<number> => {\n const excludeSet = new Set<number>();\n for (const item of excludeList || []) {\n if (typeof item === 'number') {\n excludeSet.add(item);\n } else {\n for (let i = item[0]; i <= item[1]; i++) {\n excludeSet.add(i);\n }\n }\n }\n return excludeSet;\n};\n\n/**\n * Creates a segment with optional to and meta fields.\n * Returns null if content is empty after trimming.\n *\n * @param content - Segment content\n * @param fromPageId - Starting page ID\n * @param toPageId - Optional ending page ID (omitted if same as from)\n * @param meta - Optional metadata to attach\n * @returns Segment object or null if empty\n *\n * @example\n * createSegment('Hello world', 1, 3, { chapter: 1 })\n * // → { content: 'Hello world', from: 1, to: 3, meta: { chapter: 1 } }\n *\n * createSegment(' ', 1, undefined, undefined)\n * // → null (empty content)\n */\nexport const createSegment = (\n content: string,\n fromPageId: number,\n toPageId: number | undefined,\n meta: Record<string, unknown> | undefined,\n): Segment | null => {\n const trimmed = content.trim();\n if (!trimmed) {\n return null;\n }\n const seg: Segment = { content: trimmed, from: fromPageId };\n if (toPageId !== undefined && toPageId !== fromPageId) {\n seg.to = toPageId;\n }\n if (meta) {\n seg.meta = meta;\n }\n return seg;\n};\n\n/** Expanded breakpoint with pre-compiled regex and exclude set */\nexport type ExpandedBreakpoint = {\n rule: BreakpointRule;\n regex: RegExp | null;\n excludeSet: Set<number>;\n skipWhenRegex: RegExp | null;\n};\n\n/** Function type for pattern processing */\nexport type PatternProcessor = (pattern: string) => string;\n\n/**\n * Expands breakpoint patterns and pre-computes exclude sets.\n *\n * @param breakpoints - Array of breakpoint patterns or rules\n * @param processPattern - Function to expand tokens in patterns\n * @returns Array of expanded breakpoints with compiled regexes\n *\n * @remarks\n * This function compiles regex patterns dynamically. This can be a ReDoS vector\n * if patterns come from untrusted sources. In typical usage, breakpoint rules\n * are application configuration, not user input.\n */\nexport const expandBreakpoints = (breakpoints: Breakpoint[], processPattern: PatternProcessor): ExpandedBreakpoint[] =>\n breakpoints.map((bp) => {\n const rule = normalizeBreakpoint(bp);\n const excludeSet = buildExcludeSet(rule.exclude);\n const skipWhenRegex =\n rule.skipWhen !== undefined\n ? (() => {\n const expandedSkip = processPattern(rule.skipWhen);\n try {\n return new RegExp(expandedSkip, 'mu');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(`Invalid breakpoint skipWhen regex: ${rule.skipWhen}\\n Cause: ${message}`);\n }\n })()\n : null;\n if (rule.pattern === '') {\n return { excludeSet, regex: null, rule, skipWhenRegex };\n }\n const expanded = processPattern(rule.pattern);\n try {\n return { excludeSet, regex: new RegExp(expanded, 'gmu'), rule, skipWhenRegex };\n } catch (error) {\n const message = error instanceof Error ? 
error.message : String(error);\n throw new Error(`Invalid breakpoint regex: ${rule.pattern}\\n Cause: ${message}`);\n }\n });\n\n/** Normalized page data for efficient lookups */\nexport type NormalizedPage = { content: string; length: number; index: number };\n\n/**\n * Finds the actual ending page index by searching backwards for page content prefix.\n * Used to determine which page a segment actually ends on based on content matching.\n *\n * @param pieceContent - Content of the segment piece\n * @param currentFromIdx - Current starting index in pageIds\n * @param toIdx - Maximum ending index to search\n * @param pageIds - Array of page IDs\n * @param normalizedPages - Map of page ID to normalized content\n * @returns The actual ending page index\n */\nexport const findActualEndPage = (\n pieceContent: string,\n currentFromIdx: number,\n toIdx: number,\n pageIds: number[],\n normalizedPages: Map<number, NormalizedPage>,\n): number => {\n for (let pi = toIdx; pi > currentFromIdx; pi--) {\n const pageData = normalizedPages.get(pageIds[pi]);\n if (pageData) {\n const checkPortion = pageData.content.slice(0, Math.min(30, pageData.length));\n if (checkPortion.length > 0 && pieceContent.indexOf(checkPortion) > 0) {\n return pi;\n }\n }\n }\n return currentFromIdx;\n};\n\n/**\n * Finds the actual starting page index by searching forwards for page content prefix.\n * Used to determine which page content actually starts from based on content matching.\n *\n * This is the counterpart to findActualEndPage - it searches forward to find which\n * page the content starts on, rather than which page it ends on.\n *\n * @param pieceContent - Content of the segment piece\n * @param currentFromIdx - Current starting index in pageIds\n * @param toIdx - Maximum ending index to search\n * @param pageIds - Array of page IDs\n * @param normalizedPages - Map of page ID to normalized content\n * @returns The actual starting page index\n */\nexport const findActualStartPage = (\n pieceContent: string,\n currentFromIdx: number,\n toIdx: number,\n pageIds: number[],\n normalizedPages: Map<number, NormalizedPage>,\n): number => {\n const trimmedPiece = pieceContent.trimStart();\n if (!trimmedPiece) {\n return currentFromIdx;\n }\n\n // Search forward from currentFromIdx to find which page the content starts on\n for (let pi = currentFromIdx; pi <= toIdx; pi++) {\n const pageData = normalizedPages.get(pageIds[pi]);\n if (pageData) {\n const pagePrefix = pageData.content.slice(0, Math.min(30, pageData.length)).trim();\n const piecePrefix = trimmedPiece.slice(0, Math.min(30, trimmedPiece.length));\n\n // Check both directions:\n // 1. pieceContent starts with page prefix (page content is longer)\n // 2. 
page content starts with pieceContent prefix (pieceContent is shorter)\n if (pagePrefix.length > 0) {\n if (trimmedPiece.startsWith(pagePrefix)) {\n return pi;\n }\n if (pageData.content.trimStart().startsWith(piecePrefix)) {\n return pi;\n }\n }\n }\n }\n return currentFromIdx;\n};\n\n/** Context required for finding break positions */\nexport type BreakpointContext = {\n pageIds: number[];\n normalizedPages: Map<number, NormalizedPage>;\n cumulativeOffsets: number[];\n expandedBreakpoints: ExpandedBreakpoint[];\n prefer: 'longer' | 'shorter';\n};\n\n/**\n * Checks if any page in a range is excluded by the given exclude set.\n *\n * @param excludeSet - Set of excluded page IDs\n * @param pageIds - Array of page IDs\n * @param fromIdx - Start index (inclusive)\n * @param toIdx - End index (inclusive)\n * @returns True if any page in range is excluded\n */\nexport const hasExcludedPageInRange = (\n excludeSet: Set<number>,\n pageIds: number[],\n fromIdx: number,\n toIdx: number,\n): boolean => {\n if (excludeSet.size === 0) {\n return false;\n }\n for (let pageIdx = fromIdx; pageIdx <= toIdx; pageIdx++) {\n if (excludeSet.has(pageIds[pageIdx])) {\n return true;\n }\n }\n return false;\n};\n\n/**\n * Finds the position of the next page content within remaining content.\n * Returns -1 if not found.\n *\n * @param remainingContent - Content to search in\n * @param nextPageData - Normalized data for the next page\n * @returns Position of next page content, or -1 if not found\n */\nexport const findNextPagePosition = (remainingContent: string, nextPageData: NormalizedPage): number => {\n const searchPrefix = nextPageData.content.trim().slice(0, Math.min(30, nextPageData.length));\n if (searchPrefix.length === 0) {\n return -1;\n }\n const pos = remainingContent.indexOf(searchPrefix);\n return pos > 0 ? pos : -1;\n};\n\n/**\n * Finds matches within a window and returns the selected position based on preference.\n *\n * @param windowContent - Content to search\n * @param regex - Regex to match\n * @param prefer - 'longer' for last match, 'shorter' for first match\n * @returns Break position after the selected match, or -1 if no matches\n */\nexport const findPatternBreakPosition = (\n windowContent: string,\n regex: RegExp,\n prefer: 'longer' | 'shorter',\n): number => {\n // OPTIMIZATION: Stream matches instead of collecting all into an array.\n // Only track first and last match to avoid allocating large arrays for dense patterns.\n let first: { index: number; length: number } | undefined;\n let last: { index: number; length: number } | undefined;\n for (const m of windowContent.matchAll(regex)) {\n const match = { index: m.index, length: m[0].length };\n if (!first) {\n first = match;\n }\n last = match;\n }\n if (!first) {\n return -1;\n }\n const selected = prefer === 'longer' ? last! 
: first;\n return selected.index + selected.length;\n};\n\n/**\n * Tries to find a break position within the current window using breakpoint patterns.\n * Returns the break position or -1 if no suitable break was found.\n *\n * @param remainingContent - Content remaining to be segmented\n * @param currentFromIdx - Current starting page index\n * @param toIdx - Ending page index\n * @param windowEndIdx - Maximum window end index\n * @param ctx - Breakpoint context with page data and patterns\n * @returns Break position in the content, or -1 if no break found\n */\nexport const findBreakPosition = (\n remainingContent: string,\n currentFromIdx: number,\n toIdx: number,\n windowEndIdx: number,\n ctx: BreakpointContext,\n): number => {\n const { pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, prefer } = ctx;\n\n for (const { rule, regex, excludeSet, skipWhenRegex } of expandedBreakpoints) {\n // Check if this breakpoint applies to the current segment's starting page\n if (!isInBreakpointRange(pageIds[currentFromIdx], rule)) {\n continue;\n }\n\n // Check if ANY page in the current WINDOW is excluded (not the entire segment)\n if (hasExcludedPageInRange(excludeSet, pageIds, currentFromIdx, windowEndIdx)) {\n continue;\n }\n\n // Check if content matches skipWhen pattern (pre-compiled)\n if (skipWhenRegex?.test(remainingContent)) {\n continue;\n }\n\n // Handle page boundary (empty pattern)\n if (regex === null) {\n const nextPageIdx = windowEndIdx + 1;\n if (nextPageIdx <= toIdx) {\n const nextPageData = normalizedPages.get(pageIds[nextPageIdx]);\n if (nextPageData) {\n const pos = findNextPagePosition(remainingContent, nextPageData);\n if (pos > 0) {\n return pos;\n }\n }\n }\n // Fallback to cumulative offsets\n return Math.min(\n cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx],\n remainingContent.length,\n );\n }\n\n // Find matches within window\n const windowEndPosition = Math.min(\n cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx],\n remainingContent.length,\n );\n const windowContent = remainingContent.slice(0, windowEndPosition);\n const breakPos = findPatternBreakPosition(windowContent, regex, prefer);\n if (breakPos > 0) {\n return breakPos;\n }\n }\n\n return -1;\n};\n","/**\n * Utility functions for regex matching and result processing.\n *\n * These functions were extracted from `segmenter.ts` to reduce complexity\n * and enable independent testing. 
They handle match filtering, capture\n * extraction, and occurrence-based selection.\n *\n * @module match-utils\n */\n\nimport { isPageExcluded } from './breakpoint-utils.js';\nimport type { SplitRule } from './types.js';\n\n/**\n * Result of a regex match with position and optional capture information.\n *\n * Represents a single match found by the segmentation engine, including\n * its position in the concatenated content and any captured values.\n */\nexport type MatchResult = {\n /**\n * Start offset (inclusive) of the match in the content string.\n */\n start: number;\n\n /**\n * End offset (exclusive) of the match in the content string.\n *\n * The matched text is `content.slice(start, end)`.\n */\n end: number;\n\n /**\n * Content captured by `lineStartsAfter` patterns.\n *\n * For patterns like `^٦٦٩٦ - (.*)`, this contains the text\n * matched by the `(.*)` group (the rest of the line after the marker).\n */\n captured?: string;\n\n /**\n * Named capture group values from `{{token:name}}` syntax.\n *\n * Keys are the capture names, values are the matched strings.\n *\n * @example\n * // For pattern '{{raqms:num}} {{dash}}'\n * { num: '٦٦٩٦' }\n */\n namedCaptures?: Record<string, string>;\n};\n\n/**\n * Extracts named capture groups from a regex match.\n *\n * Only includes groups that are in the `captureNames` list and have\n * defined values. This filters out positional captures and ensures\n * only explicitly requested named captures are returned.\n *\n * @param groups - The `match.groups` object from `RegExp.exec()`\n * @param captureNames - List of capture names to extract (from `{{token:name}}` syntax)\n * @returns Object with capture name → value pairs, or `undefined` if none found\n *\n * @example\n * const match = /(?<num>[٠-٩]+) -/.exec('٦٦٩٦ - text');\n * extractNamedCaptures(match.groups, ['num'])\n * // → { num: '٦٦٩٦' }\n *\n * @example\n * // No matching captures\n * extractNamedCaptures({}, ['num'])\n * // → undefined\n *\n * @example\n * // Undefined groups\n * extractNamedCaptures(undefined, ['num'])\n * // → undefined\n */\nexport const extractNamedCaptures = (\n groups: Record<string, string> | undefined,\n captureNames: string[],\n): Record<string, string> | undefined => {\n if (!groups || captureNames.length === 0) {\n return undefined;\n }\n\n const namedCaptures: Record<string, string> = {};\n for (const name of captureNames) {\n if (groups[name] !== undefined) {\n namedCaptures[name] = groups[name];\n }\n }\n\n return Object.keys(namedCaptures).length > 0 ? namedCaptures : undefined;\n};\n\n/**\n * Gets the last defined positional capture group from a match array.\n *\n * Used for `lineStartsAfter` patterns where the content capture (`.*`)\n * is always at the end of the pattern. 
Named captures may shift the\n * positional indices, so we iterate backward to find the actual content.\n *\n * @param match - RegExp exec result array\n * @returns The last defined capture group value, or `undefined` if none\n *\n * @example\n * // Pattern: ^(?:(?<num>[٠-٩]+) - )(.*)\n * // Match array: ['٦٦٩٦ - content', '٦٦٩٦', 'content']\n * getLastPositionalCapture(match)\n * // → 'content'\n *\n * @example\n * // No captures\n * getLastPositionalCapture(['full match'])\n * // → undefined\n */\nexport const getLastPositionalCapture = (match: RegExpExecArray): string | undefined => {\n if (match.length <= 1) {\n return undefined;\n }\n\n for (let i = match.length - 1; i >= 1; i--) {\n if (match[i] !== undefined) {\n return match[i];\n }\n }\n return undefined;\n};\n\n/**\n * Filters matches to only include those within page ID constraints.\n *\n * Applies the `min`, `max`, and `exclude` constraints from a rule to filter out\n * matches that occur on pages outside the allowed range or explicitly excluded.\n *\n * @param matches - Array of match results to filter\n * @param rule - Rule containing `min`, `max`, and/or `exclude` page constraints\n * @param getId - Function that returns the page ID for a given offset\n * @returns Filtered array containing only matches within constraints\n *\n * @example\n * const matches = [\n * { start: 0, end: 10 }, // Page 1\n * { start: 100, end: 110 }, // Page 5\n * { start: 200, end: 210 }, // Page 10\n * ];\n * filterByConstraints(matches, { min: 3, max: 8 }, getId)\n * // → [{ start: 100, end: 110 }] (only page 5 match)\n */\nexport const filterByConstraints = (\n matches: MatchResult[],\n rule: Pick<SplitRule, 'min' | 'max' | 'exclude'>,\n getId: (offset: number) => number,\n): MatchResult[] => {\n return matches.filter((m) => {\n const id = getId(m.start);\n if (rule.min !== undefined && id < rule.min) {\n return false;\n }\n if (rule.max !== undefined && id > rule.max) {\n return false;\n }\n if (isPageExcluded(id, rule.exclude)) {\n return false;\n }\n return true;\n });\n};\n\n/**\n * Filters matches based on occurrence setting (first, last, or all).\n *\n * Applies occurrence-based selection to a list of matches:\n * - `'all'` or `undefined`: Return all matches (default)\n * - `'first'`: Return only the first match\n * - `'last'`: Return only the last match\n *\n * @param matches - Array of match results to filter\n * @param occurrence - Which occurrence(s) to keep\n * @returns Filtered array based on occurrence setting\n *\n * @example\n * const matches = [{ start: 0 }, { start: 10 }, { start: 20 }];\n *\n * filterByOccurrence(matches, 'first')\n * // → [{ start: 0 }]\n *\n * filterByOccurrence(matches, 'last')\n * // → [{ start: 20 }]\n *\n * filterByOccurrence(matches, 'all')\n * // → [{ start: 0 }, { start: 10 }, { start: 20 }]\n *\n * filterByOccurrence(matches, undefined)\n * // → [{ start: 0 }, { start: 10 }, { start: 20 }] (default: all)\n */\nexport const filterByOccurrence = (matches: MatchResult[], occurrence?: 'first' | 'last' | 'all'): MatchResult[] => {\n if (!matches.length) {\n return [];\n }\n if (occurrence === 'first') {\n return [matches[0]];\n }\n if (occurrence === 'last') {\n return [matches[matches.length - 1]];\n }\n return matches;\n};\n\n/**\n * Groups matches using a sliding window approach based on page ID difference.\n *\n * Uses a lookahead algorithm where `maxSpan` is the maximum page ID difference\n * allowed when looking ahead for the next split point. 
This prefers longer\n * segments by looking as far ahead as allowed before selecting a match.\n *\n * Algorithm:\n * 1. Start from the first page in the pages list\n * 2. Look for matches within `maxSpan` page IDs ahead\n * 3. Apply occurrence filter (e.g., 'last') to select a match\n * 4. If match found, add it; move window to start from the next page after the match\n * 5. If no match in window, skip to the next page and repeat\n *\n * @param matches - Array of match results (must be sorted by start position)\n * @param maxSpan - Maximum page ID difference allowed when looking ahead\n * @param occurrence - Which occurrence(s) to keep within each window\n * @param getId - Function that returns the page ID for a given offset\n * @param pageIds - Sorted array of all page IDs in the content\n * @returns Filtered array with sliding window and occurrence filter applied\n *\n * @example\n * // Pages: [1, 2, 3], maxSpan=1, occurrence='last'\n * // Window from page 1: pages 1-2 (diff <= 1)\n * // Finds last match in pages 1-2, adds it\n * // Next window from page 3: just page 3\n * // Result: segments span pages 1-2 and page 3\n */\nexport const groupBySpanAndFilter = (\n matches: MatchResult[],\n maxSpan: number,\n occurrence: 'first' | 'last' | 'all' | undefined,\n getId: (offset: number) => number,\n pageIds?: number[],\n): MatchResult[] => {\n if (!matches.length) {\n return [];\n }\n\n // Precompute pageId per match once to avoid O(P×M) behavior for large inputs.\n // Since match offsets are in concatenated page order, pageIds are expected to be non-decreasing.\n const matchPageIds = matches.map((m) => getId(m.start));\n\n // If no pageIds provided, fall back to unique page IDs from matches\n const uniquePageIds =\n pageIds ?? [...new Set(matchPageIds)].sort((a, b) => a - b);\n\n if (!uniquePageIds.length) {\n return filterByOccurrence(matches, occurrence);\n }\n\n const result: MatchResult[] = [];\n let windowStartIdx = 0; // Index into uniquePageIds\n let matchIdx = 0; // Index into matches/matchPageIds\n\n while (windowStartIdx < uniquePageIds.length) {\n const windowStartPageId = uniquePageIds[windowStartIdx];\n const windowEndPageId = windowStartPageId + maxSpan;\n\n // Advance matchIdx to first match in or after the window start page.\n while (matchIdx < matches.length && matchPageIds[matchIdx] < windowStartPageId) {\n matchIdx++;\n }\n\n // No remaining matches anywhere\n if (matchIdx >= matches.length) {\n break;\n }\n\n // Find range of matches that fall within [windowStartPageId, windowEndPageId]\n const windowMatchStart = matchIdx;\n let windowMatchEndExclusive = windowMatchStart;\n while (windowMatchEndExclusive < matches.length && matchPageIds[windowMatchEndExclusive] <= windowEndPageId) {\n windowMatchEndExclusive++;\n }\n\n if (windowMatchEndExclusive <= windowMatchStart) {\n // No matches in this window, move to next page\n windowStartIdx++;\n continue;\n }\n\n // Apply occurrence selection without allocating/filtering per window.\n let selectedStart = windowMatchStart;\n let selectedEndExclusive = windowMatchEndExclusive;\n if (occurrence === 'first') {\n selectedEndExclusive = selectedStart + 1;\n } else if (occurrence === 'last') {\n selectedStart = windowMatchEndExclusive - 1;\n }\n\n for (let i = selectedStart; i < selectedEndExclusive; i++) {\n result.push(matches[i]);\n }\n\n const lastSelectedIndex = selectedEndExclusive - 1;\n const lastMatchPageId = matchPageIds[lastSelectedIndex];\n\n // Move window to start after the last selected match's page\n while 
(windowStartIdx < uniquePageIds.length && uniquePageIds[windowStartIdx] <= lastMatchPageId) {\n windowStartIdx++;\n }\n\n // Matches before this index can never be selected again (windowStartPageId only increases)\n matchIdx = lastSelectedIndex + 1;\n }\n\n return result;\n};\n\n/**\n * Checks if any rule in the list allows the given page ID.\n *\n * A rule allows an ID if it falls within the rule's `min`/`max` constraints.\n * Rules without constraints allow all page IDs.\n *\n * This is used to determine whether to create a segment for content\n * that appears before any split points (the \"first segment\").\n *\n * @param rules - Array of rules with optional `min` and `max` constraints\n * @param pageId - Page ID to check\n * @returns `true` if at least one rule allows the page ID\n *\n * @example\n * const rules = [\n * { min: 5, max: 10 }, // Allows pages 5-10\n * { min: 20 }, // Allows pages 20+\n * ];\n *\n * anyRuleAllowsId(rules, 7) // → true (first rule allows)\n * anyRuleAllowsId(rules, 3) // → false (no rule allows)\n * anyRuleAllowsId(rules, 25) // → true (second rule allows)\n *\n * @example\n * // Rules without constraints allow everything\n * anyRuleAllowsId([{}], 999) // → true\n */\nexport const anyRuleAllowsId = (rules: Pick<SplitRule, 'min' | 'max'>[], pageId: number): boolean => {\n return rules.some((r) => {\n const minOk = r.min === undefined || pageId >= r.min;\n const maxOk = r.max === undefined || pageId <= r.max;\n return minOk && maxOk;\n });\n};\n","/**\n * Strip all HTML tags from content, keeping only text.\n *\n * @param html - HTML content\n * @returns Plain text content\n */\nexport const stripHtmlTags = (html: string): string => {\n return html.replace(/<[^>]*>/g, '');\n};\n\n/**\n * Normalizes line endings to Unix-style (`\\n`).\n *\n * Converts Windows (`\\r\\n`) and old Mac (`\\r`) line endings to Unix style\n * for consistent pattern matching across platforms.\n *\n * @param content - Raw content with potentially mixed line endings\n * @returns Content with all line endings normalized to `\\n`\n */\n// OPTIMIZATION: Fast-path when no \\r present (common case for Unix/Mac content)\nexport const normalizeLineEndings = (content: string) =>\n content.includes('\\r') ? content.replace(/\\r\\n?/g, '\\n') : content;\n","/**\n * Token-based template system for Arabic text pattern matching.\n *\n * This module provides a human-readable way to define regex patterns using\n * `{{token}}` placeholders that expand to their regex equivalents. It supports\n * named capture groups for extracting matched values into metadata.\n *\n * @module tokens\n *\n * @example\n * // Simple token expansion\n * expandTokens('{{raqms}} {{dash}}')\n * // → '[\\\\u0660-\\\\u0669]+ [-–—ـ]'\n *\n * @example\n * // Named capture groups\n * expandTokensWithCaptures('{{raqms:num}} {{dash}}')\n * // → { pattern: '(?<num>[\\\\u0660-\\\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }\n */\n\n/**\n * Token definitions mapping human-readable token names to regex patterns.\n *\n * Tokens are used in template strings with double-brace syntax:\n * - `{{token}}` - Expands to the pattern (non-capturing in context)\n * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`\n * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`\n *\n * @remarks\n * These patterns are designed for Arabic text matching. 
For diacritic-insensitive\n * matching of Arabic patterns, use the `fuzzy: true` option in split rules,\n * which applies `makeDiacriticInsensitive()` to the expanded patterns.\n *\n * @example\n * // Using tokens in a split rule\n * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }\n *\n * @example\n * // Using tokens with named captures\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n */\n// ─────────────────────────────────────────────────────────────\n// Auto-escaping for template patterns\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Escapes regex metacharacters (parentheses and brackets) in template patterns,\n * but preserves content inside `{{...}}` token delimiters.\n *\n * This allows users to write intuitive patterns like `({{harf}}):` instead of\n * the verbose `\\\\({{harf}}\\\\):`. The escaping is applied BEFORE token expansion,\n * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.\n *\n * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`\n * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters\n *\n * @example\n * escapeTemplateBrackets('({{harf}}): ')\n * // → '\\\\({{harf}}\\\\): '\n *\n * @example\n * escapeTemplateBrackets('[{{raqm}}] ')\n * // → '\\\\[{{raqm}}\\\\] '\n *\n * @example\n * escapeTemplateBrackets('{{harf}}')\n * // → '{{harf}}' (unchanged - no brackets outside tokens)\n */\nexport const escapeTemplateBrackets = (pattern: string): string => {\n // Match either a token ({{...}}) or a bracket character\n // Tokens are preserved as-is, brackets are escaped\n return pattern.replace(/(\\{\\{[^}]*\\}\\})|([()[\\]])/g, (match, token, bracket) => {\n if (token) {\n return token; // Leave tokens intact\n }\n return `\\\\${bracket}`; // Escape the bracket\n });\n};\n\n// ─────────────────────────────────────────────────────────────\n// Base tokens - raw regex patterns (no template references)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Base token definitions mapping human-readable token names to regex patterns.\n *\n * These tokens contain raw regex patterns and do not reference other tokens.\n * For composite tokens that build on these, see `COMPOSITE_TOKENS`.\n *\n * @internal\n */\nconst BASE_TOKENS: Record<string, string> = {\n /**\n * Chapter marker - Arabic word for \"chapter\" (باب).\n *\n * Commonly used in hadith collections to mark chapter divisions.\n *\n * @example 'باب ما جاء في الصلاة' (Chapter on what came regarding prayer)\n */\n bab: 'باب',\n\n /**\n * Basmala pattern - Arabic invocation \"In the name of Allah\" (بسم الله).\n *\n * Matches the beginning of the basmala formula, commonly appearing\n * at the start of chapters, books, or documents.\n *\n * @example 'بسم الله الرحمن الرحيم' (In the name of Allah, the Most Gracious, the Most Merciful)\n */\n basmalah: 'بسم الله|﷽',\n\n /**\n * Bullet point variants - common bullet characters.\n *\n * Character class matching: `•` (bullet), `*` (asterisk), `°` (degree).\n *\n * @example '• First item'\n */\n bullet: '[•*°]',\n\n /**\n * Dash variants - various dash and separator characters.\n *\n * Character class matching:\n * - `-` (hyphen-minus U+002D)\n * - `–` (en-dash U+2013)\n * - `—` (em-dash U+2014)\n * - `ـ` (tatweel U+0640, Arabic elongation character)\n *\n * @example '٦٦٩٦ - حدثنا' or '٦٦٩٦ ـ حدثنا'\n */\n dash: '[-–—ـ]',\n\n /**\n * Section marker - Arabic word for \"section/issue\".\n * Commonly used for fiqh books.\n */\n 
fasl: 'فصل|مسألة',\n\n /**\n * Single Arabic letter - matches any Arabic letter character.\n *\n * Character range from أ (alef with hamza) to ي (ya).\n * Does NOT include diacritics (harakat/tashkeel).\n *\n * @example '{{harf}}' matches 'ب' in 'باب'\n */\n harf: '[أ-ي]',\n\n /**\n * Book marker - Arabic word for \"book\" (كتاب).\n *\n * Commonly used in hadith collections to mark major book divisions.\n *\n * @example 'كتاب الإيمان' (Book of Faith)\n */\n kitab: 'كتاب',\n\n /**\n * Naql (transmission) phrases - common hadith transmission phrases.\n *\n * Alternation of Arabic phrases used to indicate narration chains:\n * - حدثنا (he narrated to us)\n * - أخبرنا (he informed us)\n * - حدثني (he narrated to me)\n * - وحدثنا (and he narrated to us)\n * - أنبأنا (he reported to us)\n * - سمعت (I heard)\n *\n * @example '{{naql}}' matches any of the above phrases\n */\n naql: 'حدثنا|أخبرنا|حدثني|وحدثنا|أنبأنا|سمعت',\n\n /**\n * Single Arabic-Indic digit - matches one digit (٠-٩).\n *\n * Unicode range: U+0660 to U+0669 (Arabic-Indic digits).\n * Use `{{raqms}}` for one or more digits.\n *\n * @example '{{raqm}}' matches '٥' in '٥ - '\n */\n raqm: '[\\\\u0660-\\\\u0669]',\n\n /**\n * One or more Arabic-Indic digits - matches digit sequences (٠-٩)+.\n *\n * Unicode range: U+0660 to U+0669 (Arabic-Indic digits).\n * Commonly used for hadith numbers, verse numbers, etc.\n *\n * @example '{{raqms}}' matches '٦٦٩٦' in '٦٦٩٦ - حدثنا'\n */\n raqms: '[\\\\u0660-\\\\u0669]+',\n\n /**\n * Punctuation characters.\n * Use {{tarqim}} which is especially useful when splitting using split: 'after' on punctuation marks.\n */\n tarqim: '[.!?؟؛]',\n};\n\n// ─────────────────────────────────────────────────────────────\n// Composite tokens - templates that reference base tokens\n// These are pre-expanded at module load time for performance\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Composite token definitions using template syntax.\n *\n * These tokens reference base tokens using `{{token}}` syntax and are\n * automatically expanded to their final regex patterns at module load time.\n *\n * This provides better abstraction - if base tokens change, composites\n * automatically update on the next build.\n *\n * @internal\n */\nconst COMPOSITE_TOKENS: Record<string, string> = {\n /**\n * Numbered hadith marker - common format for hadith numbering.\n *\n * Matches patterns like \"٢٢ - \" (number, space, dash, space).\n * This is the most common format in hadith collections.\n *\n * Use with `lineStartsAfter` to cleanly extract hadith content:\n * ```typescript\n * { lineStartsAfter: ['{{numbered}}'], split: 'at' }\n * ```\n *\n * For capturing the hadith number, use explicit capture syntax:\n * ```typescript\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n * ```\n *\n * @example '٢٢ - حدثنا' matches, content starts after '٢٢ - '\n * @example '٦٦٩٦ – أخبرنا' matches (with en-dash)\n */\n numbered: '{{raqms}} {{dash}} ',\n};\n\n/**\n * Expands base tokens in a template string.\n * Used internally to pre-expand composite tokens.\n *\n * @param template - Template string with `{{token}}` placeholders\n * @returns Expanded pattern with base tokens replaced\n * @internal\n */\nconst expandBaseTokens = (template: string): string => {\n return template.replace(/\\{\\{(\\w+)\\}\\}/g, (_, tokenName) => {\n return BASE_TOKENS[tokenName] ?? 
`{{${tokenName}}}`;\n });\n};\n\n/**\n * Token definitions mapping human-readable token names to regex patterns.\n *\n * Tokens are used in template strings with double-brace syntax:\n * - `{{token}}` - Expands to the pattern (non-capturing in context)\n * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`\n * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`\n *\n * @remarks\n * These patterns are designed for Arabic text matching. For diacritic-insensitive\n * matching of Arabic patterns, use the `fuzzy: true` option in split rules,\n * which applies `makeDiacriticInsensitive()` to the expanded patterns.\n *\n * @example\n * // Using tokens in a split rule\n * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }\n *\n * @example\n * // Using tokens with named captures\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n *\n * @example\n * // Using the numbered convenience token\n * { lineStartsAfter: ['{{numbered}}'], split: 'at' }\n */\nexport const TOKEN_PATTERNS: Record<string, string> = {\n ...BASE_TOKENS,\n // Pre-expand composite tokens at module load time\n ...Object.fromEntries(Object.entries(COMPOSITE_TOKENS).map(([k, v]) => [k, expandBaseTokens(v)])),\n};\n\n/**\n * Regex pattern for matching tokens with optional named capture syntax.\n *\n * Matches:\n * - `{{token}}` - Simple token (group 1 = token name, group 2 = empty)\n * - `{{token:name}}` - Token with capture (group 1 = token, group 2 = name)\n * - `{{:name}}` - Capture-only (group 1 = empty, group 2 = name)\n *\n * @internal\n */\nconst TOKEN_WITH_CAPTURE_REGEX = /\\{\\{(\\w*):?(\\w*)\\}\\}/g;\n\n/**\n * Regex pattern for simple token matching (no capture syntax).\n *\n * Matches only `{{token}}` format where token is one or more word characters.\n * Used by `containsTokens()` for quick detection.\n *\n * @internal\n */\nconst SIMPLE_TOKEN_REGEX = /\\{\\{(\\w+)\\}\\}/g;\n\n/**\n * Checks if a query string contains template tokens.\n *\n * Performs a quick test for `{{token}}` patterns without actually\n * expanding them. 
Useful for determining whether to apply token\n * expansion to a string.\n *\n * @param query - String to check for tokens\n * @returns `true` if the string contains at least one `{{token}}` pattern\n *\n * @example\n * containsTokens('{{raqms}} {{dash}}') // → true\n * containsTokens('plain text') // → false\n * containsTokens('[٠-٩]+ - ') // → false (raw regex, no tokens)\n */\nexport const containsTokens = (query: string): boolean => {\n SIMPLE_TOKEN_REGEX.lastIndex = 0;\n return SIMPLE_TOKEN_REGEX.test(query);\n};\n\n/**\n * Result from expanding tokens with capture information.\n *\n * Contains the expanded pattern string along with metadata about\n * any named capture groups that were created.\n */\nexport type ExpandResult = {\n /**\n * The expanded regex pattern string with all tokens replaced.\n *\n * Named captures use the `(?<name>pattern)` syntax.\n */\n pattern: string;\n\n /**\n * Names of captured groups extracted from `{{token:name}}` syntax.\n *\n * Empty array if no named captures were found.\n */\n captureNames: string[];\n\n /**\n * Whether the pattern has any named capturing groups.\n *\n * Equivalent to `captureNames.length > 0`.\n */\n hasCaptures: boolean;\n};\n\n/**\n * Expands template tokens with support for named captures.\n *\n * This is the primary token expansion function that handles all token syntax:\n * - `{{token}}` → Expands to the token's pattern (no capture group)\n * - `{{token:name}}` → Expands to `(?<name>pattern)` (named capture)\n * - `{{:name}}` → Expands to `(?<name>.+)` (capture anything)\n *\n * Unknown tokens are left as-is in the output, allowing for partial templates.\n *\n * @param query - The template string containing tokens\n * @param fuzzyTransform - Optional function to transform Arabic text for fuzzy matching.\n * Applied to both token patterns and plain Arabic text between tokens.\n * Typically `makeDiacriticInsensitive` from the fuzzy module.\n * @returns Object with expanded pattern, capture names, and capture flag\n *\n * @example\n * // Simple token expansion\n * expandTokensWithCaptures('{{raqms}} {{dash}}')\n * // → { pattern: '[\\\\u0660-\\\\u0669]+ [-–—ـ]', captureNames: [], hasCaptures: false }\n *\n * @example\n * // Named capture\n * expandTokensWithCaptures('{{raqms:num}} {{dash}}')\n * // → { pattern: '(?<num>[\\\\u0660-\\\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }\n *\n * @example\n * // Capture-only token\n * expandTokensWithCaptures('{{raqms:num}} {{dash}} {{:content}}')\n * // → { pattern: '(?<num>[٠-٩]+) [-–—ـ] (?<content>.+)', captureNames: ['num', 'content'], hasCaptures: true }\n *\n * @example\n * // With fuzzy transform\n * expandTokensWithCaptures('{{bab}}', makeDiacriticInsensitive)\n * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }\n */\nexport const expandTokensWithCaptures = (query: string, fuzzyTransform?: (pattern: string) => string): ExpandResult => {\n const captureNames: string[] = [];\n\n // Split the query into token matches and non-token segments\n const segments: Array<{ type: 'token' | 'text'; value: string }> = [];\n let lastIndex = 0;\n TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;\n let match: RegExpExecArray | null;\n\n // biome-ignore lint/suspicious/noAssignInExpressions: standard regex exec loop pattern\n while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {\n // Add text before this token\n if (match.index > lastIndex) {\n segments.push({ type: 'text', value: query.slice(lastIndex, match.index) });\n }\n // Add the token\n segments.push({ 
type: 'token', value: match[0] });\n lastIndex = match.index + match[0].length;\n }\n // Add remaining text after last token\n if (lastIndex < query.length) {\n segments.push({ type: 'text', value: query.slice(lastIndex) });\n }\n\n // Process each segment\n const processedParts = segments.map((segment) => {\n if (segment.type === 'text') {\n // Plain text - apply fuzzy if it contains Arabic and fuzzyTransform is provided\n if (fuzzyTransform && /[\\u0600-\\u06FF]/.test(segment.value)) {\n return fuzzyTransform(segment.value);\n }\n return segment.value;\n }\n\n // Token - extract tokenName and captureName\n TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;\n const tokenMatch = TOKEN_WITH_CAPTURE_REGEX.exec(segment.value);\n if (!tokenMatch) {\n return segment.value;\n }\n\n const [, tokenName, captureName] = tokenMatch;\n\n // {{:name}} - capture anything with name\n if (!tokenName && captureName) {\n captureNames.push(captureName);\n return `(?<${captureName}>.+)`;\n }\n\n // Get the token pattern\n let tokenPattern = TOKEN_PATTERNS[tokenName];\n if (!tokenPattern) {\n // Unknown token - leave as-is\n return segment.value;\n }\n\n // Apply fuzzy transform to the token pattern\n if (fuzzyTransform) {\n // For tokens with alternation, apply fuzzy to each alternative\n tokenPattern = tokenPattern\n .split('|')\n .map((part) => (/[\\u0600-\\u06FF]/.test(part) ? fuzzyTransform(part) : part))\n .join('|');\n }\n\n // {{token:name}} - capture with name\n if (captureName) {\n captureNames.push(captureName);\n return `(?<${captureName}>${tokenPattern})`;\n }\n\n // {{token}} - no capture, just expand\n return tokenPattern;\n });\n\n return {\n captureNames,\n hasCaptures: captureNames.length > 0,\n pattern: processedParts.join(''),\n };\n};\n\n/**\n * Expands template tokens in a query string to their regex equivalents.\n *\n * This is the simple version without capture support. It returns only the\n * expanded pattern string, not capture metadata.\n *\n * Unknown tokens are left as-is, allowing for partial templates.\n *\n * @param query - Template string containing `{{token}}` placeholders\n * @returns Expanded regex pattern string\n *\n * @example\n * expandTokens('، {{raqms}}') // → '، [\\\\u0660-\\\\u0669]+'\n * expandTokens('{{raqm}}*') // → '[\\\\u0660-\\\\u0669]*'\n * expandTokens('{{dash}}{{raqm}}') // → '[-–—ـ][\\\\u0660-\\\\u0669]'\n * expandTokens('{{unknown}}') // → '{{unknown}}' (left as-is)\n *\n * @see expandTokensWithCaptures for full capture group support\n */\nexport const expandTokens = (query: string): string => expandTokensWithCaptures(query).pattern;\n\n/**\n * Converts a template string to a compiled RegExp.\n *\n * Expands all tokens and attempts to compile the result as a RegExp\n * with Unicode flag. 
Returns `null` if the resulting pattern is invalid.\n *\n * @remarks\n * This function dynamically compiles regular expressions from template strings.\n * If templates may come from untrusted sources, be aware of potential ReDoS\n * (Regular Expression Denial of Service) risks due to catastrophic backtracking.\n * Consider validating pattern complexity or applying execution timeouts when\n * running user-submitted patterns.\n *\n * @param template - Template string containing `{{token}}` placeholders\n * @returns Compiled RegExp with 'u' flag, or `null` if invalid\n *\n * @example\n * templateToRegex('، {{raqms}}') // → /، [٠-٩]+/u\n * templateToRegex('{{raqms}}+') // → /[٠-٩]++/u (might be invalid in some engines)\n * templateToRegex('(((') // → null (invalid regex)\n */\nexport const templateToRegex = (template: string): RegExp | null => {\n const expanded = expandTokens(template);\n try {\n return new RegExp(expanded, 'u');\n } catch {\n return null;\n }\n};\n\n/**\n * Lists all available token names defined in `TOKEN_PATTERNS`.\n *\n * Useful for documentation, validation, or building user interfaces\n * that show available tokens.\n *\n * @returns Array of token names (e.g., `['bab', 'basmala', 'bullet', ...]`)\n *\n * @example\n * getAvailableTokens()\n * // → ['bab', 'basmala', 'bullet', 'dash', 'harf', 'kitab', 'naql', 'raqm', 'raqms']\n */\nexport const getAvailableTokens = (): string[] => Object.keys(TOKEN_PATTERNS);\n\n/**\n * Gets the regex pattern for a specific token name.\n *\n * Returns the raw pattern string as defined in `TOKEN_PATTERNS`,\n * without any expansion or capture group wrapping.\n *\n * @param tokenName - The token name to look up (e.g., 'raqms', 'dash')\n * @returns The regex pattern string, or `undefined` if token doesn't exist\n *\n * @example\n * getTokenPattern('raqms') // → '[\\\\u0660-\\\\u0669]+'\n * getTokenPattern('dash') // → '[-–—ـ]'\n * getTokenPattern('unknown') // → undefined\n */\nexport const getTokenPattern = (tokenName: string): string | undefined => TOKEN_PATTERNS[tokenName];\n","/**\n * Core segmentation engine for splitting Arabic text pages into logical segments.\n *\n * The segmenter takes an array of pages and applies pattern-based rules to\n * identify split points, producing segments with content, page references,\n * and optional metadata.\n *\n * @module segmenter\n */\n\nimport {\n type BreakpointContext,\n createSegment,\n expandBreakpoints,\n findActualEndPage,\n findActualStartPage,\n findBreakPosition,\n hasExcludedPageInRange,\n type NormalizedPage,\n} from './breakpoint-utils.js';\nimport { makeDiacriticInsensitive } from './fuzzy.js';\nimport {\n anyRuleAllowsId,\n extractNamedCaptures,\n filterByConstraints,\n filterByOccurrence,\n getLastPositionalCapture,\n type MatchResult,\n} from './match-utils.js';\nimport { normalizeLineEndings } from './textUtils.js';\nimport { escapeTemplateBrackets, expandTokensWithCaptures } from './tokens.js';\nimport type { Breakpoint, Logger, Page, Segment, SegmentationOptions, SplitRule } from './types.js';\n\n/**\n * Checks if a regex pattern contains standard (anonymous) capturing groups.\n *\n * Detects standard capturing groups `(...)` while excluding:\n * - Non-capturing groups `(?:...)`\n * - Lookahead assertions `(?=...)` and `(?!...)`\n * - Lookbehind assertions `(?<=...)` and `(?<!...)`\n * - Named groups `(?<name>...)` (start with `(?` so excluded here)\n *\n * **Note**: Named capture groups `(?<name>...)` ARE capturing groups but are\n * excluded by this check because they are 
tracked separately via the\n * `captureNames` array from token expansion. This function only detects\n * anonymous capturing groups like `(.*)`.\n *\n * @param pattern - Regex pattern string to analyze\n * @returns `true` if the pattern contains at least one anonymous capturing group\n */\nconst hasCapturingGroup = (pattern: string): boolean => {\n // Match ( that is NOT followed by ? (excludes non-capturing and named groups)\n return /\\((?!\\?)/.test(pattern);\n};\n\n/**\n * Result of processing a pattern with token expansion and optional fuzzy matching.\n */\ntype ProcessedPattern = {\n /** The expanded regex pattern string (tokens replaced with regex) */\n pattern: string;\n /** Names of captured groups extracted from `{{token:name}}` syntax */\n captureNames: string[];\n};\n\n/**\n * Processes a pattern string by expanding tokens and optionally applying fuzzy matching.\n *\n * Fuzzy matching makes Arabic text diacritic-insensitive. When enabled, the\n * transform is applied to token patterns BEFORE wrapping with capture groups,\n * ensuring regex metacharacters (`(`, `)`, `|`, etc.) are not corrupted.\n *\n * @param pattern - Pattern string potentially containing `{{token}}` placeholders\n * @param fuzzy - Whether to apply diacritic-insensitive transformation\n * @returns Processed pattern with expanded tokens and capture names\n *\n * @example\n * processPattern('{{raqms:num}} {{dash}}', false)\n * // → { pattern: '(?<num>[٠-٩]+) [-–—ـ]', captureNames: ['num'] }\n *\n * @example\n * processPattern('{{naql}}', true)\n * // → { pattern: 'حَ?دَّ?ثَ?نَ?ا|...', captureNames: [] }\n */\nconst processPattern = (pattern: string, fuzzy: boolean): ProcessedPattern => {\n // First escape brackets ()[] outside of {{tokens}} - allows intuitive patterns like ({{harf}}):\n const escaped = escapeTemplateBrackets(pattern);\n // Pass fuzzy transform to expandTokensWithCaptures so it can apply to raw token patterns\n const fuzzyTransform = fuzzy ? makeDiacriticInsensitive : undefined;\n const { pattern: expanded, captureNames } = expandTokensWithCaptures(escaped, fuzzyTransform);\n return { captureNames, pattern: expanded };\n};\n\n/**\n * Compiled regex and metadata for a split rule.\n */\ntype RuleRegex = {\n /** Compiled RegExp with 'gmu' flags (global, multiline, unicode) */\n regex: RegExp;\n /** Whether the regex uses capturing groups for content extraction */\n usesCapture: boolean;\n /** Names of captured groups from `{{token:name}}` syntax */\n captureNames: string[];\n /** Whether this rule uses `lineStartsAfter` (content capture at end) */\n usesLineStartsAfter: boolean;\n};\n\n/**\n * Builds a compiled regex and metadata from a split rule.\n *\n * Handles all pattern types:\n * - `regex`: Used as-is (no token expansion)\n * - `template`: Tokens expanded via `expandTokensWithCaptures`\n * - `lineStartsWith`: Converted to `^(?:patterns...)`\n * - `lineStartsAfter`: Converted to `^(?:patterns...)(.*)`\n * - `lineEndsWith`: Converted to `(?:patterns...)$`\n *\n * @param rule - Split rule containing pattern and options\n * @returns Compiled regex with capture metadata\n */\nconst buildRuleRegex = (rule: SplitRule): RuleRegex => {\n const s: {\n lineStartsWith?: string[];\n lineStartsAfter?: string[];\n lineEndsWith?: string[];\n template?: string;\n regex?: string;\n } = { ...rule };\n\n const fuzzy = (rule as { fuzzy?: boolean }).fuzzy ?? 
false;\n let allCaptureNames: string[] = [];\n\n /**\n * Safely compiles a regex pattern, throwing a helpful error if invalid.\n *\n * @remarks\n * This catches syntax errors only. It does NOT protect against ReDoS\n * (catastrophic backtracking) from pathological patterns. Avoid compiling\n * patterns from untrusted sources.\n */\n const compileRegex = (pattern: string): RegExp => {\n try {\n return new RegExp(pattern, 'gmu');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(`Invalid regex pattern: ${pattern}\\n Cause: ${message}`);\n }\n };\n\n // lineStartsAfter: creates a capturing group to exclude the marker from content\n if (s.lineStartsAfter?.length) {\n const processed = s.lineStartsAfter.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n // Wrap patterns with named captures in a non-capturing group, then capture rest\n s.regex = `^(?:${patterns})(.*)`;\n return {\n captureNames: allCaptureNames,\n regex: compileRegex(s.regex),\n usesCapture: true,\n usesLineStartsAfter: true,\n };\n }\n\n if (s.lineStartsWith?.length) {\n const processed = s.lineStartsWith.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n s.regex = `^(?:${patterns})`;\n }\n if (s.lineEndsWith?.length) {\n const processed = s.lineEndsWith.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n s.regex = `(?:${patterns})$`;\n }\n if (s.template) {\n // Template from user: first escape brackets, then expand tokens with captures\n const escaped = escapeTemplateBrackets(s.template);\n const { pattern, captureNames } = expandTokensWithCaptures(escaped);\n s.regex = pattern;\n allCaptureNames = [...allCaptureNames, ...captureNames];\n }\n\n if (!s.regex) {\n throw new Error(\n 'Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, or lineEndsWith',\n );\n }\n\n const usesCapture = hasCapturingGroup(s.regex) || allCaptureNames.length > 0;\n return {\n captureNames: allCaptureNames,\n regex: compileRegex(s.regex),\n usesCapture,\n usesLineStartsAfter: false,\n };\n};\n\n/**\n * Represents the byte offset boundaries of a single page within concatenated content.\n */\ntype PageBoundary = {\n /** Start offset (inclusive) in the concatenated content string */\n start: number;\n /** End offset (inclusive) in the concatenated content string */\n end: number;\n /** Page ID from the original `Page` */\n id: number;\n};\n\n/**\n * Page mapping utilities for tracking positions across concatenated pages.\n */\ntype PageMap = {\n /**\n * Returns the page ID for a given offset in the concatenated content.\n *\n * @param offset - Character offset in concatenated content\n * @returns Page ID containing that offset\n */\n getId: (offset: number) => number;\n /** Array of page boundaries in order */\n boundaries: PageBoundary[];\n /** Sorted array of offsets where page breaks occur (for binary search) */\n pageBreaks: number[];\n /** Array of all page IDs in order (for sliding window algorithm) */\n pageIds: number[];\n};\n\n/**\n * Builds a concatenated content string and page mapping from input pages.\n *\n * Pages are joined with newline characters, and a page map is created to\n * track which page each 
offset belongs to. This allows pattern matching\n * across page boundaries while preserving page reference information.\n *\n * @param pages - Array of input pages with id and content\n * @returns Concatenated content string and page mapping utilities\n *\n * @example\n * const pages = [\n * { id: 1, content: 'Page 1 text' },\n * { id: 2, content: 'Page 2 text' }\n * ];\n * const { content, pageMap } = buildPageMap(pages);\n * // content = 'Page 1 text\\nPage 2 text'\n * // pageMap.getId(0) = 1\n * // pageMap.getId(12) = 2\n */\nconst buildPageMap = (pages: Page[]): { content: string; normalizedPages: string[]; pageMap: PageMap } => {\n const boundaries: PageBoundary[] = [];\n const pageBreaks: number[] = []; // Sorted array for binary search\n let offset = 0;\n const parts: string[] = [];\n\n for (let i = 0; i < pages.length; i++) {\n const normalized = normalizeLineEndings(pages[i].content);\n boundaries.push({ end: offset + normalized.length, id: pages[i].id, start: offset });\n parts.push(normalized);\n if (i < pages.length - 1) {\n pageBreaks.push(offset + normalized.length); // Already in sorted order\n offset += normalized.length + 1;\n } else {\n offset += normalized.length;\n }\n }\n\n /**\n * Finds the page boundary containing the given offset using binary search.\n * O(log n) complexity for efficient lookup with many pages.\n *\n * @param off - Character offset to look up\n * @returns Page boundary or the last boundary as fallback\n */\n const findBoundary = (off: number): PageBoundary | undefined => {\n let lo = 0;\n let hi = boundaries.length - 1;\n\n while (lo <= hi) {\n const mid = (lo + hi) >>> 1; // Unsigned right shift for floor division\n const b = boundaries[mid];\n if (off < b.start) {\n hi = mid - 1;\n } else if (off > b.end) {\n lo = mid + 1;\n } else {\n return b;\n }\n }\n // Fallback to last boundary if not found\n return boundaries[boundaries.length - 1];\n };\n\n return {\n content: parts.join('\\n'),\n normalizedPages: parts, // OPTIMIZATION: Return already-normalized content for reuse\n pageMap: {\n boundaries,\n getId: (off: number) => findBoundary(off)?.id ?? 
0,\n pageBreaks,\n pageIds: boundaries.map((b) => b.id),\n },\n };\n};\n\n/**\n * Represents a position where content should be split, with associated metadata.\n */\ntype SplitPoint = {\n /** Character index in the concatenated content where the split occurs */\n index: number;\n /** Static metadata from the matched rule */\n meta?: Record<string, unknown>;\n /** Content captured by regex patterns with capturing groups */\n capturedContent?: string;\n /** Named captures from `{{token:name}}` patterns */\n namedCaptures?: Record<string, string>;\n /**\n * Offset from index where content actually starts (for lineStartsAfter).\n * If set, the segment content starts at `index + contentStartOffset`.\n * This allows excluding the marker from content while keeping the split index\n * at the match start so previous segment doesn't include the marker.\n */\n contentStartOffset?: number;\n};\n\n/**\n * Executes a regex against content and extracts match results with capture information.\n *\n * @param content - Full content string to search\n * @param regex - Compiled regex with 'g' flag\n * @param usesCapture - Whether to extract captured content\n * @param captureNames - Names of expected named capture groups\n * @returns Array of match results with positions and captures\n */\nconst findMatches = (content: string, regex: RegExp, usesCapture: boolean, captureNames: string[]) => {\n const matches: MatchResult[] = [];\n regex.lastIndex = 0;\n let m = regex.exec(content);\n\n while (m !== null) {\n const result: MatchResult = { end: m.index + m[0].length, start: m.index };\n\n // Extract named captures if present\n result.namedCaptures = extractNamedCaptures(m.groups, captureNames);\n\n // For lineStartsAfter, get the last positional capture (the .* content)\n if (usesCapture) {\n result.captured = getLastPositionalCapture(m);\n }\n\n matches.push(result);\n\n if (m[0].length === 0) {\n regex.lastIndex++;\n }\n m = regex.exec(content);\n }\n\n return matches;\n};\n\n/**\n * Finds page breaks within a given offset range using binary search.\n * O(log n + k) where n = total breaks, k = breaks in range.\n *\n * @param startOffset - Start of range (inclusive)\n * @param endOffset - End of range (exclusive)\n * @param sortedBreaks - Sorted array of page break offsets\n * @returns Array of break offsets relative to startOffset\n */\nconst findBreaksInRange = (startOffset: number, endOffset: number, sortedBreaks: number[]) => {\n if (sortedBreaks.length === 0) {\n return [];\n }\n\n // Binary search for first break >= startOffset\n let lo = 0;\n let hi = sortedBreaks.length;\n while (lo < hi) {\n const mid = (lo + hi) >>> 1;\n if (sortedBreaks[mid] < startOffset) {\n lo = mid + 1;\n } else {\n hi = mid;\n }\n }\n\n // Collect breaks until we exceed endOffset\n const result: number[] = [];\n for (let i = lo; i < sortedBreaks.length && sortedBreaks[i] < endOffset; i++) {\n result.push(sortedBreaks[i] - startOffset);\n }\n return result;\n};\n\n/**\n * Converts page-break newlines to spaces in segment content.\n *\n * When a segment spans multiple pages, the newline characters that were\n * inserted as page separators during concatenation are converted to spaces\n * for more natural reading.\n *\n * Uses binary search for O(log n + k) lookup instead of O(n) iteration.\n *\n * @param content - Segment content string\n * @param startOffset - Starting offset of this content in concatenated string\n * @param pageBreaks - Sorted array of page break offsets\n * @returns Content with page-break newlines converted to 
spaces\n */\nconst convertPageBreaks = (content: string, startOffset: number, pageBreaks: number[]): string => {\n // OPTIMIZATION: Fast-path for empty or no-newline content (common cases)\n if (!content || !content.includes('\\n')) {\n return content;\n }\n\n const endOffset = startOffset + content.length;\n const breaksInRange = findBreaksInRange(startOffset, endOffset, pageBreaks);\n\n // No page breaks in this segment - return as-is (most common case)\n if (breaksInRange.length === 0) {\n return content;\n }\n\n // Convert ONLY page-break newlines (the ones inserted during concatenation) to spaces.\n //\n // NOTE: Offsets from findBreaksInRange are string indices (code units). Using Array.from()\n // would index by Unicode code points and can desync indices if surrogate pairs appear.\n const breakSet = new Set(breaksInRange);\n return content.replace(/\\n/g, (match, offset: number) => (breakSet.has(offset) ? ' ' : match));\n};\n\n/**\n * Applies breakpoints to oversized segments.\n *\n * For each segment that spans more than maxPages, tries the breakpoint patterns\n * in order to find a suitable split point. Structural markers (from rules) are\n * always respected - segments are only broken within their boundaries.\n *\n * @param segments - Initial segments from rule processing\n * @param pages - Original pages for page lookup\n * @param maxPages - Maximum pages before breakpoints apply\n * @param breakpoints - Patterns to try in order (tokens supported)\n * @param prefer - 'longer' for last match, 'shorter' for first match\n * @returns Processed segments with oversized ones broken up\n */\nconst applyBreakpoints = (\n segments: Segment[],\n pages: Page[],\n normalizedContent: string[], // OPTIMIZATION: Pre-normalized content from buildPageMap\n maxPages: number,\n breakpoints: Breakpoint[],\n prefer: 'longer' | 'shorter',\n logger?: Logger,\n): Segment[] => {\n const findExclusionBreakPosition = (\n currentFromIdx: number,\n windowEndIdx: number,\n toIdx: number,\n pageIds: number[],\n expandedBreakpoints: Array<{ excludeSet: Set<number> }>,\n cumulativeOffsets: number[],\n ): number => {\n const startingPageId = pageIds[currentFromIdx];\n const startingPageExcluded = expandedBreakpoints.some((bp) => bp.excludeSet.has(startingPageId));\n if (startingPageExcluded && currentFromIdx < toIdx) {\n // Output just this one page as a segment (break at next page boundary)\n return cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx];\n }\n\n // Find the first excluded page AFTER the starting page (within window) and split BEFORE it\n for (let pageIdx = currentFromIdx + 1; pageIdx <= windowEndIdx; pageIdx++) {\n const pageId = pageIds[pageIdx];\n const isExcluded = expandedBreakpoints.some((bp) => bp.excludeSet.has(pageId));\n if (isExcluded) {\n return cumulativeOffsets[pageIdx] - cumulativeOffsets[currentFromIdx];\n }\n }\n return -1;\n };\n\n // Get page IDs in order\n const pageIds = pages.map((p) => p.id);\n\n // OPTIMIZATION: Build pageId to index Map for O(1) lookups instead of O(P) indexOf\n const pageIdToIndex = new Map(pageIds.map((id, i) => [id, i]));\n\n // OPTIMIZATION: Build normalized pages Map from pre-normalized content\n const normalizedPages = new Map<number, NormalizedPage>();\n for (let i = 0; i < pages.length; i++) {\n const content = normalizedContent[i];\n normalizedPages.set(pages[i].id, { content, index: i, length: content.length });\n }\n\n // OPTIMIZATION: Pre-compute cumulative offsets for O(1) window size calculation\n const cumulativeOffsets: 
number[] = [0];\n let totalOffset = 0;\n for (let i = 0; i < pageIds.length; i++) {\n const pageData = normalizedPages.get(pageIds[i]);\n totalOffset += pageData ? pageData.length : 0;\n if (i < pageIds.length - 1) {\n totalOffset += 1; // separator between pages\n }\n cumulativeOffsets.push(totalOffset);\n }\n\n // Use extracted helper to expand breakpoints\n // Create pattern processor function for breakpoint-utils\n const patternProcessor = (p: string) => processPattern(p, false).pattern;\n const expandedBreakpoints = expandBreakpoints(breakpoints, patternProcessor);\n\n const result: Segment[] = [];\n\n logger?.info?.('Starting breakpoint processing', { maxPages, segmentCount: segments.length });\n\n for (const segment of segments) {\n const fromIdx = pageIdToIndex.get(segment.from) ?? -1;\n const toIdx = segment.to !== undefined ? (pageIdToIndex.get(segment.to) ?? fromIdx) : fromIdx;\n\n logger?.debug?.('Processing segment', {\n contentLength: segment.content.length,\n contentPreview: segment.content.slice(0, 100),\n from: segment.from,\n fromIdx,\n to: segment.to,\n toIdx,\n });\n\n // Calculate span using actual page IDs (not array indices)\n const segmentSpan = (segment.to ?? segment.from) - segment.from;\n // If segment span is within limit AND no pages are excluded, keep as-is\n // Check if any page in this segment is excluded by any breakpoint\n const hasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, fromIdx, toIdx),\n );\n\n if (segmentSpan <= maxPages && !hasExclusions) {\n logger?.trace?.('Segment within limit, keeping as-is');\n\n result.push(segment);\n continue;\n }\n\n logger?.debug?.('Segment exceeds limit or has exclusions, breaking it up');\n\n // Rebuild content for this segment from individual pages\n // We need to work with the actual page content, not the merged segment content\n\n // Process this segment, potentially breaking it into multiple\n let remainingContent = segment.content;\n let currentFromIdx = fromIdx;\n let isFirstPiece = true;\n let iterationCount = 0;\n const maxIterations = 10000; // Safety limit\n\n while (currentFromIdx <= toIdx) {\n iterationCount++;\n if (iterationCount > maxIterations) {\n logger?.error?.('INFINITE LOOP DETECTED! Breaking out, you should report this bug', {\n iterationCount: maxIterations,\n });\n logger?.error?.('Loop state', {\n currentFromIdx,\n remainingContentLength: remainingContent.length,\n toIdx,\n });\n break;\n }\n\n // Calculate remaining span using actual page IDs (not array indices)\n const remainingSpan = pageIds[toIdx] - pageIds[currentFromIdx];\n\n logger?.trace?.('Loop iteration', {\n currentFromIdx,\n currentPageId: pageIds[currentFromIdx],\n iterationCount,\n remainingContentLength: remainingContent.length,\n remainingContentPreview: remainingContent.slice(0, 80),\n remainingSpan,\n toIdx,\n toPageId: pageIds[toIdx],\n });\n\n // Check if any page in remaining segment is excluded\n const remainingHasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, toIdx),\n );\n\n // If remaining span is within limit AND no exclusions, output and done\n if (remainingSpan <= maxPages && !remainingHasExclusions) {\n logger?.debug?.('Remaining span within limit, outputting final segment');\n\n const finalSeg = createSegment(\n remainingContent,\n pageIds[currentFromIdx],\n currentFromIdx !== toIdx ? pageIds[toIdx] : undefined,\n isFirstPiece ? 
segment.meta : undefined,\n );\n if (finalSeg) {\n result.push(finalSeg);\n }\n break;\n }\n\n // Need to break within maxPages window (based on page IDs, not indices)\n // Find the last page index where pageId <= currentPageId + maxPages\n const currentPageId = pageIds[currentFromIdx];\n const maxWindowPageId = currentPageId + maxPages;\n let windowEndIdx = currentFromIdx;\n for (let i = currentFromIdx; i <= toIdx; i++) {\n if (pageIds[i] <= maxWindowPageId) {\n windowEndIdx = i;\n } else {\n break;\n }\n }\n\n logger?.trace?.('Window calculation', {\n currentPageId,\n maxWindowPageId,\n windowEndIdx,\n windowEndPageId: pageIds[windowEndIdx],\n });\n\n // Special case: if we have exclusions IN THE CURRENT WINDOW, handle them\n // Check if any page in the WINDOW (not entire segment) is excluded\n const windowHasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, windowEndIdx),\n );\n\n let breakPosition = -1;\n\n if (windowHasExclusions) {\n logger?.trace?.('Window has exclusions, finding exclusion break position');\n\n breakPosition = findExclusionBreakPosition(\n currentFromIdx,\n windowEndIdx,\n toIdx,\n pageIds,\n expandedBreakpoints,\n cumulativeOffsets,\n );\n\n logger?.trace?.('Exclusion break position', { breakPosition });\n }\n\n // If no exclusion-based split found, use normal breakpoint finding\n if (breakPosition <= 0) {\n // Use extracted helper to find break position\n const breakpointCtx: BreakpointContext = {\n cumulativeOffsets,\n expandedBreakpoints,\n normalizedPages,\n pageIds,\n prefer,\n };\n\n logger?.trace?.('Finding break position using patterns...');\n\n breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, breakpointCtx);\n\n logger?.trace?.('Pattern break position', { breakPosition });\n }\n\n if (breakPosition <= 0) {\n logger?.debug?.('No pattern matched, falling back to page boundary');\n\n // No pattern matched - fallback to page boundary split\n // If only one page in window, output it and continue to next page\n if (windowEndIdx === currentFromIdx) {\n logger?.trace?.('Single page window, outputting page and advancing');\n\n // Output this single page as a segment\n const pageContent =\n cumulativeOffsets[currentFromIdx + 1] !== undefined\n ? remainingContent.slice(\n 0,\n cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx],\n )\n : remainingContent;\n const pageSeg = createSegment(\n pageContent.trim(),\n pageIds[currentFromIdx],\n undefined,\n isFirstPiece ? 
segment.meta : undefined,\n );\n if (pageSeg) {\n result.push(pageSeg);\n }\n // Move to next page\n remainingContent = remainingContent.slice(pageContent.length).trim();\n currentFromIdx++;\n isFirstPiece = false;\n\n logger?.trace?.('After single page', {\n currentFromIdx,\n remainingContentLength: remainingContent.length,\n });\n\n continue;\n }\n // Multi-page window with no pattern match - output entire window and continue\n breakPosition = cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx];\n logger?.trace?.('Multi-page window, using full window break position', { breakPosition });\n }\n\n const pieceContent = remainingContent.slice(0, breakPosition).trim();\n\n logger?.trace?.('Piece extracted', {\n breakPosition,\n pieceContentLength: pieceContent.length,\n pieceContentPreview: pieceContent.slice(0, 80),\n });\n\n // Find the actual starting and ending pages for this piece content\n // currentFromIdx might not be the actual starting page if content was split across pages\n const actualStartIdx = pieceContent\n ? findActualStartPage(pieceContent, currentFromIdx, toIdx, pageIds, normalizedPages)\n : currentFromIdx;\n const actualEndIdx = pieceContent\n ? findActualEndPage(pieceContent, actualStartIdx, windowEndIdx, pageIds, normalizedPages)\n : currentFromIdx;\n\n logger?.trace?.('Actual page indices', {\n actualEndIdx,\n actualStartIdx,\n pieceHasContent: !!pieceContent,\n });\n\n if (pieceContent) {\n const pieceSeg = createSegment(\n pieceContent,\n pageIds[actualStartIdx],\n actualEndIdx > actualStartIdx ? pageIds[actualEndIdx] : undefined,\n isFirstPiece ? segment.meta : undefined,\n );\n if (pieceSeg) {\n result.push(pieceSeg);\n\n logger?.debug?.('Created segment', {\n contentLength: pieceSeg.content.length,\n from: pieceSeg.from,\n to: pieceSeg.to,\n });\n }\n }\n\n // Update for next iteration\n const prevRemainingLength = remainingContent.length;\n remainingContent = remainingContent.slice(breakPosition).trim();\n\n logger?.trace?.('After slicing remainingContent', {\n newLength: remainingContent.length,\n prevLength: prevRemainingLength,\n slicedAmount: breakPosition,\n });\n\n // If no remaining content, we're done with this segment\n if (!remainingContent) {\n logger?.debug?.('No remaining content, breaking out of loop');\n break;\n }\n\n // Find which page the remaining content actually starts on\n // The next piece starts from actualEndIdx OR the next page if the break was at a page boundary\n let nextFromIdx = actualEndIdx;\n\n // Check if remaining content starts with content from the next page\n if (remainingContent && actualEndIdx + 1 <= toIdx) {\n const nextPageData = normalizedPages.get(pageIds[actualEndIdx + 1]);\n if (nextPageData) {\n const nextPrefix = nextPageData.content.slice(0, Math.min(30, nextPageData.length));\n if (nextPrefix && remainingContent.startsWith(nextPrefix)) {\n nextFromIdx = actualEndIdx + 1;\n logger?.trace?.('Content starts with next page prefix', { advancingTo: nextFromIdx });\n }\n }\n }\n\n logger?.trace?.('End of iteration', {\n nextFromIdx,\n prevCurrentFromIdx: currentFromIdx,\n willAdvance: nextFromIdx !== currentFromIdx,\n });\n\n currentFromIdx = nextFromIdx;\n isFirstPiece = false;\n }\n }\n\n logger?.info?.('Breakpoint processing completed', { resultCount: result.length });\n\n return result;\n};\n\n/**\n * Segments pages of content based on pattern-matching rules.\n *\n * This is the main entry point for the segmentation engine. 
It takes an array\n * of pages and applies the provided rules to identify split points, producing\n * an array of segments with content, page references, and metadata.\n *\n * @param pages - Array of pages with id and content\n * @param options - Segmentation options including splitting rules\n * @returns Array of segments with content, from/to page references, and optional metadata\n *\n * @example\n * // Split markdown by headers\n * const segments = segmentPages(pages, {\n * rules: [\n * { lineStartsWith: ['## '], split: 'at', meta: { type: 'chapter' } }\n * ]\n * });\n *\n * @example\n * // Split Arabic hadith text with number extraction\n * const segments = segmentPages(pages, {\n * rules: [\n * {\n * lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '],\n * split: 'at',\n * fuzzy: true,\n * meta: { type: 'hadith' }\n * }\n * ]\n * });\n *\n * @example\n * // Multiple rules with page constraints\n * const segments = segmentPages(pages, {\n * rules: [\n * { lineStartsWith: ['{{kitab}}'], split: 'at', meta: { type: 'book' } },\n * { lineStartsWith: ['{{bab}}'], split: 'at', min: 10, meta: { type: 'chapter' } },\n * { regex: '^[٠-٩]+ - ', split: 'at', meta: { type: 'hadith' } }\n * ]\n * });\n */\nexport const segmentPages = (pages: Page[], options: SegmentationOptions): Segment[] => {\n const { rules = [], maxPages, breakpoints, prefer = 'longer', logger } = options;\n if (!pages.length) {\n return [];\n }\n\n const { content: matchContent, normalizedPages: normalizedContent, pageMap } = buildPageMap(pages);\n const splitPoints: SplitPoint[] = [];\n\n // Process rules to find structural split points\n for (const rule of rules) {\n const { regex, usesCapture, captureNames, usesLineStartsAfter } = buildRuleRegex(rule);\n const allMatches = findMatches(matchContent, regex, usesCapture, captureNames);\n\n // Filter matches by page ID constraints\n const constrainedMatches = filterByConstraints(allMatches, rule, pageMap.getId);\n\n // Apply occurrence filtering (global)\n const finalMatches = filterByOccurrence(constrainedMatches, rule.occurrence);\n\n for (const m of finalMatches) {\n // For lineStartsAfter: we want to exclude the marker from content.\n // - Split at m.start so previous segment doesn't include the marker\n // - Set contentStartOffset to skip the marker when slicing this segment\n const isLineStartsAfter = usesLineStartsAfter && m.captured !== undefined;\n const markerLength = isLineStartsAfter ? m.end - m.captured!.length - m.start : 0;\n\n splitPoints.push({\n // lineStartsAfter: DON'T use capturedContent, let normal slicing extend to next split\n capturedContent: isLineStartsAfter ? undefined : m.captured,\n // lineStartsAfter: skip the marker when slicing content\n contentStartOffset: isLineStartsAfter ? markerLength : undefined,\n index: rule.split === 'at' ? 
m.start : m.end,\n meta: rule.meta,\n namedCaptures: m.namedCaptures,\n });\n }\n }\n\n // Deduplicate split points by index, preferring ones with more information\n // (contentStartOffset or meta over plain splits)\n const byIndex = new Map<number, SplitPoint>();\n for (const p of splitPoints) {\n const existing = byIndex.get(p.index);\n if (!existing) {\n byIndex.set(p.index, p);\n } else {\n // Prefer split with contentStartOffset (for lineStartsAfter stripping)\n // or with meta over one without\n const hasMoreInfo =\n (p.contentStartOffset !== undefined && existing.contentStartOffset === undefined) ||\n (p.meta !== undefined && existing.meta === undefined);\n if (hasMoreInfo) {\n byIndex.set(p.index, p);\n }\n }\n }\n const unique = [...byIndex.values()];\n unique.sort((a, b) => a.index - b.index);\n\n // Build initial segments from structural rules\n let segments = buildSegments(unique, matchContent, pageMap, rules);\n\n // Handle case where no rules or no split points - create one segment from all content\n // This allows breakpoints to still process the content\n if (segments.length === 0 && pages.length > 0) {\n const firstPage = pages[0];\n const lastPage = pages[pages.length - 1];\n // OPTIMIZATION: Reuse pre-normalized content from buildPageMap instead of re-normalizing\n const allContent = normalizedContent.join('\\n');\n const initialSeg: Segment = {\n content: allContent.trim(),\n from: firstPage.id,\n };\n if (lastPage.id !== firstPage.id) {\n initialSeg.to = lastPage.id;\n }\n if (initialSeg.content) {\n segments = [initialSeg];\n }\n }\n\n // Apply breakpoints post-processing for oversized segments\n if (maxPages !== undefined && maxPages >= 0 && breakpoints?.length) {\n return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer, logger);\n }\n\n return segments;\n};\n\n/**\n * Creates segment objects from split points.\n *\n * Handles segment creation including:\n * - Content extraction (with captured content for `lineStartsAfter`)\n * - Page break conversion to spaces\n * - From/to page reference calculation\n * - Metadata merging (static + named captures)\n *\n * @param splitPoints - Sorted, unique split points\n * @param content - Full concatenated content string\n * @param pageMap - Page mapping utilities\n * @param rules - Original rules (for constraint checking on first segment)\n * @returns Array of segment objects\n */\nconst buildSegments = (splitPoints: SplitPoint[], content: string, pageMap: PageMap, rules: SplitRule[]): Segment[] => {\n /**\n * Creates a single segment from a content range.\n */\n const createSegment = (\n start: number,\n end: number,\n meta?: Record<string, unknown>,\n capturedContent?: string,\n namedCaptures?: Record<string, string>,\n contentStartOffset?: number,\n ): Segment | null => {\n // For lineStartsAfter, skip the marker by using contentStartOffset\n const actualStart = start + (contentStartOffset ?? 0);\n // For lineStartsAfter (contentStartOffset set), trim leading whitespace after marker\n // For other rules, only trim trailing whitespace to preserve intentional leading spaces\n const sliced = content.slice(actualStart, end);\n let text = capturedContent?.trim() ?? (contentStartOffset ? sliced.trim() : sliced.replace(/[\\s\\n]+$/, ''));\n if (!text) {\n return null;\n }\n if (!capturedContent) {\n text = convertPageBreaks(text, actualStart, pageMap.pageBreaks);\n }\n const from = pageMap.getId(actualStart);\n const to = capturedContent ? 
pageMap.getId(end - 1) : pageMap.getId(actualStart + text.length - 1);\n const seg: Segment = { content: text, from };\n if (to !== from) {\n seg.to = to;\n }\n if (meta || namedCaptures) {\n seg.meta = { ...meta, ...namedCaptures };\n }\n return seg;\n };\n\n /**\n * Creates segments from an array of split points.\n */\n const createSegmentsFromSplitPoints = (): Segment[] => {\n const result: Segment[] = [];\n for (let i = 0; i < splitPoints.length; i++) {\n const sp = splitPoints[i];\n const end = i < splitPoints.length - 1 ? splitPoints[i + 1].index : content.length;\n const s = createSegment(\n sp.index,\n end,\n sp.meta,\n sp.capturedContent,\n sp.namedCaptures,\n sp.contentStartOffset,\n );\n if (s) {\n result.push(s);\n }\n }\n return result;\n };\n\n const segments: Segment[] = [];\n\n // Handle case with no split points\n if (!splitPoints.length) {\n const firstId = pageMap.getId(0);\n if (anyRuleAllowsId(rules, firstId)) {\n const s = createSegment(0, content.length);\n if (s) {\n segments.push(s);\n }\n }\n return segments;\n }\n\n // Add first segment if there's content before first split\n if (splitPoints[0].index > 0) {\n const firstId = pageMap.getId(0);\n if (anyRuleAllowsId(rules, firstId)) {\n const s = createSegment(0, splitPoints[0].index);\n if (s) {\n segments.push(s);\n }\n }\n }\n\n // Create segments from split points using extracted utility\n return [...segments, ...createSegmentsFromSplitPoints()];\n};\n","/**\n * Pattern detection utilities for recognizing template tokens in Arabic text.\n * Used to auto-detect patterns from user-highlighted text in the segmentation dialog.\n *\n * @module pattern-detection\n */\n\nimport { getAvailableTokens, TOKEN_PATTERNS } from './segmentation/tokens.js';\n\n/**\n * Result of detecting a token pattern in text\n */\nexport type DetectedPattern = {\n /** Token name from TOKEN_PATTERNS (e.g., 'raqms', 'dash') */\n token: string;\n /** The matched text */\n match: string;\n /** Start index in the original text */\n index: number;\n /** End index (exclusive) */\n endIndex: number;\n};\n\n/**\n * Token detection order - more specific patterns first to avoid partial matches.\n * Example: 'raqms' before 'raqm' so \"٣٤\" matches 'raqms' not just the first digit.\n *\n * Tokens not in this list are appended in alphabetical order from TOKEN_PATTERNS.\n */\nconst TOKEN_PRIORITY_ORDER: string[] = [\n 'basmalah', // Most specific - full phrase\n 'kitab',\n 'bab',\n 'fasl',\n 'naql',\n 'numbered', // Composite: raqms + dash\n 'raqms', // Multiple digits before single digit\n 'raqm',\n 'tarqim',\n 'bullet',\n 'dash',\n 'harf',\n];\n\n/**\n * Gets the token detection priority order.\n * Returns tokens in priority order, with any TOKEN_PATTERNS not in the priority list appended.\n */\nconst getTokenPriority = (): string[] => {\n const allTokens = getAvailableTokens();\n const prioritized = TOKEN_PRIORITY_ORDER.filter((t) => allTokens.includes(t));\n const remaining = allTokens.filter((t) => !TOKEN_PRIORITY_ORDER.includes(t)).sort();\n return [...prioritized, ...remaining];\n};\n\n/**\n * Analyzes text and returns all detected token patterns with their positions.\n * Patterns are detected in priority order to avoid partial matches.\n *\n * @param text - The text to analyze for token patterns\n * @returns Array of detected patterns sorted by position\n *\n * @example\n * detectTokenPatterns(\"٣٤ - حدثنا\")\n * // Returns: [\n * // { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },\n * // { token: 'dash', match: '-', index: 3, endIndex: 4 
},\n * // { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }\n * // ]\n */\nexport const detectTokenPatterns = (text: string): DetectedPattern[] => {\n if (!text) {\n return [];\n }\n\n const results: DetectedPattern[] = [];\n const coveredRanges: Array<[number, number]> = [];\n\n // Check if a position is already covered by a detected pattern\n const isPositionCovered = (start: number, end: number): boolean => {\n return coveredRanges.some(\n ([s, e]) => (start >= s && start < e) || (end > s && end <= e) || (start <= s && end >= e),\n );\n };\n\n // Process tokens in priority order\n for (const tokenName of getTokenPriority()) {\n const pattern = TOKEN_PATTERNS[tokenName];\n if (!pattern) {\n continue;\n }\n\n try {\n // Create a global regex to find all matches\n const regex = new RegExp(`(${pattern})`, 'gu');\n let match: RegExpExecArray | null;\n\n // biome-ignore lint/suspicious/noAssignInExpressions: standard regex exec loop pattern\n while ((match = regex.exec(text)) !== null) {\n const startIndex = match.index;\n const endIndex = startIndex + match[0].length;\n\n // Skip if this range overlaps with an already detected pattern\n if (isPositionCovered(startIndex, endIndex)) {\n continue;\n }\n\n results.push({ endIndex, index: startIndex, match: match[0], token: tokenName });\n\n coveredRanges.push([startIndex, endIndex]);\n }\n } catch {}\n }\n\n return results.sort((a, b) => a.index - b.index);\n};\n\n/**\n * Generates a template pattern from text using detected tokens.\n * Replaces matched portions with {{token}} syntax.\n *\n * @param text - Original text\n * @param detected - Array of detected patterns from detectTokenPatterns\n * @returns Template string with tokens, e.g., \"{{raqms}} {{dash}} \"\n *\n * @example\n * const detected = detectTokenPatterns(\"٣٤ - \");\n * generateTemplateFromText(\"٣٤ - \", detected);\n * // Returns: \"{{raqms}} {{dash}} \"\n */\nexport const generateTemplateFromText = (text: string, detected: DetectedPattern[]): string => {\n if (!text || detected.length === 0) {\n return text;\n }\n\n // Build template by replacing detected patterns with tokens\n // Process in reverse order to preserve indices\n let template = text;\n const sortedByIndexDesc = [...detected].sort((a, b) => b.index - a.index);\n\n for (const d of sortedByIndexDesc) {\n template = `${template.slice(0, d.index)}{{${d.token}}}${template.slice(d.endIndex)}`;\n }\n\n return template;\n};\n\n/**\n * Determines the best pattern type for auto-generated rules based on detected patterns.\n *\n * @param detected - Array of detected patterns\n * @returns Suggested pattern type and whether to use fuzzy matching\n */\nexport const suggestPatternConfig = (\n detected: DetectedPattern[],\n): { patternType: 'lineStartsWith' | 'lineStartsAfter'; fuzzy: boolean; metaType?: string } => {\n // Check if the detected patterns suggest a structural marker (chapter, book, etc.)\n const hasStructuralToken = detected.some((d) => ['basmalah', 'kitab', 'bab', 'fasl'].includes(d.token));\n\n // Check if the pattern is numbered (hadith-style)\n const hasNumberedPattern = detected.some((d) => ['raqms', 'raqm', 'numbered'].includes(d.token));\n\n // If it starts with a structural token, use lineStartsWith (keep marker in content)\n if (hasStructuralToken) {\n return {\n fuzzy: true,\n metaType: detected.find((d) => ['kitab', 'bab', 'fasl'].includes(d.token))?.token || 'chapter',\n patternType: 'lineStartsWith',\n };\n }\n\n // If it's a numbered pattern (like hadith numbers), use lineStartsAfter (strip 
prefix)\n if (hasNumberedPattern) {\n return { fuzzy: false, metaType: 'hadith', patternType: 'lineStartsAfter' };\n }\n\n // Default: use lineStartsAfter without fuzzy\n return { fuzzy: false, patternType: 'lineStartsAfter' };\n};\n\n/**\n * Analyzes text and generates a complete suggested rule configuration.\n *\n * @param text - Highlighted text from the page\n * @returns Suggested rule configuration or null if no patterns detected\n */\nexport const analyzeTextForRule = (\n text: string,\n): {\n template: string;\n patternType: 'lineStartsWith' | 'lineStartsAfter';\n fuzzy: boolean;\n metaType?: string;\n detected: DetectedPattern[];\n} | null => {\n const detected = detectTokenPatterns(text);\n\n if (detected.length === 0) {\n return null;\n }\n\n const template = generateTemplateFromText(text, detected);\n const config = suggestPatternConfig(detected);\n\n return { detected, template, ...config };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+BA,MAAM,mBAAmB;;;;;;;;;;;;;;;AAgBzB,MAAMA,eAA2B;CAC7B;EAAC;EAAU;EAAU;EAAU;EAAS;CACxC,CAAC,KAAU,IAAS;CACpB,CAAC,KAAU,IAAS;CACvB;;;;;;;;;;;;;;AAeD,MAAa,eAAe,MAAsB,EAAE,QAAQ,uBAAuB,OAAO;;;;;;;;;;;;;;;;;;AAmB1F,MAAM,iBAAiB,OAAuB;AAC1C,MAAK,MAAM,SAAS,aAChB,KAAI,MAAM,SAAS,GAAG,CAElB,QAAO,IAAI,MAAM,KAAK,MAAM,YAAY,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC;AAI7D,QAAO,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;AAyB1B,MAAM,wBAAwB,QAAgB;AAC1C,QAAO,IACF,UAAU,MAAM,CAChB,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,QAAQ,IAAI,CACpB,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAsCf,MAAa,4BAA4B,SAAiB;CACtD,MAAM,oBAAoB,GAAG,iBAAiB;CAC9C,MAAM,OAAO,qBAAqB,KAAK;AAEvC,QAAO,MAAM,KAAK,KAAK,CAClB,KAAK,OAAO,cAAc,GAAG,GAAG,kBAAkB,CAClD,KAAK,GAAG;;;;;;;;;;;;;;;;;;;AC9IjB,MAAa,uBAAuB,OAAoC,OAAO,OAAO,WAAW,EAAE,SAAS,IAAI,GAAG;;;;;;;;;;;;;;;;;;AAmBnH,MAAa,kBAAkB,QAAgB,gBAAkD;AAC7F,KAAI,CAAC,eAAe,YAAY,WAAW,EACvC,QAAO;AAEX,MAAK,MAAM,QAAQ,YACf,KAAI,OAAO,SAAS,UAChB;MAAI,WAAW,KACX,QAAO;QAER;EACH,MAAM,CAAC,MAAM,MAAM;AACnB,MAAI,UAAU,QAAQ,UAAU,GAC5B,QAAO;;AAInB,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAa,uBAAuB,QAAgB,SAAkC;AAClF,KAAI,KAAK,QAAQ,UAAa,SAAS,KAAK,IACxC,QAAO;AAEX,KAAI,KAAK,QAAQ,UAAa,SAAS,KAAK,IACxC,QAAO;AAEX,QAAO,CAAC,eAAe,QAAQ,KAAK,QAAQ;;;;;;;;;;;;;;;;;;AAmBhD,MAAa,mBAAmB,gBAAsD;CAClF,MAAM,6BAAa,IAAI,KAAa;AACpC,MAAK,MAAM,QAAQ,eAAe,EAAE,CAChC,KAAI,OAAO,SAAS,SAChB,YAAW,IAAI,KAAK;KAEpB,MAAK,IAAI,IAAI,KAAK,IAAI,KAAK,KAAK,IAAI,IAChC,YAAW,IAAI,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;;;;;AAoBX,MAAa,iBACT,SACA,YACA,UACA,SACiB;CACjB,MAAM,UAAU,QAAQ,MAAM;AAC9B,KAAI,CAAC,QACD,QAAO;CAEX,MAAMC,MAAe;EAAE,SAAS;EAAS,MAAM;EAAY;AAC3D,KAAI,aAAa,UAAa,aAAa,WACvC,KAAI,KAAK;AAEb,KAAI,KACA,KAAI,OAAO;AAEf,QAAO;;;;;;;;;;;;;;AA0BX,MAAa,qBAAqB,aAA2B,qBACzD,YAAY,KAAK,OAAO;CACpB,MAAM,OAAO,oBAAoB,GAAG;CACpC,MAAM,aAAa,gBAAgB,KAAK,QAAQ;CAChD,MAAM,gBACF,KAAK,aAAa,gBACL;EACH,MAAM,eAAeC,iBAAe,KAAK,SAAS;AAClD,MAAI;AACA,UAAO,IAAI,OAAO,cAAc,KAAK;WAChC,OAAO;GACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,SAAM,IAAI,MAAM,sCAAsC,KAAK,SAAS,aAAa,UAAU;;KAE/F,GACJ;AACV,KAAI,KAAK,YAAY,GACjB,QAAO;EAAE;EAAY,OAAO;EAAM;EAAM;EAAe;CAE3D,MAAM,WAAWA,iBAAe,KAAK,QAAQ;AAC7C,KAAI;AACA,SAAO;GAAE;GAAY,OAAO,IAAI,OAAO,UAAU,MAAM;GAAE;GAAM;GAAe;UACzE,OAAO;EACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,QAAM,IAAI,MAAM,6BAA6B,KAAK,QAAQ,aAAa,UAAU;;EAEvF;;;;;;;;;;;;AAgBN,MAAa,qBACT,cACA,gBACA,OACA,SACA,oBACS;AACT,MAAK,IAAI,KAAK,OAAO,KAAK,gBAAgB,MAAM;EAC5C,MAAM,WAAW,gBAAgB,IAAI,QAAQ,IAAI;AACjD,MAAI,UAAU;GACV,MAAM,eAAe,SAAS,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,SAAS,OAAO,CAAC;AAC7E,OAAI,aAAa,SAAS,KAAK,aAAa,QAAQ,aAAa,GAAG,EAChE,QAAO;;;AAInB,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAa,uBACT,cACA,gBACA,OACA,SACA,oBA
CS;CACT,MAAM,eAAe,aAAa,WAAW;AAC7C,KAAI,CAAC,aACD,QAAO;AAIX,MAAK,IAAI,KAAK,gBAAgB,MAAM,OAAO,MAAM;EAC7C,MAAM,WAAW,gBAAgB,IAAI,QAAQ,IAAI;AACjD,MAAI,UAAU;GACV,MAAM,aAAa,SAAS,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,SAAS,OAAO,CAAC,CAAC,MAAM;GAClF,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AAK5E,OAAI,WAAW,SAAS,GAAG;AACvB,QAAI,aAAa,WAAW,WAAW,CACnC,QAAO;AAEX,QAAI,SAAS,QAAQ,WAAW,CAAC,WAAW,YAAY,CACpD,QAAO;;;;AAKvB,QAAO;;;;;;;;;;;AAqBX,MAAa,0BACT,YACA,SACA,SACA,UACU;AACV,KAAI,WAAW,SAAS,EACpB,QAAO;AAEX,MAAK,IAAI,UAAU,SAAS,WAAW,OAAO,UAC1C,KAAI,WAAW,IAAI,QAAQ,SAAS,CAChC,QAAO;AAGf,QAAO;;;;;;;;;;AAWX,MAAa,wBAAwB,kBAA0B,iBAAyC;CACpG,MAAM,eAAe,aAAa,QAAQ,MAAM,CAAC,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AAC5F,KAAI,aAAa,WAAW,EACxB,QAAO;CAEX,MAAM,MAAM,iBAAiB,QAAQ,aAAa;AAClD,QAAO,MAAM,IAAI,MAAM;;;;;;;;;;AAW3B,MAAa,4BACT,eACA,OACA,WACS;CAGT,IAAIC;CACJ,IAAIC;AACJ,MAAK,MAAM,KAAK,cAAc,SAAS,MAAM,EAAE;EAC3C,MAAM,QAAQ;GAAE,OAAO,EAAE;GAAO,QAAQ,EAAE,GAAG;GAAQ;AACrD,MAAI,CAAC,MACD,SAAQ;AAEZ,SAAO;;AAEX,KAAI,CAAC,MACD,QAAO;CAEX,MAAM,WAAW,WAAW,WAAW,OAAQ;AAC/C,QAAO,SAAS,QAAQ,SAAS;;;;;;;;;;;;;AAcrC,MAAa,qBACT,kBACA,gBACA,OACA,cACA,QACS;CACT,MAAM,EAAE,SAAS,iBAAiB,mBAAmB,qBAAqB,WAAW;AAErF,MAAK,MAAM,EAAE,MAAM,OAAO,YAAY,mBAAmB,qBAAqB;AAE1E,MAAI,CAAC,oBAAoB,QAAQ,iBAAiB,KAAK,CACnD;AAIJ,MAAI,uBAAuB,YAAY,SAAS,gBAAgB,aAAa,CACzE;AAIJ,MAAI,eAAe,KAAK,iBAAiB,CACrC;AAIJ,MAAI,UAAU,MAAM;GAChB,MAAM,cAAc,eAAe;AACnC,OAAI,eAAe,OAAO;IACtB,MAAM,eAAe,gBAAgB,IAAI,QAAQ,aAAa;AAC9D,QAAI,cAAc;KACd,MAAM,MAAM,qBAAqB,kBAAkB,aAAa;AAChE,SAAI,MAAM,EACN,QAAO;;;AAKnB,UAAO,KAAK,IACR,kBAAkB,eAAe,KAAK,kBAAkB,iBACxD,iBAAiB,OACpB;;EAIL,MAAM,oBAAoB,KAAK,IAC3B,kBAAkB,eAAe,KAAK,kBAAkB,iBACxD,iBAAiB,OACpB;EAED,MAAM,WAAW,yBADK,iBAAiB,MAAM,GAAG,kBAAkB,EACT,OAAO,OAAO;AACvE,MAAI,WAAW,EACX,QAAO;;AAIf,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACvWX,MAAa,wBACT,QACA,iBACqC;AACrC,KAAI,CAAC,UAAU,aAAa,WAAW,EACnC;CAGJ,MAAMC,gBAAwC,EAAE;AAChD,MAAK,MAAM,QAAQ,aACf,KAAI,OAAO,UAAU,OACjB,eAAc,QAAQ,OAAO;AAIrC,QAAO,OAAO,KAAK,cAAc,CAAC,SAAS,IAAI,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;AAwBnE,MAAa,4BAA4B,UAA+C;AACpF,KAAI,MAAM,UAAU,EAChB;AAGJ,MAAK,IAAI,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,IACnC,KAAI,MAAM,OAAO,OACb,QAAO,MAAM;;;;;;;;;;;;;;;;;;;;;;AA0BzB,MAAa,uBACT,SACA,MACA,UACgB;AAChB,QAAO,QAAQ,QAAQ,MAAM;EACzB,MAAM,KAAK,MAAM,EAAE,MAAM;AACzB,MAAI,KAAK,QAAQ,UAAa,KAAK,KAAK,IACpC,QAAO;AAEX,MAAI,KAAK,QAAQ,UAAa,KAAK,KAAK,IACpC,QAAO;AAEX,MAAI,eAAe,IAAI,KAAK,QAAQ,CAChC,QAAO;AAEX,SAAO;GACT;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8BN,MAAa,sBAAsB,SAAwB,eAAyD;AAChH,KAAI,CAAC,QAAQ,OACT,QAAO,EAAE;AAEb,KAAI,eAAe,QACf,QAAO,CAAC,QAAQ,GAAG;AAEvB,KAAI,eAAe,OACf,QAAO,CAAC,QAAQ,QAAQ,SAAS,GAAG;AAExC,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4IX,MAAa,mBAAmB,OAAyC,WAA4B;AACjG,QAAO,MAAM,MAAM,MAAM;EACrB,MAAM,QAAQ,EAAE,QAAQ,UAAa,UAAU,EAAE;EACjD,MAAM,QAAQ,EAAE,QAAQ,UAAa,UAAU,EAAE;AACjD,SAAO,SAAS;GAClB;;;;;;;;;;;AC1VN,MAAa,iBAAiB,SAAyB;AACnD,QAAO,KAAK,QAAQ,YAAY,GAAG;;;;;;;;;;;AAavC,MAAa,wBAAwB,YACjC,QAAQ,SAAS,KAAK,GAAG,QAAQ,QAAQ,UAAU,KAAK,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AC+C/D,MAAa,0BAA0B,YAA4B;AAG/D,QAAO,QAAQ,QAAQ,+BAA+B,OAAO,OAAO,YAAY;AAC5E,MAAI,MACA,QAAO;AAEX,SAAO,KAAK;GACd;;;;;;;;;;AAeN,MAAMC,cAAsC;CAQxC,KAAK;CAUL,UAAU;CASV,QAAQ;CAaR,MAAM;CAMN,MAAM;CAUN,MAAM;CASN,OAAO;CAeP,MAAM;CAUN,MAAM;CAUN,OAAO;CAMP,QAAQ;CACX;;;;;;;;;;;;AAkBD,MAAMC,mBAA2C,EAoB7C,UAAU,uBACb;;;;;;;;;AAUD,MAAM,oBAAoB,aAA6B;AACnD,QAAO,SAAS,QAAQ,mBAAmB,GAAG,cAAc;AACxD,SAAO,YAAY,cAAc,KAAK,UAAU;GAClD;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BN,MAAaC,iBAAyC;CAClD,GAAG;CAEH,GAAG,OAAO,YAAY,OAAO,QAAQ,iBAAiB,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC,GAAG,iBAAiB,EAAE,CAAC,CAAC,C
AAC;CACpG;;;;;;;;;;;AAYD,MAAM,2BAA2B;;;;;;;;;AAUjC,MAAM,qBAAqB;;;;;;;;;;;;;;;;AAiB3B,MAAa,kBAAkB,UAA2B;AACtD,oBAAmB,YAAY;AAC/B,QAAO,mBAAmB,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoEzC,MAAa,4BAA4B,OAAe,mBAA+D;CACnH,MAAMC,eAAyB,EAAE;CAGjC,MAAMC,WAA6D,EAAE;CACrE,IAAI,YAAY;AAChB,0BAAyB,YAAY;CACrC,IAAIC;AAGJ,SAAQ,QAAQ,yBAAyB,KAAK,MAAM,MAAM,MAAM;AAE5D,MAAI,MAAM,QAAQ,UACd,UAAS,KAAK;GAAE,MAAM;GAAQ,OAAO,MAAM,MAAM,WAAW,MAAM,MAAM;GAAE,CAAC;AAG/E,WAAS,KAAK;GAAE,MAAM;GAAS,OAAO,MAAM;GAAI,CAAC;AACjD,cAAY,MAAM,QAAQ,MAAM,GAAG;;AAGvC,KAAI,YAAY,MAAM,OAClB,UAAS,KAAK;EAAE,MAAM;EAAQ,OAAO,MAAM,MAAM,UAAU;EAAE,CAAC;CAIlE,MAAM,iBAAiB,SAAS,KAAK,YAAY;AAC7C,MAAI,QAAQ,SAAS,QAAQ;AAEzB,OAAI,kBAAkB,kBAAkB,KAAK,QAAQ,MAAM,CACvD,QAAO,eAAe,QAAQ,MAAM;AAExC,UAAO,QAAQ;;AAInB,2BAAyB,YAAY;EACrC,MAAM,aAAa,yBAAyB,KAAK,QAAQ,MAAM;AAC/D,MAAI,CAAC,WACD,QAAO,QAAQ;EAGnB,MAAM,GAAG,WAAW,eAAe;AAGnC,MAAI,CAAC,aAAa,aAAa;AAC3B,gBAAa,KAAK,YAAY;AAC9B,UAAO,MAAM,YAAY;;EAI7B,IAAI,eAAe,eAAe;AAClC,MAAI,CAAC,aAED,QAAO,QAAQ;AAInB,MAAI,eAEA,gBAAe,aACV,MAAM,IAAI,CACV,KAAK,SAAU,kBAAkB,KAAK,KAAK,GAAG,eAAe,KAAK,GAAG,KAAM,CAC3E,KAAK,IAAI;AAIlB,MAAI,aAAa;AACb,gBAAa,KAAK,YAAY;AAC9B,UAAO,MAAM,YAAY,GAAG,aAAa;;AAI7C,SAAO;GACT;AAEF,QAAO;EACH;EACA,aAAa,aAAa,SAAS;EACnC,SAAS,eAAe,KAAK,GAAG;EACnC;;;;;;;;;;;;;;;;;;;;;AAsBL,MAAa,gBAAgB,UAA0B,yBAAyB,MAAM,CAAC;;;;;;;;;;;;;;;;;;;;;;AAuBvF,MAAa,mBAAmB,aAAoC;CAChE,MAAM,WAAW,aAAa,SAAS;AACvC,KAAI;AACA,SAAO,IAAI,OAAO,UAAU,IAAI;SAC5B;AACJ,SAAO;;;;;;;;;;;;;;;AAgBf,MAAa,2BAAqC,OAAO,KAAK,eAAe;;;;;;;;;;;;;;;AAgB7E,MAAa,mBAAmB,cAA0C,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACtfzF,MAAM,qBAAqB,YAA6B;AAEpD,QAAO,WAAW,KAAK,QAAQ;;;;;;;;;;;;;;;;;;;;;AAgCnC,MAAM,kBAAkB,SAAiB,UAAqC;CAK1E,MAAM,EAAE,SAAS,UAAU,iBAAiB,yBAH5B,uBAAuB,QAAQ,EAExB,QAAQ,2BAA2B,OACmC;AAC7F,QAAO;EAAE;EAAc,SAAS;EAAU;;;;;;;;;;;;;;;AA8B9C,MAAM,kBAAkB,SAA+B;CACnD,MAAMC,IAMF,EAAE,GAAG,MAAM;CAEf,MAAM,QAAS,KAA6B,SAAS;CACrD,IAAIC,kBAA4B,EAAE;;;;;;;;;CAUlC,MAAM,gBAAgB,YAA4B;AAC9C,MAAI;AACA,UAAO,IAAI,OAAO,SAAS,MAAM;WAC5B,OAAO;GACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,SAAM,IAAI,MAAM,0BAA0B,QAAQ,aAAa,UAAU;;;AAKjF,KAAI,EAAE,iBAAiB,QAAQ;EAC3B,MAAM,YAAY,EAAE,gBAAgB,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACxE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAE1D,IAAE,QAAQ,OAAO,SAAS;AAC1B,SAAO;GACH,cAAc;GACd,OAAO,aAAa,EAAE,MAAM;GAC5B,aAAa;GACb,qBAAqB;GACxB;;AAGL,KAAI,EAAE,gBAAgB,QAAQ;EAC1B,MAAM,YAAY,EAAE,eAAe,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACvE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAC1D,IAAE,QAAQ,OAAO,SAAS;;AAE9B,KAAI,EAAE,cAAc,QAAQ;EACxB,MAAM,YAAY,EAAE,aAAa,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACrE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAC1D,IAAE,QAAQ,MAAM,SAAS;;AAE7B,KAAI,EAAE,UAAU;EAGZ,MAAM,EAAE,SAAS,iBAAiB,yBADlB,uBAAuB,EAAE,SAAS,CACiB;AACnE,IAAE,QAAQ;AACV,oBAAkB,CAAC,GAAG,iBAAiB,GAAG,aAAa;;AAG3D,KAAI,CAAC,EAAE,MACH,OAAM,IAAI,MACN,gHACH;CAGL,MAAM,cAAc,kBAAkB,EAAE,MAAM,IAAI,gBAAgB,SAAS;AAC3E,QAAO;EACH,cAAc;EACd,OAAO,aAAa,EAAE,MAAM;EAC5B;EACA,qBAAqB;EACxB;;;;;;;;;;;;;;;;;;;;;;AAsDL,MAAM,gBAAgB,UAAoF;CACtG,MAAMC,aAA6B,EAAE;CACrC,MAAMC,aAAuB,EAAE;CAC/B,IAAI,SAAS;CACb,MAAMC,QAAkB,EAAE;AAE1B,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACnC,MAAM,aAAa,qBAAqB,MAAM,GAAG,QAAQ;AACzD,aAAW,KAAK;GAAE,KAAK,SAAS,WAAW;GAAQ,IAAI,MAAM,GAAG;GAAI,OAAO;GAAQ,CAAC;AACpF,QAAM,KAAK,WAAW;AACtB,MAAI,IAAI,MAAM,SAAS,GAAG;AACtB,cAAW,KAAK,SAAS,WAAW,OAAO;AAC3C,aAAU,WAAW,SAAS;QAE9B,WAAU,WAAW;;;;;;;;;CAW7B,MAAM,gBAAgB,QAA0C;EAC5D,IAAI,KAAK;EACT,IAAI,KAAK,WAAW,SAAS;AAE7B,SAAO,MAAM,IAAI;G
ACb,MAAM,MAAO,KAAK,OAAQ;GAC1B,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,EAAE,MACR,MAAK,MAAM;YACJ,MAAM,EAAE,IACf,MAAK,MAAM;OAEX,QAAO;;AAIf,SAAO,WAAW,WAAW,SAAS;;AAG1C,QAAO;EACH,SAAS,MAAM,KAAK,KAAK;EACzB,iBAAiB;EACjB,SAAS;GACL;GACA,QAAQ,QAAgB,aAAa,IAAI,EAAE,MAAM;GACjD;GACA,SAAS,WAAW,KAAK,MAAM,EAAE,GAAG;GACvC;EACJ;;;;;;;;;;;AAiCL,MAAM,eAAe,SAAiB,OAAe,aAAsB,iBAA2B;CAClG,MAAMC,UAAyB,EAAE;AACjC,OAAM,YAAY;CAClB,IAAI,IAAI,MAAM,KAAK,QAAQ;AAE3B,QAAO,MAAM,MAAM;EACf,MAAMC,SAAsB;GAAE,KAAK,EAAE,QAAQ,EAAE,GAAG;GAAQ,OAAO,EAAE;GAAO;AAG1E,SAAO,gBAAgB,qBAAqB,EAAE,QAAQ,aAAa;AAGnE,MAAI,YACA,QAAO,WAAW,yBAAyB,EAAE;AAGjD,UAAQ,KAAK,OAAO;AAEpB,MAAI,EAAE,GAAG,WAAW,EAChB,OAAM;AAEV,MAAI,MAAM,KAAK,QAAQ;;AAG3B,QAAO;;;;;;;;;;;AAYX,MAAM,qBAAqB,aAAqB,WAAmB,iBAA2B;AAC1F,KAAI,aAAa,WAAW,EACxB,QAAO,EAAE;CAIb,IAAI,KAAK;CACT,IAAI,KAAK,aAAa;AACtB,QAAO,KAAK,IAAI;EACZ,MAAM,MAAO,KAAK,OAAQ;AAC1B,MAAI,aAAa,OAAO,YACpB,MAAK,MAAM;MAEX,MAAK;;CAKb,MAAMC,SAAmB,EAAE;AAC3B,MAAK,IAAI,IAAI,IAAI,IAAI,aAAa,UAAU,aAAa,KAAK,WAAW,IACrE,QAAO,KAAK,aAAa,KAAK,YAAY;AAE9C,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAM,qBAAqB,SAAiB,aAAqB,eAAiC;AAE9F,KAAI,CAAC,WAAW,CAAC,QAAQ,SAAS,KAAK,CACnC,QAAO;CAIX,MAAM,gBAAgB,kBAAkB,aADtB,cAAc,QAAQ,QACwB,WAAW;AAG3E,KAAI,cAAc,WAAW,EACzB,QAAO;CAOX,MAAM,WAAW,IAAI,IAAI,cAAc;AACvC,QAAO,QAAQ,QAAQ,QAAQ,OAAO,WAAoB,SAAS,IAAI,OAAO,GAAG,MAAM,MAAO;;;;;;;;;;;;;;;;AAiBlG,MAAM,oBACF,UACA,OACA,mBACA,UACA,aACA,QACA,WACY;CACZ,MAAM,8BACF,gBACA,cACA,OACA,WACA,uBACA,wBACS;EACT,MAAM,iBAAiBC,UAAQ;AAE/B,MAD6BC,sBAAoB,MAAM,OAAO,GAAG,WAAW,IAAI,eAAe,CAAC,IACpE,iBAAiB,MAEzC,QAAOC,oBAAkB,iBAAiB,KAAKA,oBAAkB;AAIrE,OAAK,IAAI,UAAU,iBAAiB,GAAG,WAAW,cAAc,WAAW;GACvE,MAAM,SAASF,UAAQ;AAEvB,OADmBC,sBAAoB,MAAM,OAAO,GAAG,WAAW,IAAI,OAAO,CAAC,CAE1E,QAAOC,oBAAkB,WAAWA,oBAAkB;;AAG9D,SAAO;;CAIX,MAAM,UAAU,MAAM,KAAK,MAAM,EAAE,GAAG;CAGtC,MAAM,gBAAgB,IAAI,IAAI,QAAQ,KAAK,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;CAG9D,MAAM,kCAAkB,IAAI,KAA6B;AACzD,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACnC,MAAM,UAAU,kBAAkB;AAClC,kBAAgB,IAAI,MAAM,GAAG,IAAI;GAAE;GAAS,OAAO;GAAG,QAAQ,QAAQ;GAAQ,CAAC;;CAInF,MAAMC,oBAA8B,CAAC,EAAE;CACvC,IAAI,cAAc;AAClB,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,WAAW,gBAAgB,IAAI,QAAQ,GAAG;AAChD,iBAAe,WAAW,SAAS,SAAS;AAC5C,MAAI,IAAI,QAAQ,SAAS,EACrB,gBAAe;AAEnB,oBAAkB,KAAK,YAAY;;CAKvC,MAAM,oBAAoB,MAAc,eAAe,GAAG,MAAM,CAAC;CACjE,MAAM,sBAAsB,kBAAkB,aAAa,iBAAiB;CAE5E,MAAMC,SAAoB,EAAE;AAE5B,SAAQ,OAAO,kCAAkC;EAAE;EAAU,cAAc,SAAS;EAAQ,CAAC;AAE7F,MAAK,MAAM,WAAW,UAAU;EAC5B,MAAM,UAAU,cAAc,IAAI,QAAQ,KAAK,IAAI;EACnD,MAAM,QAAQ,QAAQ,OAAO,SAAa,cAAc,IAAI,QAAQ,GAAG,IAAI,UAAW;AAEtF,UAAQ,QAAQ,sBAAsB;GAClC,eAAe,QAAQ,QAAQ;GAC/B,gBAAgB,QAAQ,QAAQ,MAAM,GAAG,IAAI;GAC7C,MAAM,QAAQ;GACd;GACA,IAAI,QAAQ;GACZ;GACH,CAAC;EAGF,MAAM,eAAe,QAAQ,MAAM,QAAQ,QAAQ,QAAQ;EAG3D,MAAM,gBAAgB,oBAAoB,MAAM,OAC5C,uBAAuB,GAAG,YAAY,SAAS,SAAS,MAAM,CACjE;AAED,MAAI,eAAe,YAAY,CAAC,eAAe;AAC3C,WAAQ,QAAQ,sCAAsC;AAEtD,UAAO,KAAK,QAAQ;AACpB;;AAGJ,UAAQ,QAAQ,0DAA0D;EAM1E,IAAI,mBAAmB,QAAQ;EAC/B,IAAI,iBAAiB;EACrB,IAAI,eAAe;EACnB,IAAI,iBAAiB;EACrB,MAAM,gBAAgB;AAEtB,SAAO,kBAAkB,OAAO;AAC5B;AACA,OAAI,iBAAiB,eAAe;AAChC,YAAQ,QAAQ,oEAAoE,EAChF,gBAAgB,eACnB,CAAC;AACF,YAAQ,QAAQ,cAAc;KAC1B;KACA,wBAAwB,iBAAiB;KACzC;KACH,CAAC;AACF;;GAIJ,MAAM,gBAAgB,QAAQ,SAAS,QAAQ;AAE/C,WAAQ,QAAQ,kBAAkB;IAC9B;IACA,eAAe,QAAQ;IACvB;IACA,wBAAwB,iBAAiB;IACzC,yBAAyB,iBAAiB,MAAM,GAAG,GAAG;IACtD;IACA;IACA,UAAU,QAAQ;IACrB,CAAC;GAGF,MAAM,yBAAyB,oBAAoB,MAAM,OACrD,uBAAuB,GAAG,YAAY,SAAS,gBAAgB,MAAM,CACxE;AAGD,OAAI,iBAAiB,YAAY,CAAC,wBAAwB;AACtD,YAAQ,QAAQ,wDAAwD;IAExE,MAAM,WAAW,cACb,kBACA,QAAQ,iBACR,mBAAmB,QAAQ,QAAQ,SAAS,QAC5C,eAAe,QAAQ,OAAO,OACjC;AACD,QAAI,SACA,QAAO,KAAK,SAAS;AAEzB;;GAKJ,MAAM,gBAAgB
,QAAQ;GAC9B,MAAM,kBAAkB,gBAAgB;GACxC,IAAI,eAAe;AACnB,QAAK,IAAI,IAAI,gBAAgB,KAAK,OAAO,IACrC,KAAI,QAAQ,MAAM,gBACd,gBAAe;OAEf;AAIR,WAAQ,QAAQ,sBAAsB;IAClC;IACA;IACA;IACA,iBAAiB,QAAQ;IAC5B,CAAC;GAIF,MAAM,sBAAsB,oBAAoB,MAAM,OAClD,uBAAuB,GAAG,YAAY,SAAS,gBAAgB,aAAa,CAC/E;GAED,IAAI,gBAAgB;AAEpB,OAAI,qBAAqB;AACrB,YAAQ,QAAQ,0DAA0D;AAE1E,oBAAgB,2BACZ,gBACA,cACA,OACA,SACA,qBACA,kBACH;AAED,YAAQ,QAAQ,4BAA4B,EAAE,eAAe,CAAC;;AAIlE,OAAI,iBAAiB,GAAG;IAEpB,MAAMC,gBAAmC;KACrC;KACA;KACA;KACA;KACA;KACH;AAED,YAAQ,QAAQ,2CAA2C;AAE3D,oBAAgB,kBAAkB,kBAAkB,gBAAgB,OAAO,cAAc,cAAc;AAEvG,YAAQ,QAAQ,0BAA0B,EAAE,eAAe,CAAC;;AAGhE,OAAI,iBAAiB,GAAG;AACpB,YAAQ,QAAQ,oDAAoD;AAIpE,QAAI,iBAAiB,gBAAgB;AACjC,aAAQ,QAAQ,oDAAoD;KAGpE,MAAM,cACF,kBAAkB,iBAAiB,OAAO,SACpC,iBAAiB,MACb,GACA,kBAAkB,iBAAiB,KAAK,kBAAkB,gBAC7D,GACD;KACV,MAAM,UAAU,cACZ,YAAY,MAAM,EAClB,QAAQ,iBACR,QACA,eAAe,QAAQ,OAAO,OACjC;AACD,SAAI,QACA,QAAO,KAAK,QAAQ;AAGxB,wBAAmB,iBAAiB,MAAM,YAAY,OAAO,CAAC,MAAM;AACpE;AACA,oBAAe;AAEf,aAAQ,QAAQ,qBAAqB;MACjC;MACA,wBAAwB,iBAAiB;MAC5C,CAAC;AAEF;;AAGJ,oBAAgB,kBAAkB,eAAe,KAAK,kBAAkB;AACxE,YAAQ,QAAQ,uDAAuD,EAAE,eAAe,CAAC;;GAG7F,MAAM,eAAe,iBAAiB,MAAM,GAAG,cAAc,CAAC,MAAM;AAEpE,WAAQ,QAAQ,mBAAmB;IAC/B;IACA,oBAAoB,aAAa;IACjC,qBAAqB,aAAa,MAAM,GAAG,GAAG;IACjD,CAAC;GAIF,MAAM,iBAAiB,eACjB,oBAAoB,cAAc,gBAAgB,OAAO,SAAS,gBAAgB,GAClF;GACN,MAAM,eAAe,eACf,kBAAkB,cAAc,gBAAgB,cAAc,SAAS,gBAAgB,GACvF;AAEN,WAAQ,QAAQ,uBAAuB;IACnC;IACA;IACA,iBAAiB,CAAC,CAAC;IACtB,CAAC;AAEF,OAAI,cAAc;IACd,MAAM,WAAW,cACb,cACA,QAAQ,iBACR,eAAe,iBAAiB,QAAQ,gBAAgB,QACxD,eAAe,QAAQ,OAAO,OACjC;AACD,QAAI,UAAU;AACV,YAAO,KAAK,SAAS;AAErB,aAAQ,QAAQ,mBAAmB;MAC/B,eAAe,SAAS,QAAQ;MAChC,MAAM,SAAS;MACf,IAAI,SAAS;MAChB,CAAC;;;GAKV,MAAM,sBAAsB,iBAAiB;AAC7C,sBAAmB,iBAAiB,MAAM,cAAc,CAAC,MAAM;AAE/D,WAAQ,QAAQ,kCAAkC;IAC9C,WAAW,iBAAiB;IAC5B,YAAY;IACZ,cAAc;IACjB,CAAC;AAGF,OAAI,CAAC,kBAAkB;AACnB,YAAQ,QAAQ,6CAA6C;AAC7D;;GAKJ,IAAI,cAAc;AAGlB,OAAI,oBAAoB,eAAe,KAAK,OAAO;IAC/C,MAAM,eAAe,gBAAgB,IAAI,QAAQ,eAAe,GAAG;AACnE,QAAI,cAAc;KACd,MAAM,aAAa,aAAa,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AACnF,SAAI,cAAc,iBAAiB,WAAW,WAAW,EAAE;AACvD,oBAAc,eAAe;AAC7B,cAAQ,QAAQ,wCAAwC,EAAE,aAAa,aAAa,CAAC;;;;AAKjG,WAAQ,QAAQ,oBAAoB;IAChC;IACA,oBAAoB;IACpB,aAAa,gBAAgB;IAChC,CAAC;AAEF,oBAAiB;AACjB,kBAAe;;;AAIvB,SAAQ,OAAO,mCAAmC,EAAE,aAAa,OAAO,QAAQ,CAAC;AAEjF,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA6CX,MAAa,gBAAgB,OAAe,YAA4C;CACpF,MAAM,EAAE,QAAQ,EAAE,EAAE,UAAU,aAAa,SAAS,UAAU,WAAW;AACzE,KAAI,CAAC,MAAM,OACP,QAAO,EAAE;CAGb,MAAM,EAAE,SAAS,cAAc,iBAAiB,mBAAmB,YAAY,aAAa,MAAM;CAClG,MAAMC,cAA4B,EAAE;AAGpC,MAAK,MAAM,QAAQ,OAAO;EACtB,MAAM,EAAE,OAAO,aAAa,cAAc,wBAAwB,eAAe,KAAK;EAOtF,MAAM,eAAe,mBAHM,oBAHR,YAAY,cAAc,OAAO,aAAa,aAAa,EAGnB,MAAM,QAAQ,MAAM,EAGnB,KAAK,WAAW;AAE5E,OAAK,MAAM,KAAK,cAAc;GAI1B,MAAM,oBAAoB,uBAAuB,EAAE,aAAa;GAChE,MAAM,eAAe,oBAAoB,EAAE,MAAM,EAAE,SAAU,SAAS,EAAE,QAAQ;AAEhF,eAAY,KAAK;IAEb,iBAAiB,oBAAoB,SAAY,EAAE;IAEnD,oBAAoB,oBAAoB,eAAe;IACvD,OAAO,KAAK,UAAU,OAAO,EAAE,QAAQ,EAAE;IACzC,MAAM,KAAK;IACX,eAAe,EAAE;IACpB,CAAC;;;CAMV,MAAM,0BAAU,IAAI,KAAyB;AAC7C,MAAK,MAAM,KAAK,aAAa;EACzB,MAAM,WAAW,QAAQ,IAAI,EAAE,MAAM;AACrC,MAAI,CAAC,SACD,SAAQ,IAAI,EAAE,OAAO,EAAE;WAKlB,EAAE,uBAAuB,UAAa,SAAS,uBAAuB,UACtE,EAAE,SAAS,UAAa,SAAS,SAAS,OAE3C,SAAQ,IAAI,EAAE,OAAO,EAAE;;CAInC,MAAM,SAAS,CAAC,GAAG,QAAQ,QAAQ,CAAC;AACpC,QAAO,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;CAGxC,IAAI,WAAW,cAAc,QAAQ,cAAc,SAAS,MAAM;AAIlE,KAAI,SAAS,WAAW,KAAK,MAAM,SAAS,GAAG;EAC3C,MAAM,YAAY,MAAM;EACxB,MAAM,WAAW,MAAM,MAAM,SAAS;EAGtC,MAAMC,aAAsB;GACxB,SAFe,kBAAkB,KAAK,KAAK,CAEvB,MAAM;GAC1B,MAAM,UAAU;GACnB;AACD,MAAI,SAAS,OAAO,UAAU,GAC1B,YAAW,KAAK,SAAS;AAE7B,MAAI,WAAW,QACX,YAAW,CAAC,WAAW
;;AAK/B,KAAI,aAAa,UAAa,YAAY,KAAK,aAAa,OACxD,QAAO,iBAAiB,UAAU,OAAO,mBAAmB,UAAU,aAAa,QAAQ,OAAO;AAGtG,QAAO;;;;;;;;;;;;;;;;;AAkBX,MAAM,iBAAiB,aAA2B,SAAiB,SAAkB,UAAkC;;;;CAInH,MAAMC,mBACF,OACA,KACA,MACA,iBACA,eACA,uBACiB;EAEjB,MAAM,cAAc,SAAS,sBAAsB;EAGnD,MAAM,SAAS,QAAQ,MAAM,aAAa,IAAI;EAC9C,IAAI,OAAO,iBAAiB,MAAM,KAAK,qBAAqB,OAAO,MAAM,GAAG,OAAO,QAAQ,YAAY,GAAG;AAC1G,MAAI,CAAC,KACD,QAAO;AAEX,MAAI,CAAC,gBACD,QAAO,kBAAkB,MAAM,aAAa,QAAQ,WAAW;EAEnE,MAAM,OAAO,QAAQ,MAAM,YAAY;EACvC,MAAM,KAAK,kBAAkB,QAAQ,MAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,cAAc,KAAK,SAAS,EAAE;EAClG,MAAMC,MAAe;GAAE,SAAS;GAAM;GAAM;AAC5C,MAAI,OAAO,KACP,KAAI,KAAK;AAEb,MAAI,QAAQ,cACR,KAAI,OAAO;GAAE,GAAG;GAAM,GAAG;GAAe;AAE5C,SAAO;;;;;CAMX,MAAM,sCAAiD;EACnD,MAAML,SAAoB,EAAE;AAC5B,OAAK,IAAI,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;GACzC,MAAM,KAAK,YAAY;GACvB,MAAM,MAAM,IAAI,YAAY,SAAS,IAAI,YAAY,IAAI,GAAG,QAAQ,QAAQ;GAC5E,MAAM,IAAII,gBACN,GAAG,OACH,KACA,GAAG,MACH,GAAG,iBACH,GAAG,eACH,GAAG,mBACN;AACD,OAAI,EACA,QAAO,KAAK,EAAE;;AAGtB,SAAO;;CAGX,MAAME,WAAsB,EAAE;AAG9B,KAAI,CAAC,YAAY,QAAQ;AAErB,MAAI,gBAAgB,OADJ,QAAQ,MAAM,EAAE,CACG,EAAE;GACjC,MAAM,IAAIF,gBAAc,GAAG,QAAQ,OAAO;AAC1C,OAAI,EACA,UAAS,KAAK,EAAE;;AAGxB,SAAO;;AAIX,KAAI,YAAY,GAAG,QAAQ,GAEvB;MAAI,gBAAgB,OADJ,QAAQ,MAAM,EAAE,CACG,EAAE;GACjC,MAAM,IAAIA,gBAAc,GAAG,YAAY,GAAG,MAAM;AAChD,OAAI,EACA,UAAS,KAAK,EAAE;;;AAM5B,QAAO,CAAC,GAAG,UAAU,GAAG,+BAA+B,CAAC;;;;;;;;;;;;;;;;;ACx+B5D,MAAMG,uBAAiC;CACnC;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACH;;;;;AAMD,MAAM,yBAAmC;CACrC,MAAM,YAAY,oBAAoB;CACtC,MAAM,cAAc,qBAAqB,QAAQ,MAAM,UAAU,SAAS,EAAE,CAAC;CAC7E,MAAM,YAAY,UAAU,QAAQ,MAAM,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,MAAM;AACnF,QAAO,CAAC,GAAG,aAAa,GAAG,UAAU;;;;;;;;;;;;;;;;;AAkBzC,MAAa,uBAAuB,SAAoC;AACpE,KAAI,CAAC,KACD,QAAO,EAAE;CAGb,MAAMC,UAA6B,EAAE;CACrC,MAAMC,gBAAyC,EAAE;CAGjD,MAAM,qBAAqB,OAAe,QAAyB;AAC/D,SAAO,cAAc,MAChB,CAAC,GAAG,OAAQ,SAAS,KAAK,QAAQ,KAAO,MAAM,KAAK,OAAO,KAAO,SAAS,KAAK,OAAO,EAC3F;;AAIL,MAAK,MAAM,aAAa,kBAAkB,EAAE;EACxC,MAAM,UAAU,eAAe;AAC/B,MAAI,CAAC,QACD;AAGJ,MAAI;GAEA,MAAM,QAAQ,IAAI,OAAO,IAAI,QAAQ,IAAI,KAAK;GAC9C,IAAIC;AAGJ,WAAQ,QAAQ,MAAM,KAAK,KAAK,MAAM,MAAM;IACxC,MAAM,aAAa,MAAM;IACzB,MAAM,WAAW,aAAa,MAAM,GAAG;AAGvC,QAAI,kBAAkB,YAAY,SAAS,CACvC;AAGJ,YAAQ,KAAK;KAAE;KAAU,OAAO;KAAY,OAAO,MAAM;KAAI,OAAO;KAAW,CAAC;AAEhF,kBAAc,KAAK,CAAC,YAAY,SAAS,CAAC;;UAE1C;;AAGZ,QAAO,QAAQ,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;;;;;;;;;;;;;;;AAgBpD,MAAa,4BAA4B,MAAc,aAAwC;AAC3F,KAAI,CAAC,QAAQ,SAAS,WAAW,EAC7B,QAAO;CAKX,IAAI,WAAW;CACf,MAAM,oBAAoB,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;AAEzE,MAAK,MAAM,KAAK,kBACZ,YAAW,GAAG,SAAS,MAAM,GAAG,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,IAAI,SAAS,MAAM,EAAE,SAAS;AAGvF,QAAO;;;;;;;;AASX,MAAa,wBACT,aAC2F;CAE3F,MAAM,qBAAqB,SAAS,MAAM,MAAM;EAAC;EAAY;EAAS;EAAO;EAAO,CAAC,SAAS,EAAE,MAAM,CAAC;CAGvG,MAAM,qBAAqB,SAAS,MAAM,MAAM;EAAC;EAAS;EAAQ;EAAW,CAAC,SAAS,EAAE,MAAM,CAAC;AAGhG,KAAI,mBACA,QAAO;EACH,OAAO;EACP,UAAU,SAAS,MAAM,MAAM;GAAC;GAAS;GAAO;GAAO,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,SAAS;EACrF,aAAa;EAChB;AAIL,KAAI,mBACA,QAAO;EAAE,OAAO;EAAO,UAAU;EAAU,aAAa;EAAmB;AAI/E,QAAO;EAAE,OAAO;EAAO,aAAa;EAAmB;;;;;;;;AAS3D,MAAa,sBACT,SAOQ;CACR,MAAM,WAAW,oBAAoB,KAAK;AAE1C,KAAI,SAAS,WAAW,EACpB,QAAO;AAMX,QAAO;EAAE;EAAU,UAHF,yBAAyB,MAAM,SAAS;EAG5B,GAFd,qBAAqB,SAAS;EAEL"}
1
+ {"version":3,"file":"index.mjs","names":["EQUIV_GROUPS: string[][]","seg: Segment","processPattern","first: { index: number; length: number } | undefined","last: { index: number; length: number } | undefined","namedCaptures: Record<string, string>","BASE_TOKENS: Record<string, string>","COMPOSITE_TOKENS: Record<string, string>","TOKEN_PATTERNS: Record<string, string>","captureNames: string[]","segments: Array<{ type: 'token' | 'text'; value: string }>","match: RegExpExecArray | null","s: {\n lineStartsWith?: string[];\n lineStartsAfter?: string[];\n lineEndsWith?: string[];\n template?: string;\n regex?: string;\n }","allCaptureNames: string[]","boundaries: PageBoundary[]","pageBreaks: number[]","parts: string[]","matches: MatchResult[]","result: MatchResult","result: number[]","pageIds","expandedBreakpoints","cumulativeOffsets","cumulativeOffsets: number[]","result: Segment[]","breakpointCtx: BreakpointContext","splitPoints: SplitPoint[]","initialSeg: Segment","createSegment","seg: Segment","segments: Segment[]","TOKEN_PRIORITY_ORDER: string[]","results: DetectedPattern[]","coveredRanges: Array<[number, number]>","match: RegExpExecArray | null"],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/breakpoint-utils.ts","../src/segmentation/match-utils.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/segmentation/segmenter.ts","../src/pattern-detection.ts"],"sourcesContent":["/**\n * Fuzzy matching utilities for Arabic text.\n *\n * Provides diacritic-insensitive and character-equivalence matching for Arabic text.\n * This allows matching text regardless of:\n * - Diacritical marks (harakat/tashkeel): فَتْحَة، ضَمَّة، كَسْرَة، سُكُون، شَدَّة، تَنْوين\n * - Character equivalences: ا↔آ↔أ↔إ, ة↔ه, ى↔ي\n *\n * @module fuzzy\n *\n * @example\n * // Make a pattern diacritic-insensitive\n * const pattern = makeDiacriticInsensitive('حدثنا');\n * new RegExp(pattern, 'u').test('حَدَّثَنَا') // → true\n */\n\n/**\n * Character class matching all Arabic diacritics (Tashkeel/Harakat).\n *\n * Includes the following diacritical marks:\n * - U+064B: ً (fathatan - double fatha)\n * - U+064C: ٌ (dammatan - double damma)\n * - U+064D: ٍ (kasratan - double kasra)\n * - U+064E: َ (fatha - short a)\n * - U+064F: ُ (damma - short u)\n * - U+0650: ِ (kasra - short i)\n * - U+0651: ّ (shadda - gemination)\n * - U+0652: ْ (sukun - no vowel)\n *\n * @internal\n */\nconst DIACRITICS_CLASS = '[\\u064B\\u064C\\u064D\\u064E\\u064F\\u0650\\u0651\\u0652]';\n\n/**\n * Groups of equivalent Arabic characters.\n *\n * Characters within the same group are considered equivalent for matching purposes.\n * This handles common variations in Arabic text where different characters are\n * used interchangeably or have the same underlying meaning.\n *\n * Equivalence groups:\n * - Alef variants: ا (bare), آ (with madda), أ (with hamza above), إ (with hamza below)\n * - Ta marbuta and Ha: ة ↔ ه (often interchangeable at word endings)\n * - Alef maqsura and Ya: ى ↔ ي (often interchangeable at word endings)\n *\n * @internal\n */\nconst EQUIV_GROUPS: string[][] = [\n ['\\u0627', '\\u0622', '\\u0623', '\\u0625'], // ا, آ, أ, إ\n ['\\u0629', '\\u0647'], // ة <-> ه\n ['\\u0649', '\\u064A'], // ى <-> ي\n];\n\n/**\n * Escapes a string for safe inclusion in a regular expression.\n *\n * Escapes all regex metacharacters: `.*+?^${}()|[\\]\\\\`\n *\n * @param s - Any string to escape\n * @returns String with regex metacharacters escaped\n *\n * @example\n * escapeRegex('hello.world') // → 
'hello\\\\.world'\n * escapeRegex('[test]') // → '\\\\[test\\\\]'\n * escapeRegex('a+b*c?') // → 'a\\\\+b\\\\*c\\\\?'\n */\nexport const escapeRegex = (s: string): string => s.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n\n/**\n * Returns a regex character class for all equivalents of a given character.\n *\n * If the character belongs to one of the predefined equivalence groups\n * (e.g., ا/آ/أ/إ), the returned class will match any member of that group.\n * Otherwise, the original character is simply escaped for safe regex inclusion.\n *\n * @param ch - A single character to expand into its equivalence class\n * @returns A RegExp-safe string representing the character and its equivalents\n *\n * @example\n * getEquivClass('ا') // → '[اآأإ]' (matches any alef variant)\n * getEquivClass('ب') // → 'ب' (no equivalents, just escaped)\n * getEquivClass('.') // → '\\\\.' (regex metachar escaped)\n *\n * @internal\n */\nconst getEquivClass = (ch: string): string => {\n for (const group of EQUIV_GROUPS) {\n if (group.includes(ch)) {\n // join the group's members into a character class\n return `[${group.map((c) => escapeRegex(c)).join('')}]`;\n }\n }\n // not in equivalence groups -> return escaped character\n return escapeRegex(ch);\n};\n\n/**\n * Performs light normalization on Arabic text for consistent matching.\n *\n * Normalization steps:\n * 1. NFC normalization (canonical decomposition then composition)\n * 2. Remove Zero-Width Joiner (U+200D) and Zero-Width Non-Joiner (U+200C)\n * 3. Collapse multiple whitespace characters to single space\n * 4. Trim leading and trailing whitespace\n *\n * This normalization preserves diacritics and letter forms while removing\n * invisible characters that could interfere with matching.\n *\n * @param str - Arabic text to normalize\n * @returns Normalized string\n *\n * @example\n * normalizeArabicLight('حَدَّثَنَا') // → 'حَدَّثَنَا' (diacritics preserved)\n * normalizeArabicLight('بسم الله') // → 'بسم الله' (spaces collapsed)\n * normalizeArabicLight(' text ') // → 'text' (trimmed)\n *\n * @internal\n */\nconst normalizeArabicLight = (str: string) => {\n return str\n .normalize('NFC')\n .replace(/[\\u200C\\u200D]/g, '') // remove ZWJ/ZWNJ\n .replace(/\\s+/g, ' ')\n .trim();\n};\n\n/**\n * Creates a diacritic-insensitive regex pattern for Arabic text matching.\n *\n * Transforms input text into a regex pattern that matches the text regardless\n * of diacritical marks (harakat) and character variations. Each character in\n * the input is:\n * 1. Expanded to its equivalence class (if applicable)\n * 2. 
Followed by an optional diacritics matcher\n *\n * This allows matching:\n * - `حدثنا` with `حَدَّثَنَا` (with full diacritics)\n * - `الإيمان` with `الايمان` (alef variants)\n * - `صلاة` with `صلاه` (ta marbuta ↔ ha)\n *\n * @param text - Input Arabic text to make diacritic-insensitive\n * @returns Regex pattern string that matches the text with or without diacritics\n *\n * @example\n * const pattern = makeDiacriticInsensitive('حدثنا');\n * // Each char gets equivalence class + optional diacritics\n * // Result matches: حدثنا, حَدَّثَنَا, حَدَثَنَا, etc.\n *\n * @example\n * const pattern = makeDiacriticInsensitive('باب');\n * new RegExp(pattern, 'u').test('بَابٌ') // → true\n * new RegExp(pattern, 'u').test('باب') // → true\n *\n * @example\n * // Using with split rules\n * {\n * lineStartsWith: ['باب'],\n * split: 'at',\n * fuzzy: true // Applies makeDiacriticInsensitive internally\n * }\n */\nexport const makeDiacriticInsensitive = (text: string) => {\n const diacriticsMatcher = `${DIACRITICS_CLASS}*`;\n const norm = normalizeArabicLight(text);\n // Use Array.from to iterate grapheme-safe over the string (works fine for Arabic letters)\n return Array.from(norm)\n .map((ch) => getEquivClass(ch) + diacriticsMatcher)\n .join('');\n};\n","/**\n * Utility functions for breakpoint processing in the segmentation engine.\n *\n * These functions handle breakpoint normalization, page exclusion checking,\n * and segment creation. Extracted for independent testing and reuse.\n *\n * @module breakpoint-utils\n */\n\nimport type { Breakpoint, BreakpointRule, PageRange, Segment } from './types.js';\n\n/**\n * Normalizes a breakpoint to the object form.\n * Strings are converted to { pattern: str } with no constraints.\n *\n * @param bp - Breakpoint as string or object\n * @returns Normalized BreakpointRule object\n *\n * @example\n * normalizeBreakpoint('\\\\n\\\\n')\n * // → { pattern: '\\\\n\\\\n' }\n *\n * normalizeBreakpoint({ pattern: '\\\\n', min: 10 })\n * // → { pattern: '\\\\n', min: 10 }\n */\nexport const normalizeBreakpoint = (bp: Breakpoint): BreakpointRule => (typeof bp === 'string' ? 
{ pattern: bp } : bp);\n\n/**\n * Checks if a page ID is in an excluded list (single pages or ranges).\n *\n * @param pageId - Page ID to check\n * @param excludeList - List of page IDs or [from, to] ranges to exclude\n * @returns True if page is excluded\n *\n * @example\n * isPageExcluded(5, [1, 5, 10])\n * // → true\n *\n * isPageExcluded(5, [[3, 7]])\n * // → true\n *\n * isPageExcluded(5, [[10, 20]])\n * // → false\n */\nexport const isPageExcluded = (pageId: number, excludeList: PageRange[] | undefined): boolean => {\n if (!excludeList || excludeList.length === 0) {\n return false;\n }\n for (const item of excludeList) {\n if (typeof item === 'number') {\n if (pageId === item) {\n return true;\n }\n } else {\n const [from, to] = item;\n if (pageId >= from && pageId <= to) {\n return true;\n }\n }\n }\n return false;\n};\n\n/**\n * Checks if a page ID is within a breakpoint's min/max range and not excluded.\n *\n * @param pageId - Page ID to check\n * @param rule - Breakpoint rule with optional min/max/exclude constraints\n * @returns True if page is within valid range\n *\n * @example\n * isInBreakpointRange(50, { pattern: '\\\\n', min: 10, max: 100 })\n * // → true\n *\n * isInBreakpointRange(5, { pattern: '\\\\n', min: 10 })\n * // → false (below min)\n */\nexport const isInBreakpointRange = (pageId: number, rule: BreakpointRule): boolean => {\n if (rule.min !== undefined && pageId < rule.min) {\n return false;\n }\n if (rule.max !== undefined && pageId > rule.max) {\n return false;\n }\n return !isPageExcluded(pageId, rule.exclude);\n};\n\n/**\n * Builds an exclude set from a PageRange array for O(1) lookups.\n *\n * @param excludeList - List of page IDs or [from, to] ranges\n * @returns Set of all excluded page IDs\n *\n * @remarks\n * This expands ranges into explicit page IDs for fast membership checks. For typical\n * book-scale inputs (thousands of pages), this is small and keeps downstream logic\n * simple and fast. 
If you expect extremely large ranges (e.g., millions of pages),\n * consider avoiding broad excludes or introducing a range-based membership structure.\n *\n * @example\n * buildExcludeSet([1, 5, [10, 12]])\n * // → Set { 1, 5, 10, 11, 12 }\n */\nexport const buildExcludeSet = (excludeList: PageRange[] | undefined): Set<number> => {\n const excludeSet = new Set<number>();\n for (const item of excludeList || []) {\n if (typeof item === 'number') {\n excludeSet.add(item);\n } else {\n for (let i = item[0]; i <= item[1]; i++) {\n excludeSet.add(i);\n }\n }\n }\n return excludeSet;\n};\n\n/**\n * Creates a segment with optional to and meta fields.\n * Returns null if content is empty after trimming.\n *\n * @param content - Segment content\n * @param fromPageId - Starting page ID\n * @param toPageId - Optional ending page ID (omitted if same as from)\n * @param meta - Optional metadata to attach\n * @returns Segment object or null if empty\n *\n * @example\n * createSegment('Hello world', 1, 3, { chapter: 1 })\n * // → { content: 'Hello world', from: 1, to: 3, meta: { chapter: 1 } }\n *\n * createSegment(' ', 1, undefined, undefined)\n * // → null (empty content)\n */\nexport const createSegment = (\n content: string,\n fromPageId: number,\n toPageId: number | undefined,\n meta: Record<string, unknown> | undefined,\n): Segment | null => {\n const trimmed = content.trim();\n if (!trimmed) {\n return null;\n }\n const seg: Segment = { content: trimmed, from: fromPageId };\n if (toPageId !== undefined && toPageId !== fromPageId) {\n seg.to = toPageId;\n }\n if (meta) {\n seg.meta = meta;\n }\n return seg;\n};\n\n/** Expanded breakpoint with pre-compiled regex and exclude set */\nexport type ExpandedBreakpoint = {\n rule: BreakpointRule;\n regex: RegExp | null;\n excludeSet: Set<number>;\n skipWhenRegex: RegExp | null;\n};\n\n/** Function type for pattern processing */\nexport type PatternProcessor = (pattern: string) => string;\n\n/**\n * Expands breakpoint patterns and pre-computes exclude sets.\n *\n * @param breakpoints - Array of breakpoint patterns or rules\n * @param processPattern - Function to expand tokens in patterns\n * @returns Array of expanded breakpoints with compiled regexes\n *\n * @remarks\n * This function compiles regex patterns dynamically. This can be a ReDoS vector\n * if patterns come from untrusted sources. In typical usage, breakpoint rules\n * are application configuration, not user input.\n */\nexport const expandBreakpoints = (breakpoints: Breakpoint[], processPattern: PatternProcessor): ExpandedBreakpoint[] =>\n breakpoints.map((bp) => {\n const rule = normalizeBreakpoint(bp);\n const excludeSet = buildExcludeSet(rule.exclude);\n const skipWhenRegex =\n rule.skipWhen !== undefined\n ? (() => {\n const expandedSkip = processPattern(rule.skipWhen);\n try {\n return new RegExp(expandedSkip, 'mu');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(`Invalid breakpoint skipWhen regex: ${rule.skipWhen}\\n Cause: ${message}`);\n }\n })()\n : null;\n if (rule.pattern === '') {\n return { excludeSet, regex: null, rule, skipWhenRegex };\n }\n const expanded = processPattern(rule.pattern);\n try {\n return { excludeSet, regex: new RegExp(expanded, 'gmu'), rule, skipWhenRegex };\n } catch (error) {\n const message = error instanceof Error ? 
error.message : String(error);\n throw new Error(`Invalid breakpoint regex: ${rule.pattern}\\n Cause: ${message}`);\n }\n });\n\n/** Normalized page data for efficient lookups */\nexport type NormalizedPage = { content: string; length: number; index: number };\n\n/**\n * Finds the actual ending page index by searching backwards for page content prefix.\n * Used to determine which page a segment actually ends on based on content matching.\n *\n * @param pieceContent - Content of the segment piece\n * @param currentFromIdx - Current starting index in pageIds\n * @param toIdx - Maximum ending index to search\n * @param pageIds - Array of page IDs\n * @param normalizedPages - Map of page ID to normalized content\n * @returns The actual ending page index\n */\nexport const findActualEndPage = (\n pieceContent: string,\n currentFromIdx: number,\n toIdx: number,\n pageIds: number[],\n normalizedPages: Map<number, NormalizedPage>,\n): number => {\n for (let pi = toIdx; pi > currentFromIdx; pi--) {\n const pageData = normalizedPages.get(pageIds[pi]);\n if (pageData) {\n const checkPortion = pageData.content.slice(0, Math.min(30, pageData.length));\n if (checkPortion.length > 0 && pieceContent.indexOf(checkPortion) > 0) {\n return pi;\n }\n }\n }\n return currentFromIdx;\n};\n\n/**\n * Finds the actual starting page index by searching forwards for page content prefix.\n * Used to determine which page content actually starts from based on content matching.\n *\n * This is the counterpart to findActualEndPage - it searches forward to find which\n * page the content starts on, rather than which page it ends on.\n *\n * @param pieceContent - Content of the segment piece\n * @param currentFromIdx - Current starting index in pageIds\n * @param toIdx - Maximum ending index to search\n * @param pageIds - Array of page IDs\n * @param normalizedPages - Map of page ID to normalized content\n * @returns The actual starting page index\n */\nexport const findActualStartPage = (\n pieceContent: string,\n currentFromIdx: number,\n toIdx: number,\n pageIds: number[],\n normalizedPages: Map<number, NormalizedPage>,\n): number => {\n const trimmedPiece = pieceContent.trimStart();\n if (!trimmedPiece) {\n return currentFromIdx;\n }\n\n // Search forward from currentFromIdx to find which page the content starts on\n for (let pi = currentFromIdx; pi <= toIdx; pi++) {\n const pageData = normalizedPages.get(pageIds[pi]);\n if (pageData) {\n const pagePrefix = pageData.content.slice(0, Math.min(30, pageData.length)).trim();\n const piecePrefix = trimmedPiece.slice(0, Math.min(30, trimmedPiece.length));\n\n // Check both directions:\n // 1. pieceContent starts with page prefix (page content is longer)\n // 2. 
page content starts with pieceContent prefix (pieceContent is shorter)\n if (pagePrefix.length > 0) {\n if (trimmedPiece.startsWith(pagePrefix)) {\n return pi;\n }\n if (pageData.content.trimStart().startsWith(piecePrefix)) {\n return pi;\n }\n }\n }\n }\n return currentFromIdx;\n};\n\n/** Context required for finding break positions */\nexport type BreakpointContext = {\n pageIds: number[];\n normalizedPages: Map<number, NormalizedPage>;\n cumulativeOffsets: number[];\n expandedBreakpoints: ExpandedBreakpoint[];\n prefer: 'longer' | 'shorter';\n};\n\n/**\n * Checks if any page in a range is excluded by the given exclude set.\n *\n * @param excludeSet - Set of excluded page IDs\n * @param pageIds - Array of page IDs\n * @param fromIdx - Start index (inclusive)\n * @param toIdx - End index (inclusive)\n * @returns True if any page in range is excluded\n */\nexport const hasExcludedPageInRange = (\n excludeSet: Set<number>,\n pageIds: number[],\n fromIdx: number,\n toIdx: number,\n): boolean => {\n if (excludeSet.size === 0) {\n return false;\n }\n for (let pageIdx = fromIdx; pageIdx <= toIdx; pageIdx++) {\n if (excludeSet.has(pageIds[pageIdx])) {\n return true;\n }\n }\n return false;\n};\n\n/**\n * Finds the position of the next page content within remaining content.\n * Returns -1 if not found.\n *\n * @param remainingContent - Content to search in\n * @param nextPageData - Normalized data for the next page\n * @returns Position of next page content, or -1 if not found\n */\nexport const findNextPagePosition = (remainingContent: string, nextPageData: NormalizedPage): number => {\n const searchPrefix = nextPageData.content.trim().slice(0, Math.min(30, nextPageData.length));\n if (searchPrefix.length === 0) {\n return -1;\n }\n const pos = remainingContent.indexOf(searchPrefix);\n return pos > 0 ? pos : -1;\n};\n\n/**\n * Finds matches within a window and returns the selected position based on preference.\n *\n * @param windowContent - Content to search\n * @param regex - Regex to match\n * @param prefer - 'longer' for last match, 'shorter' for first match\n * @returns Break position after the selected match, or -1 if no matches\n */\nexport const findPatternBreakPosition = (\n windowContent: string,\n regex: RegExp,\n prefer: 'longer' | 'shorter',\n): number => {\n // OPTIMIZATION: Stream matches instead of collecting all into an array.\n // Only track first and last match to avoid allocating large arrays for dense patterns.\n let first: { index: number; length: number } | undefined;\n let last: { index: number; length: number } | undefined;\n for (const m of windowContent.matchAll(regex)) {\n const match = { index: m.index, length: m[0].length };\n if (!first) {\n first = match;\n }\n last = match;\n }\n if (!first) {\n return -1;\n }\n const selected = prefer === 'longer' ? last! 
: first;\n return selected.index + selected.length;\n};\n\n/**\n * Tries to find a break position within the current window using breakpoint patterns.\n * Returns the break position or -1 if no suitable break was found.\n *\n * @param remainingContent - Content remaining to be segmented\n * @param currentFromIdx - Current starting page index\n * @param toIdx - Ending page index\n * @param windowEndIdx - Maximum window end index\n * @param ctx - Breakpoint context with page data and patterns\n * @returns Break position in the content, or -1 if no break found\n */\nexport const findBreakPosition = (\n remainingContent: string,\n currentFromIdx: number,\n toIdx: number,\n windowEndIdx: number,\n ctx: BreakpointContext,\n): number => {\n const { pageIds, normalizedPages, cumulativeOffsets, expandedBreakpoints, prefer } = ctx;\n\n for (const { rule, regex, excludeSet, skipWhenRegex } of expandedBreakpoints) {\n // Check if this breakpoint applies to the current segment's starting page\n if (!isInBreakpointRange(pageIds[currentFromIdx], rule)) {\n continue;\n }\n\n // Check if ANY page in the current WINDOW is excluded (not the entire segment)\n if (hasExcludedPageInRange(excludeSet, pageIds, currentFromIdx, windowEndIdx)) {\n continue;\n }\n\n // Check if content matches skipWhen pattern (pre-compiled)\n if (skipWhenRegex?.test(remainingContent)) {\n continue;\n }\n\n // Handle page boundary (empty pattern)\n if (regex === null) {\n const nextPageIdx = windowEndIdx + 1;\n if (nextPageIdx <= toIdx) {\n const nextPageData = normalizedPages.get(pageIds[nextPageIdx]);\n if (nextPageData) {\n const pos = findNextPagePosition(remainingContent, nextPageData);\n if (pos > 0) {\n return pos;\n }\n }\n }\n // Fallback to cumulative offsets\n return Math.min(\n cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx],\n remainingContent.length,\n );\n }\n\n // Find matches within window\n const windowEndPosition = Math.min(\n cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx],\n remainingContent.length,\n );\n const windowContent = remainingContent.slice(0, windowEndPosition);\n const breakPos = findPatternBreakPosition(windowContent, regex, prefer);\n if (breakPos > 0) {\n return breakPos;\n }\n }\n\n return -1;\n};\n","/**\n * Utility functions for regex matching and result processing.\n *\n * These functions were extracted from `segmenter.ts` to reduce complexity\n * and enable independent testing. 
They handle match filtering, capture\n * extraction, and occurrence-based selection.\n *\n * @module match-utils\n */\n\nimport { isPageExcluded } from './breakpoint-utils.js';\nimport type { SplitRule } from './types.js';\n\n/**\n * Result of a regex match with position and optional capture information.\n *\n * Represents a single match found by the segmentation engine, including\n * its position in the concatenated content and any captured values.\n */\nexport type MatchResult = {\n /**\n * Start offset (inclusive) of the match in the content string.\n */\n start: number;\n\n /**\n * End offset (exclusive) of the match in the content string.\n *\n * The matched text is `content.slice(start, end)`.\n */\n end: number;\n\n /**\n * Content captured by `lineStartsAfter` patterns.\n *\n * For patterns like `^٦٦٩٦ - (.*)`, this contains the text\n * matched by the `(.*)` group (the rest of the line after the marker).\n */\n captured?: string;\n\n /**\n * Named capture group values from `{{token:name}}` syntax.\n *\n * Keys are the capture names, values are the matched strings.\n *\n * @example\n * // For pattern '{{raqms:num}} {{dash}}'\n * { num: '٦٦٩٦' }\n */\n namedCaptures?: Record<string, string>;\n};\n\n/**\n * Extracts named capture groups from a regex match.\n *\n * Only includes groups that are in the `captureNames` list and have\n * defined values. This filters out positional captures and ensures\n * only explicitly requested named captures are returned.\n *\n * @param groups - The `match.groups` object from `RegExp.exec()`\n * @param captureNames - List of capture names to extract (from `{{token:name}}` syntax)\n * @returns Object with capture name → value pairs, or `undefined` if none found\n *\n * @example\n * const match = /(?<num>[٠-٩]+) -/.exec('٦٦٩٦ - text');\n * extractNamedCaptures(match.groups, ['num'])\n * // → { num: '٦٦٩٦' }\n *\n * @example\n * // No matching captures\n * extractNamedCaptures({}, ['num'])\n * // → undefined\n *\n * @example\n * // Undefined groups\n * extractNamedCaptures(undefined, ['num'])\n * // → undefined\n */\nexport const extractNamedCaptures = (\n groups: Record<string, string> | undefined,\n captureNames: string[],\n): Record<string, string> | undefined => {\n if (!groups || captureNames.length === 0) {\n return undefined;\n }\n\n const namedCaptures: Record<string, string> = {};\n for (const name of captureNames) {\n if (groups[name] !== undefined) {\n namedCaptures[name] = groups[name];\n }\n }\n\n return Object.keys(namedCaptures).length > 0 ? namedCaptures : undefined;\n};\n\n/**\n * Gets the last defined positional capture group from a match array.\n *\n * Used for `lineStartsAfter` patterns where the content capture (`.*`)\n * is always at the end of the pattern. 
Named captures may shift the\n * positional indices, so we iterate backward to find the actual content.\n *\n * @param match - RegExp exec result array\n * @returns The last defined capture group value, or `undefined` if none\n *\n * @example\n * // Pattern: ^(?:(?<num>[٠-٩]+) - )(.*)\n * // Match array: ['٦٦٩٦ - content', '٦٦٩٦', 'content']\n * getLastPositionalCapture(match)\n * // → 'content'\n *\n * @example\n * // No captures\n * getLastPositionalCapture(['full match'])\n * // → undefined\n */\nexport const getLastPositionalCapture = (match: RegExpExecArray): string | undefined => {\n if (match.length <= 1) {\n return undefined;\n }\n\n for (let i = match.length - 1; i >= 1; i--) {\n if (match[i] !== undefined) {\n return match[i];\n }\n }\n return undefined;\n};\n\n/**\n * Filters matches to only include those within page ID constraints.\n *\n * Applies the `min`, `max`, and `exclude` constraints from a rule to filter out\n * matches that occur on pages outside the allowed range or explicitly excluded.\n *\n * @param matches - Array of match results to filter\n * @param rule - Rule containing `min`, `max`, and/or `exclude` page constraints\n * @param getId - Function that returns the page ID for a given offset\n * @returns Filtered array containing only matches within constraints\n *\n * @example\n * const matches = [\n * { start: 0, end: 10 }, // Page 1\n * { start: 100, end: 110 }, // Page 5\n * { start: 200, end: 210 }, // Page 10\n * ];\n * filterByConstraints(matches, { min: 3, max: 8 }, getId)\n * // → [{ start: 100, end: 110 }] (only page 5 match)\n */\nexport const filterByConstraints = (\n matches: MatchResult[],\n rule: Pick<SplitRule, 'min' | 'max' | 'exclude'>,\n getId: (offset: number) => number,\n): MatchResult[] => {\n return matches.filter((m) => {\n const id = getId(m.start);\n if (rule.min !== undefined && id < rule.min) {\n return false;\n }\n if (rule.max !== undefined && id > rule.max) {\n return false;\n }\n if (isPageExcluded(id, rule.exclude)) {\n return false;\n }\n return true;\n });\n};\n\n/**\n * Filters matches based on occurrence setting (first, last, or all).\n *\n * Applies occurrence-based selection to a list of matches:\n * - `'all'` or `undefined`: Return all matches (default)\n * - `'first'`: Return only the first match\n * - `'last'`: Return only the last match\n *\n * @param matches - Array of match results to filter\n * @param occurrence - Which occurrence(s) to keep\n * @returns Filtered array based on occurrence setting\n *\n * @example\n * const matches = [{ start: 0 }, { start: 10 }, { start: 20 }];\n *\n * filterByOccurrence(matches, 'first')\n * // → [{ start: 0 }]\n *\n * filterByOccurrence(matches, 'last')\n * // → [{ start: 20 }]\n *\n * filterByOccurrence(matches, 'all')\n * // → [{ start: 0 }, { start: 10 }, { start: 20 }]\n *\n * filterByOccurrence(matches, undefined)\n * // → [{ start: 0 }, { start: 10 }, { start: 20 }] (default: all)\n */\nexport const filterByOccurrence = (matches: MatchResult[], occurrence?: 'first' | 'last' | 'all'): MatchResult[] => {\n if (!matches.length) {\n return [];\n }\n if (occurrence === 'first') {\n return [matches[0]];\n }\n if (occurrence === 'last') {\n return [matches[matches.length - 1]];\n }\n return matches;\n};\n\n/**\n * Groups matches using a sliding window approach based on page ID difference.\n *\n * Uses a lookahead algorithm where `maxSpan` is the maximum page ID difference\n * allowed when looking ahead for the next split point. 
This prefers longer\n * segments by looking as far ahead as allowed before selecting a match.\n *\n * Algorithm:\n * 1. Start from the first page in the pages list\n * 2. Look for matches within `maxSpan` page IDs ahead\n * 3. Apply occurrence filter (e.g., 'last') to select a match\n * 4. If match found, add it; move window to start from the next page after the match\n * 5. If no match in window, skip to the next page and repeat\n *\n * @param matches - Array of match results (must be sorted by start position)\n * @param maxSpan - Maximum page ID difference allowed when looking ahead\n * @param occurrence - Which occurrence(s) to keep within each window\n * @param getId - Function that returns the page ID for a given offset\n * @param pageIds - Sorted array of all page IDs in the content\n * @returns Filtered array with sliding window and occurrence filter applied\n *\n * @example\n * // Pages: [1, 2, 3], maxSpan=1, occurrence='last'\n * // Window from page 1: pages 1-2 (diff <= 1)\n * // Finds last match in pages 1-2, adds it\n * // Next window from page 3: just page 3\n * // Result: segments span pages 1-2 and page 3\n */\nexport const groupBySpanAndFilter = (\n matches: MatchResult[],\n maxSpan: number,\n occurrence: 'first' | 'last' | 'all' | undefined,\n getId: (offset: number) => number,\n pageIds?: number[],\n): MatchResult[] => {\n if (!matches.length) {\n return [];\n }\n\n // Precompute pageId per match once to avoid O(P×M) behavior for large inputs.\n // Since match offsets are in concatenated page order, pageIds are expected to be non-decreasing.\n const matchPageIds = matches.map((m) => getId(m.start));\n\n // If no pageIds provided, fall back to unique page IDs from matches\n const uniquePageIds =\n pageIds ?? [...new Set(matchPageIds)].sort((a, b) => a - b);\n\n if (!uniquePageIds.length) {\n return filterByOccurrence(matches, occurrence);\n }\n\n const result: MatchResult[] = [];\n let windowStartIdx = 0; // Index into uniquePageIds\n let matchIdx = 0; // Index into matches/matchPageIds\n\n while (windowStartIdx < uniquePageIds.length) {\n const windowStartPageId = uniquePageIds[windowStartIdx];\n const windowEndPageId = windowStartPageId + maxSpan;\n\n // Advance matchIdx to first match in or after the window start page.\n while (matchIdx < matches.length && matchPageIds[matchIdx] < windowStartPageId) {\n matchIdx++;\n }\n\n // No remaining matches anywhere\n if (matchIdx >= matches.length) {\n break;\n }\n\n // Find range of matches that fall within [windowStartPageId, windowEndPageId]\n const windowMatchStart = matchIdx;\n let windowMatchEndExclusive = windowMatchStart;\n while (windowMatchEndExclusive < matches.length && matchPageIds[windowMatchEndExclusive] <= windowEndPageId) {\n windowMatchEndExclusive++;\n }\n\n if (windowMatchEndExclusive <= windowMatchStart) {\n // No matches in this window, move to next page\n windowStartIdx++;\n continue;\n }\n\n // Apply occurrence selection without allocating/filtering per window.\n let selectedStart = windowMatchStart;\n let selectedEndExclusive = windowMatchEndExclusive;\n if (occurrence === 'first') {\n selectedEndExclusive = selectedStart + 1;\n } else if (occurrence === 'last') {\n selectedStart = windowMatchEndExclusive - 1;\n }\n\n for (let i = selectedStart; i < selectedEndExclusive; i++) {\n result.push(matches[i]);\n }\n\n const lastSelectedIndex = selectedEndExclusive - 1;\n const lastMatchPageId = matchPageIds[lastSelectedIndex];\n\n // Move window to start after the last selected match's page\n while 
(windowStartIdx < uniquePageIds.length && uniquePageIds[windowStartIdx] <= lastMatchPageId) {\n windowStartIdx++;\n }\n\n // Matches before this index can never be selected again (windowStartPageId only increases)\n matchIdx = lastSelectedIndex + 1;\n }\n\n return result;\n};\n\n/**\n * Checks if any rule in the list allows the given page ID.\n *\n * A rule allows an ID if it falls within the rule's `min`/`max` constraints.\n * Rules without constraints allow all page IDs.\n *\n * This is used to determine whether to create a segment for content\n * that appears before any split points (the \"first segment\").\n *\n * @param rules - Array of rules with optional `min` and `max` constraints\n * @param pageId - Page ID to check\n * @returns `true` if at least one rule allows the page ID\n *\n * @example\n * const rules = [\n * { min: 5, max: 10 }, // Allows pages 5-10\n * { min: 20 }, // Allows pages 20+\n * ];\n *\n * anyRuleAllowsId(rules, 7) // → true (first rule allows)\n * anyRuleAllowsId(rules, 3) // → false (no rule allows)\n * anyRuleAllowsId(rules, 25) // → true (second rule allows)\n *\n * @example\n * // Rules without constraints allow everything\n * anyRuleAllowsId([{}], 999) // → true\n */\nexport const anyRuleAllowsId = (rules: Pick<SplitRule, 'min' | 'max'>[], pageId: number): boolean => {\n return rules.some((r) => {\n const minOk = r.min === undefined || pageId >= r.min;\n const maxOk = r.max === undefined || pageId <= r.max;\n return minOk && maxOk;\n });\n};\n","/**\n * Strip all HTML tags from content, keeping only text.\n *\n * @param html - HTML content\n * @returns Plain text content\n */\nexport const stripHtmlTags = (html: string): string => {\n return html.replace(/<[^>]*>/g, '');\n};\n\n/**\n * Normalizes line endings to Unix-style (`\\n`).\n *\n * Converts Windows (`\\r\\n`) and old Mac (`\\r`) line endings to Unix style\n * for consistent pattern matching across platforms.\n *\n * @param content - Raw content with potentially mixed line endings\n * @returns Content with all line endings normalized to `\\n`\n */\n// OPTIMIZATION: Fast-path when no \\r present (common case for Unix/Mac content)\nexport const normalizeLineEndings = (content: string) =>\n content.includes('\\r') ? content.replace(/\\r\\n?/g, '\\n') : content;\n","/**\n * Token-based template system for Arabic text pattern matching.\n *\n * This module provides a human-readable way to define regex patterns using\n * `{{token}}` placeholders that expand to their regex equivalents. It supports\n * named capture groups for extracting matched values into metadata.\n *\n * @module tokens\n *\n * @example\n * // Simple token expansion\n * expandTokens('{{raqms}} {{dash}}')\n * // → '[\\\\u0660-\\\\u0669]+ [-–—ـ]'\n *\n * @example\n * // Named capture groups\n * expandTokensWithCaptures('{{raqms:num}} {{dash}}')\n * // → { pattern: '(?<num>[\\\\u0660-\\\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }\n */\n\n/**\n * Token definitions mapping human-readable token names to regex patterns.\n *\n * Tokens are used in template strings with double-brace syntax:\n * - `{{token}}` - Expands to the pattern (non-capturing in context)\n * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`\n * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`\n *\n * @remarks\n * These patterns are designed for Arabic text matching. 
For diacritic-insensitive\n * matching of Arabic patterns, use the `fuzzy: true` option in split rules,\n * which applies `makeDiacriticInsensitive()` to the expanded patterns.\n *\n * @example\n * // Using tokens in a split rule\n * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }\n *\n * @example\n * // Using tokens with named captures\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n */\n// ─────────────────────────────────────────────────────────────\n// Auto-escaping for template patterns\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Escapes regex metacharacters (parentheses and brackets) in template patterns,\n * but preserves content inside `{{...}}` token delimiters.\n *\n * This allows users to write intuitive patterns like `({{harf}}):` instead of\n * the verbose `\\\\({{harf}}\\\\):`. The escaping is applied BEFORE token expansion,\n * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.\n *\n * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`\n * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters\n *\n * @example\n * escapeTemplateBrackets('({{harf}}): ')\n * // → '\\\\({{harf}}\\\\): '\n *\n * @example\n * escapeTemplateBrackets('[{{raqm}}] ')\n * // → '\\\\[{{raqm}}\\\\] '\n *\n * @example\n * escapeTemplateBrackets('{{harf}}')\n * // → '{{harf}}' (unchanged - no brackets outside tokens)\n */\nexport const escapeTemplateBrackets = (pattern: string): string => {\n // Match either a token ({{...}}) or a bracket character\n // Tokens are preserved as-is, brackets are escaped\n return pattern.replace(/(\\{\\{[^}]*\\}\\})|([()[\\]])/g, (match, token, bracket) => {\n if (token) {\n return token; // Leave tokens intact\n }\n return `\\\\${bracket}`; // Escape the bracket\n });\n};\n\n// ─────────────────────────────────────────────────────────────\n// Base tokens - raw regex patterns (no template references)\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Base token definitions mapping human-readable token names to regex patterns.\n *\n * These tokens contain raw regex patterns and do not reference other tokens.\n * For composite tokens that build on these, see `COMPOSITE_TOKENS`.\n *\n * @internal\n */\nconst BASE_TOKENS: Record<string, string> = {\n /**\n * Chapter marker - Arabic word for \"chapter\" (باب).\n *\n * Commonly used in hadith collections to mark chapter divisions.\n *\n * @example 'باب ما جاء في الصلاة' (Chapter on what came regarding prayer)\n */\n bab: 'باب',\n\n /**\n * Basmala pattern - Arabic invocation \"In the name of Allah\" (بسم الله).\n *\n * Matches the beginning of the basmala formula, commonly appearing\n * at the start of chapters, books, or documents.\n *\n * @example 'بسم الله الرحمن الرحيم' (In the name of Allah, the Most Gracious, the Most Merciful)\n */\n basmalah: ['بسم الله', '﷽'].join('|'),\n\n /**\n * Bullet point variants - common bullet characters.\n *\n * Character class matching: `•` (bullet), `*` (asterisk), `°` (degree).\n *\n * @example '• First item'\n */\n bullet: '[•*°]',\n\n /**\n * Dash variants - various dash and separator characters.\n *\n * Character class matching:\n * - `-` (hyphen-minus U+002D)\n * - `–` (en-dash U+2013)\n * - `—` (em-dash U+2014)\n * - `ـ` (tatweel U+0640, Arabic elongation character)\n *\n * @example '٦٦٩٦ - حدثنا' or '٦٦٩٦ ـ حدثنا'\n */\n dash: '[-–—ـ]',\n\n /**\n * Section marker - Arabic word for \"section/issue\".\n * Commonly used for fiqh 
books.\n */\n fasl: ['مسألة', 'فصل'].join('|'),\n\n /**\n * Single Arabic letter - matches any Arabic letter character.\n *\n * Character range from أ (alef with hamza) to ي (ya).\n * Does NOT include diacritics (harakat/tashkeel).\n *\n * @example '{{harf}}' matches 'ب' in 'باب'\n */\n harf: '[أ-ي]',\n\n /**\n * Book marker - Arabic word for \"book\" (كتاب).\n *\n * Commonly used in hadith collections to mark major book divisions.\n *\n * @example 'كتاب الإيمان' (Book of Faith)\n */\n kitab: 'كتاب',\n\n /**\n * Naql (transmission) phrases - common hadith transmission phrases.\n *\n * Alternation of Arabic phrases used to indicate narration chains:\n * - حدثنا (he narrated to us)\n * - أخبرنا (he informed us)\n * - حدثني (he narrated to me)\n * - وحدثنا (and he narrated to us)\n * - أنبأنا (he reported to us)\n * - سمعت (I heard)\n *\n * @example '{{naql}}' matches any of the above phrases\n */\n naql: ['حدثني', 'وأخبرنا', 'حدثنا', 'سمعت', 'أنبأنا', 'وحدثنا', 'أخبرنا'].join('|'),\n\n /**\n * Single Arabic-Indic digit - matches one digit (٠-٩).\n *\n * Unicode range: U+0660 to U+0669 (Arabic-Indic digits).\n * Use `{{raqms}}` for one or more digits.\n *\n * @example '{{raqm}}' matches '٥' in '٥ - '\n */\n raqm: '[\\\\u0660-\\\\u0669]',\n\n /**\n * One or more Arabic-Indic digits - matches digit sequences (٠-٩)+.\n *\n * Unicode range: U+0660 to U+0669 (Arabic-Indic digits).\n * Commonly used for hadith numbers, verse numbers, etc.\n *\n * @example '{{raqms}}' matches '٦٦٩٦' in '٦٦٩٦ - حدثنا'\n */\n raqms: '[\\\\u0660-\\\\u0669]+',\n\n /**\n * Punctuation characters.\n * Use {{tarqim}} which is especially useful when splitting using split: 'after' on punctuation marks.\n */\n tarqim: '[.!?؟؛]',\n};\n\n// ─────────────────────────────────────────────────────────────\n// Composite tokens - templates that reference base tokens\n// These are pre-expanded at module load time for performance\n// ─────────────────────────────────────────────────────────────\n\n/**\n * Composite token definitions using template syntax.\n *\n * These tokens reference base tokens using `{{token}}` syntax and are\n * automatically expanded to their final regex patterns at module load time.\n *\n * This provides better abstraction - if base tokens change, composites\n * automatically update on the next build.\n *\n * @internal\n */\nconst COMPOSITE_TOKENS: Record<string, string> = {\n /**\n * Numbered hadith marker - common format for hadith numbering.\n *\n * Matches patterns like \"٢٢ - \" (number, space, dash, space).\n * This is the most common format in hadith collections.\n *\n * Use with `lineStartsAfter` to cleanly extract hadith content:\n * ```typescript\n * { lineStartsAfter: ['{{numbered}}'], split: 'at' }\n * ```\n *\n * For capturing the hadith number, use explicit capture syntax:\n * ```typescript\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n * ```\n *\n * @example '٢٢ - حدثنا' matches, content starts after '٢٢ - '\n * @example '٦٦٩٦ – أخبرنا' matches (with en-dash)\n */\n numbered: '{{raqms}} {{dash}} ',\n};\n\n/**\n * Expands base tokens in a template string.\n * Used internally to pre-expand composite tokens.\n *\n * @param template - Template string with `{{token}}` placeholders\n * @returns Expanded pattern with base tokens replaced\n * @internal\n */\nconst expandBaseTokens = (template: string): string => {\n return template.replace(/\\{\\{(\\w+)\\}\\}/g, (_, tokenName) => {\n return BASE_TOKENS[tokenName] ?? 
`{{${tokenName}}}`;\n });\n};\n\n/**\n * Token definitions mapping human-readable token names to regex patterns.\n *\n * Tokens are used in template strings with double-brace syntax:\n * - `{{token}}` - Expands to the pattern (non-capturing in context)\n * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`\n * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`\n *\n * @remarks\n * These patterns are designed for Arabic text matching. For diacritic-insensitive\n * matching of Arabic patterns, use the `fuzzy: true` option in split rules,\n * which applies `makeDiacriticInsensitive()` to the expanded patterns.\n *\n * @example\n * // Using tokens in a split rule\n * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }\n *\n * @example\n * // Using tokens with named captures\n * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }\n *\n * @example\n * // Using the numbered convenience token\n * { lineStartsAfter: ['{{numbered}}'], split: 'at' }\n */\nexport const TOKEN_PATTERNS: Record<string, string> = {\n ...BASE_TOKENS,\n // Pre-expand composite tokens at module load time\n ...Object.fromEntries(Object.entries(COMPOSITE_TOKENS).map(([k, v]) => [k, expandBaseTokens(v)])),\n};\n\n/**\n * Regex pattern for matching tokens with optional named capture syntax.\n *\n * Matches:\n * - `{{token}}` - Simple token (group 1 = token name, group 2 = empty)\n * - `{{token:name}}` - Token with capture (group 1 = token, group 2 = name)\n * - `{{:name}}` - Capture-only (group 1 = empty, group 2 = name)\n *\n * @internal\n */\nconst TOKEN_WITH_CAPTURE_REGEX = /\\{\\{(\\w*):?(\\w*)\\}\\}/g;\n\n/**\n * Regex pattern for simple token matching (no capture syntax).\n *\n * Matches only `{{token}}` format where token is one or more word characters.\n * Used by `containsTokens()` for quick detection.\n *\n * @internal\n */\nconst SIMPLE_TOKEN_REGEX = /\\{\\{(\\w+)\\}\\}/g;\n\n/**\n * Checks if a query string contains template tokens.\n *\n * Performs a quick test for `{{token}}` patterns without actually\n * expanding them. 
Useful for determining whether to apply token\n * expansion to a string.\n *\n * @param query - String to check for tokens\n * @returns `true` if the string contains at least one `{{token}}` pattern\n *\n * @example\n * containsTokens('{{raqms}} {{dash}}') // → true\n * containsTokens('plain text') // → false\n * containsTokens('[٠-٩]+ - ') // → false (raw regex, no tokens)\n */\nexport const containsTokens = (query: string): boolean => {\n SIMPLE_TOKEN_REGEX.lastIndex = 0;\n return SIMPLE_TOKEN_REGEX.test(query);\n};\n\n/**\n * Result from expanding tokens with capture information.\n *\n * Contains the expanded pattern string along with metadata about\n * any named capture groups that were created.\n */\nexport type ExpandResult = {\n /**\n * The expanded regex pattern string with all tokens replaced.\n *\n * Named captures use the `(?<name>pattern)` syntax.\n */\n pattern: string;\n\n /**\n * Names of captured groups extracted from `{{token:name}}` syntax.\n *\n * Empty array if no named captures were found.\n */\n captureNames: string[];\n\n /**\n * Whether the pattern has any named capturing groups.\n *\n * Equivalent to `captureNames.length > 0`.\n */\n hasCaptures: boolean;\n};\n\n/**\n * Expands template tokens with support for named captures.\n *\n * This is the primary token expansion function that handles all token syntax:\n * - `{{token}}` → Expands to the token's pattern (no capture group)\n * - `{{token:name}}` → Expands to `(?<name>pattern)` (named capture)\n * - `{{:name}}` → Expands to `(?<name>.+)` (capture anything)\n *\n * Unknown tokens are left as-is in the output, allowing for partial templates.\n *\n * @param query - The template string containing tokens\n * @param fuzzyTransform - Optional function to transform Arabic text for fuzzy matching.\n * Applied to both token patterns and plain Arabic text between tokens.\n * Typically `makeDiacriticInsensitive` from the fuzzy module.\n * @returns Object with expanded pattern, capture names, and capture flag\n *\n * @example\n * // Simple token expansion\n * expandTokensWithCaptures('{{raqms}} {{dash}}')\n * // → { pattern: '[\\\\u0660-\\\\u0669]+ [-–—ـ]', captureNames: [], hasCaptures: false }\n *\n * @example\n * // Named capture\n * expandTokensWithCaptures('{{raqms:num}} {{dash}}')\n * // → { pattern: '(?<num>[\\\\u0660-\\\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }\n *\n * @example\n * // Capture-only token\n * expandTokensWithCaptures('{{raqms:num}} {{dash}} {{:content}}')\n * // → { pattern: '(?<num>[٠-٩]+) [-–—ـ] (?<content>.+)', captureNames: ['num', 'content'], hasCaptures: true }\n *\n * @example\n * // With fuzzy transform\n * expandTokensWithCaptures('{{bab}}', makeDiacriticInsensitive)\n * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }\n */\nexport const expandTokensWithCaptures = (query: string, fuzzyTransform?: (pattern: string) => string): ExpandResult => {\n const captureNames: string[] = [];\n // Track capture name usage counts to handle duplicates\n const captureNameCounts = new Map<string, number>();\n\n /**\n * Gets a unique capture name, appending _2, _3, etc. for duplicates.\n * This prevents invalid regex with duplicate named groups.\n */\n const getUniqueCaptureName = (baseName: string): string => {\n const count = captureNameCounts.get(baseName) ?? 0;\n captureNameCounts.set(baseName, count + 1);\n return count === 0 ? 
baseName : `${baseName}_${count + 1}`;\n };\n\n // Split the query into token matches and non-token segments\n const segments: Array<{ type: 'token' | 'text'; value: string }> = [];\n let lastIndex = 0;\n TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;\n let match: RegExpExecArray | null;\n\n // biome-ignore lint/suspicious/noAssignInExpressions: standard regex exec loop pattern\n while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {\n // Add text before this token\n if (match.index > lastIndex) {\n segments.push({ type: 'text', value: query.slice(lastIndex, match.index) });\n }\n // Add the token\n segments.push({ type: 'token', value: match[0] });\n lastIndex = match.index + match[0].length;\n }\n // Add remaining text after last token\n if (lastIndex < query.length) {\n segments.push({ type: 'text', value: query.slice(lastIndex) });\n }\n\n // Process each segment\n const processedParts = segments.map((segment) => {\n if (segment.type === 'text') {\n // Plain text - apply fuzzy if it contains Arabic and fuzzyTransform is provided\n if (fuzzyTransform && /[\\u0600-\\u06FF]/.test(segment.value)) {\n return fuzzyTransform(segment.value);\n }\n return segment.value;\n }\n\n // Token - extract tokenName and captureName\n TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;\n const tokenMatch = TOKEN_WITH_CAPTURE_REGEX.exec(segment.value);\n if (!tokenMatch) {\n return segment.value;\n }\n\n const [, tokenName, captureName] = tokenMatch;\n\n // {{:name}} - capture anything with name\n if (!tokenName && captureName) {\n const uniqueName = getUniqueCaptureName(captureName);\n captureNames.push(uniqueName);\n return `(?<${uniqueName}>.+)`;\n }\n\n // Get the token pattern\n let tokenPattern = TOKEN_PATTERNS[tokenName];\n if (!tokenPattern) {\n // Unknown token - leave as-is\n return segment.value;\n }\n\n // Apply fuzzy transform to the token pattern\n if (fuzzyTransform) {\n // For tokens with alternation, apply fuzzy to each alternative\n tokenPattern = tokenPattern\n .split('|')\n .map((part) => (/[\\u0600-\\u06FF]/.test(part) ? fuzzyTransform(part) : part))\n .join('|');\n }\n\n // {{token:name}} - capture with name\n if (captureName) {\n const uniqueName = getUniqueCaptureName(captureName);\n captureNames.push(uniqueName);\n return `(?<${uniqueName}>${tokenPattern})`;\n }\n\n // {{token}} - no capture, just expand\n return tokenPattern;\n });\n\n return {\n captureNames,\n hasCaptures: captureNames.length > 0,\n pattern: processedParts.join(''),\n };\n};\n\n/**\n * Expands template tokens in a query string to their regex equivalents.\n *\n * This is the simple version without capture support. It returns only the\n * expanded pattern string, not capture metadata.\n *\n * Unknown tokens are left as-is, allowing for partial templates.\n *\n * @param query - Template string containing `{{token}}` placeholders\n * @returns Expanded regex pattern string\n *\n * @example\n * expandTokens('، {{raqms}}') // → '، [\\\\u0660-\\\\u0669]+'\n * expandTokens('{{raqm}}*') // → '[\\\\u0660-\\\\u0669]*'\n * expandTokens('{{dash}}{{raqm}}') // → '[-–—ـ][\\\\u0660-\\\\u0669]'\n * expandTokens('{{unknown}}') // → '{{unknown}}' (left as-is)\n *\n * @see expandTokensWithCaptures for full capture group support\n */\nexport const expandTokens = (query: string) => expandTokensWithCaptures(query).pattern;\n\n/**\n * Converts a template string to a compiled RegExp.\n *\n * Expands all tokens and attempts to compile the result as a RegExp\n * with Unicode flag. 
Returns `null` if the resulting pattern is invalid.\n *\n * @remarks\n * This function dynamically compiles regular expressions from template strings.\n * If templates may come from untrusted sources, be aware of potential ReDoS\n * (Regular Expression Denial of Service) risks due to catastrophic backtracking.\n * Consider validating pattern complexity or applying execution timeouts when\n * running user-submitted patterns.\n *\n * @param template - Template string containing `{{token}}` placeholders\n * @returns Compiled RegExp with 'u' flag, or `null` if invalid\n *\n * @example\n * templateToRegex('، {{raqms}}') // → /، [٠-٩]+/u\n * templateToRegex('{{raqms}}+') // → /[٠-٩]++/u (might be invalid in some engines)\n * templateToRegex('(((') // → null (invalid regex)\n */\nexport const templateToRegex = (template: string) => {\n const expanded = expandTokens(template);\n try {\n return new RegExp(expanded, 'u');\n } catch {\n return null;\n }\n};\n\n/**\n * Lists all available token names defined in `TOKEN_PATTERNS`.\n *\n * Useful for documentation, validation, or building user interfaces\n * that show available tokens.\n *\n * @returns Array of token names (e.g., `['bab', 'basmala', 'bullet', ...]`)\n *\n * @example\n * getAvailableTokens()\n * // → ['bab', 'basmala', 'bullet', 'dash', 'harf', 'kitab', 'naql', 'raqm', 'raqms']\n */\nexport const getAvailableTokens = () => Object.keys(TOKEN_PATTERNS);\n\n/**\n * Gets the regex pattern for a specific token name.\n *\n * Returns the raw pattern string as defined in `TOKEN_PATTERNS`,\n * without any expansion or capture group wrapping.\n *\n * @param tokenName - The token name to look up (e.g., 'raqms', 'dash')\n * @returns The regex pattern string, or `undefined` if token doesn't exist\n *\n * @example\n * getTokenPattern('raqms') // → '[\\\\u0660-\\\\u0669]+'\n * getTokenPattern('dash') // → '[-–—ـ]'\n * getTokenPattern('unknown') // → undefined\n */\nexport const getTokenPattern = (tokenName: string): string | undefined => TOKEN_PATTERNS[tokenName];\n","/**\n * Core segmentation engine for splitting Arabic text pages into logical segments.\n *\n * The segmenter takes an array of pages and applies pattern-based rules to\n * identify split points, producing segments with content, page references,\n * and optional metadata.\n *\n * @module segmenter\n */\n\nimport {\n type BreakpointContext,\n createSegment,\n expandBreakpoints,\n findActualEndPage,\n findActualStartPage,\n findBreakPosition,\n hasExcludedPageInRange,\n type NormalizedPage,\n} from './breakpoint-utils.js';\nimport { makeDiacriticInsensitive } from './fuzzy.js';\nimport {\n anyRuleAllowsId,\n extractNamedCaptures,\n filterByConstraints,\n filterByOccurrence,\n getLastPositionalCapture,\n type MatchResult,\n} from './match-utils.js';\nimport { normalizeLineEndings } from './textUtils.js';\nimport { escapeTemplateBrackets, expandTokensWithCaptures } from './tokens.js';\nimport type { Breakpoint, Logger, Page, Segment, SegmentationOptions, SplitRule } from './types.js';\n\n/**\n * Checks if a regex pattern contains standard (anonymous) capturing groups.\n *\n * Detects standard capturing groups `(...)` while excluding:\n * - Non-capturing groups `(?:...)`\n * - Lookahead assertions `(?=...)` and `(?!...)`\n * - Lookbehind assertions `(?<=...)` and `(?<!...)`\n * - Named groups `(?<name>...)` (start with `(?` so excluded here)\n *\n * **Note**: Named capture groups `(?<name>...)` ARE capturing groups but are\n * excluded by this check because they are tracked separately via the\n 
* `captureNames` array from token expansion. This function only detects\n * anonymous capturing groups like `(.*)`.\n *\n * @param pattern - Regex pattern string to analyze\n * @returns `true` if the pattern contains at least one anonymous capturing group\n */\nconst hasCapturingGroup = (pattern: string): boolean => {\n // Match ( that is NOT followed by ? (excludes non-capturing and named groups)\n return /\\((?!\\?)/.test(pattern);\n};\n\n/**\n * Result of processing a pattern with token expansion and optional fuzzy matching.\n */\ntype ProcessedPattern = {\n /** The expanded regex pattern string (tokens replaced with regex) */\n pattern: string;\n /** Names of captured groups extracted from `{{token:name}}` syntax */\n captureNames: string[];\n};\n\n/**\n * Processes a pattern string by expanding tokens and optionally applying fuzzy matching.\n *\n * Fuzzy matching makes Arabic text diacritic-insensitive. When enabled, the\n * transform is applied to token patterns BEFORE wrapping with capture groups,\n * ensuring regex metacharacters (`(`, `)`, `|`, etc.) are not corrupted.\n *\n * @param pattern - Pattern string potentially containing `{{token}}` placeholders\n * @param fuzzy - Whether to apply diacritic-insensitive transformation\n * @returns Processed pattern with expanded tokens and capture names\n *\n * @example\n * processPattern('{{raqms:num}} {{dash}}', false)\n * // → { pattern: '(?<num>[٠-٩]+) [-–—ـ]', captureNames: ['num'] }\n *\n * @example\n * processPattern('{{naql}}', true)\n * // → { pattern: 'حَ?دَّ?ثَ?نَ?ا|...', captureNames: [] }\n */\nconst processPattern = (pattern: string, fuzzy: boolean): ProcessedPattern => {\n // First escape brackets ()[] outside of {{tokens}} - allows intuitive patterns like ({{harf}}):\n const escaped = escapeTemplateBrackets(pattern);\n // Pass fuzzy transform to expandTokensWithCaptures so it can apply to raw token patterns\n const fuzzyTransform = fuzzy ? makeDiacriticInsensitive : undefined;\n const { pattern: expanded, captureNames } = expandTokensWithCaptures(escaped, fuzzyTransform);\n return { captureNames, pattern: expanded };\n};\n\n/**\n * Compiled regex and metadata for a split rule.\n */\ntype RuleRegex = {\n /** Compiled RegExp with 'gmu' flags (global, multiline, unicode) */\n regex: RegExp;\n /** Whether the regex uses capturing groups for content extraction */\n usesCapture: boolean;\n /** Names of captured groups from `{{token:name}}` syntax */\n captureNames: string[];\n /** Whether this rule uses `lineStartsAfter` (content capture at end) */\n usesLineStartsAfter: boolean;\n};\n\n/**\n * Builds a compiled regex and metadata from a split rule.\n *\n * Handles all pattern types:\n * - `regex`: Used as-is (no token expansion)\n * - `template`: Tokens expanded via `expandTokensWithCaptures`\n * - `lineStartsWith`: Converted to `^(?:patterns...)`\n * - `lineStartsAfter`: Converted to `^(?:patterns...)(.*)`\n * - `lineEndsWith`: Converted to `(?:patterns...)$`\n *\n * @param rule - Split rule containing pattern and options\n * @returns Compiled regex with capture metadata\n */\nconst buildRuleRegex = (rule: SplitRule): RuleRegex => {\n const s: {\n lineStartsWith?: string[];\n lineStartsAfter?: string[];\n lineEndsWith?: string[];\n template?: string;\n regex?: string;\n } = { ...rule };\n\n const fuzzy = (rule as { fuzzy?: boolean }).fuzzy ?? false;\n let allCaptureNames: string[] = [];\n\n /**\n * Safely compiles a regex pattern, throwing a helpful error if invalid.\n *\n * @remarks\n * This catches syntax errors only. 
It does NOT protect against ReDoS\n * (catastrophic backtracking) from pathological patterns. Avoid compiling\n * patterns from untrusted sources.\n */\n const compileRegex = (pattern: string): RegExp => {\n try {\n return new RegExp(pattern, 'gmu');\n } catch (error) {\n const message = error instanceof Error ? error.message : String(error);\n throw new Error(`Invalid regex pattern: ${pattern}\\n Cause: ${message}`);\n }\n };\n\n // lineStartsAfter: creates a capturing group to exclude the marker from content\n if (s.lineStartsAfter?.length) {\n const processed = s.lineStartsAfter.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n // Wrap patterns with named captures in a non-capturing group, then capture rest\n s.regex = `^(?:${patterns})(.*)`;\n return {\n captureNames: allCaptureNames,\n regex: compileRegex(s.regex),\n usesCapture: true,\n usesLineStartsAfter: true,\n };\n }\n\n if (s.lineStartsWith?.length) {\n const processed = s.lineStartsWith.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n s.regex = `^(?:${patterns})`;\n }\n if (s.lineEndsWith?.length) {\n const processed = s.lineEndsWith.map((p) => processPattern(p, fuzzy));\n const patterns = processed.map((p) => p.pattern).join('|');\n allCaptureNames = processed.flatMap((p) => p.captureNames);\n s.regex = `(?:${patterns})$`;\n }\n if (s.template) {\n // Template from user: first escape brackets, then expand tokens with captures\n const escaped = escapeTemplateBrackets(s.template);\n const { pattern, captureNames } = expandTokensWithCaptures(escaped);\n s.regex = pattern;\n allCaptureNames = [...allCaptureNames, ...captureNames];\n }\n\n if (!s.regex) {\n throw new Error(\n 'Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, or lineEndsWith',\n );\n }\n\n const usesCapture = hasCapturingGroup(s.regex) || allCaptureNames.length > 0;\n return {\n captureNames: allCaptureNames,\n regex: compileRegex(s.regex),\n usesCapture,\n usesLineStartsAfter: false,\n };\n};\n\n/**\n * Represents the byte offset boundaries of a single page within concatenated content.\n */\ntype PageBoundary = {\n /** Start offset (inclusive) in the concatenated content string */\n start: number;\n /** End offset (inclusive) in the concatenated content string */\n end: number;\n /** Page ID from the original `Page` */\n id: number;\n};\n\n/**\n * Page mapping utilities for tracking positions across concatenated pages.\n */\ntype PageMap = {\n /**\n * Returns the page ID for a given offset in the concatenated content.\n *\n * @param offset - Character offset in concatenated content\n * @returns Page ID containing that offset\n */\n getId: (offset: number) => number;\n /** Array of page boundaries in order */\n boundaries: PageBoundary[];\n /** Sorted array of offsets where page breaks occur (for binary search) */\n pageBreaks: number[];\n /** Array of all page IDs in order (for sliding window algorithm) */\n pageIds: number[];\n};\n\n/**\n * Builds a concatenated content string and page mapping from input pages.\n *\n * Pages are joined with newline characters, and a page map is created to\n * track which page each offset belongs to. 
This allows pattern matching\n * across page boundaries while preserving page reference information.\n *\n * @param pages - Array of input pages with id and content\n * @returns Concatenated content string and page mapping utilities\n *\n * @example\n * const pages = [\n * { id: 1, content: 'Page 1 text' },\n * { id: 2, content: 'Page 2 text' }\n * ];\n * const { content, pageMap } = buildPageMap(pages);\n * // content = 'Page 1 text\\nPage 2 text'\n * // pageMap.getId(0) = 1\n * // pageMap.getId(12) = 2\n */\nconst buildPageMap = (pages: Page[]): { content: string; normalizedPages: string[]; pageMap: PageMap } => {\n const boundaries: PageBoundary[] = [];\n const pageBreaks: number[] = []; // Sorted array for binary search\n let offset = 0;\n const parts: string[] = [];\n\n for (let i = 0; i < pages.length; i++) {\n const normalized = normalizeLineEndings(pages[i].content);\n boundaries.push({ end: offset + normalized.length, id: pages[i].id, start: offset });\n parts.push(normalized);\n if (i < pages.length - 1) {\n pageBreaks.push(offset + normalized.length); // Already in sorted order\n offset += normalized.length + 1;\n } else {\n offset += normalized.length;\n }\n }\n\n /**\n * Finds the page boundary containing the given offset using binary search.\n * O(log n) complexity for efficient lookup with many pages.\n *\n * @param off - Character offset to look up\n * @returns Page boundary or the last boundary as fallback\n */\n const findBoundary = (off: number): PageBoundary | undefined => {\n let lo = 0;\n let hi = boundaries.length - 1;\n\n while (lo <= hi) {\n const mid = (lo + hi) >>> 1; // Unsigned right shift for floor division\n const b = boundaries[mid];\n if (off < b.start) {\n hi = mid - 1;\n } else if (off > b.end) {\n lo = mid + 1;\n } else {\n return b;\n }\n }\n // Fallback to last boundary if not found\n return boundaries[boundaries.length - 1];\n };\n\n return {\n content: parts.join('\\n'),\n normalizedPages: parts, // OPTIMIZATION: Return already-normalized content for reuse\n pageMap: {\n boundaries,\n getId: (off: number) => findBoundary(off)?.id ?? 
0,\n pageBreaks,\n pageIds: boundaries.map((b) => b.id),\n },\n };\n};\n\n/**\n * Represents a position where content should be split, with associated metadata.\n */\ntype SplitPoint = {\n /** Character index in the concatenated content where the split occurs */\n index: number;\n /** Static metadata from the matched rule */\n meta?: Record<string, unknown>;\n /** Content captured by regex patterns with capturing groups */\n capturedContent?: string;\n /** Named captures from `{{token:name}}` patterns */\n namedCaptures?: Record<string, string>;\n /**\n * Offset from index where content actually starts (for lineStartsAfter).\n * If set, the segment content starts at `index + contentStartOffset`.\n * This allows excluding the marker from content while keeping the split index\n * at the match start so previous segment doesn't include the marker.\n */\n contentStartOffset?: number;\n};\n\n/**\n * Executes a regex against content and extracts match results with capture information.\n *\n * @param content - Full content string to search\n * @param regex - Compiled regex with 'g' flag\n * @param usesCapture - Whether to extract captured content\n * @param captureNames - Names of expected named capture groups\n * @returns Array of match results with positions and captures\n */\nconst findMatches = (content: string, regex: RegExp, usesCapture: boolean, captureNames: string[]) => {\n const matches: MatchResult[] = [];\n regex.lastIndex = 0;\n let m = regex.exec(content);\n\n while (m !== null) {\n const result: MatchResult = { end: m.index + m[0].length, start: m.index };\n\n // Extract named captures if present\n result.namedCaptures = extractNamedCaptures(m.groups, captureNames);\n\n // For lineStartsAfter, get the last positional capture (the .* content)\n if (usesCapture) {\n result.captured = getLastPositionalCapture(m);\n }\n\n matches.push(result);\n\n if (m[0].length === 0) {\n regex.lastIndex++;\n }\n m = regex.exec(content);\n }\n\n return matches;\n};\n\n/**\n * Finds page breaks within a given offset range using binary search.\n * O(log n + k) where n = total breaks, k = breaks in range.\n *\n * @param startOffset - Start of range (inclusive)\n * @param endOffset - End of range (exclusive)\n * @param sortedBreaks - Sorted array of page break offsets\n * @returns Array of break offsets relative to startOffset\n */\nconst findBreaksInRange = (startOffset: number, endOffset: number, sortedBreaks: number[]) => {\n if (sortedBreaks.length === 0) {\n return [];\n }\n\n // Binary search for first break >= startOffset\n let lo = 0;\n let hi = sortedBreaks.length;\n while (lo < hi) {\n const mid = (lo + hi) >>> 1;\n if (sortedBreaks[mid] < startOffset) {\n lo = mid + 1;\n } else {\n hi = mid;\n }\n }\n\n // Collect breaks until we exceed endOffset\n const result: number[] = [];\n for (let i = lo; i < sortedBreaks.length && sortedBreaks[i] < endOffset; i++) {\n result.push(sortedBreaks[i] - startOffset);\n }\n return result;\n};\n\n/**\n * Converts page-break newlines to spaces in segment content.\n *\n * When a segment spans multiple pages, the newline characters that were\n * inserted as page separators during concatenation are converted to spaces\n * for more natural reading.\n *\n * Uses binary search for O(log n + k) lookup instead of O(n) iteration.\n *\n * @param content - Segment content string\n * @param startOffset - Starting offset of this content in concatenated string\n * @param pageBreaks - Sorted array of page break offsets\n * @returns Content with page-break newlines converted to 
spaces\n */\nconst convertPageBreaks = (content: string, startOffset: number, pageBreaks: number[]): string => {\n // OPTIMIZATION: Fast-path for empty or no-newline content (common cases)\n if (!content || !content.includes('\\n')) {\n return content;\n }\n\n const endOffset = startOffset + content.length;\n const breaksInRange = findBreaksInRange(startOffset, endOffset, pageBreaks);\n\n // No page breaks in this segment - return as-is (most common case)\n if (breaksInRange.length === 0) {\n return content;\n }\n\n // Convert ONLY page-break newlines (the ones inserted during concatenation) to spaces.\n //\n // NOTE: Offsets from findBreaksInRange are string indices (code units). Using Array.from()\n // would index by Unicode code points and can desync indices if surrogate pairs appear.\n const breakSet = new Set(breaksInRange);\n return content.replace(/\\n/g, (match, offset: number) => (breakSet.has(offset) ? ' ' : match));\n};\n\n/**\n * Applies breakpoints to oversized segments.\n *\n * For each segment that spans more than maxPages, tries the breakpoint patterns\n * in order to find a suitable split point. Structural markers (from rules) are\n * always respected - segments are only broken within their boundaries.\n *\n * @param segments - Initial segments from rule processing\n * @param pages - Original pages for page lookup\n * @param maxPages - Maximum pages before breakpoints apply\n * @param breakpoints - Patterns to try in order (tokens supported)\n * @param prefer - 'longer' for last match, 'shorter' for first match\n * @returns Processed segments with oversized ones broken up\n */\nconst applyBreakpoints = (\n segments: Segment[],\n pages: Page[],\n normalizedContent: string[], // OPTIMIZATION: Pre-normalized content from buildPageMap\n maxPages: number,\n breakpoints: Breakpoint[],\n prefer: 'longer' | 'shorter',\n logger?: Logger,\n): Segment[] => {\n const findExclusionBreakPosition = (\n currentFromIdx: number,\n windowEndIdx: number,\n toIdx: number,\n pageIds: number[],\n expandedBreakpoints: Array<{ excludeSet: Set<number> }>,\n cumulativeOffsets: number[],\n ): number => {\n const startingPageId = pageIds[currentFromIdx];\n const startingPageExcluded = expandedBreakpoints.some((bp) => bp.excludeSet.has(startingPageId));\n if (startingPageExcluded && currentFromIdx < toIdx) {\n // Output just this one page as a segment (break at next page boundary)\n return cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx];\n }\n\n // Find the first excluded page AFTER the starting page (within window) and split BEFORE it\n for (let pageIdx = currentFromIdx + 1; pageIdx <= windowEndIdx; pageIdx++) {\n const pageId = pageIds[pageIdx];\n const isExcluded = expandedBreakpoints.some((bp) => bp.excludeSet.has(pageId));\n if (isExcluded) {\n return cumulativeOffsets[pageIdx] - cumulativeOffsets[currentFromIdx];\n }\n }\n return -1;\n };\n\n // Get page IDs in order\n const pageIds = pages.map((p) => p.id);\n\n // OPTIMIZATION: Build pageId to index Map for O(1) lookups instead of O(P) indexOf\n const pageIdToIndex = new Map(pageIds.map((id, i) => [id, i]));\n\n // OPTIMIZATION: Build normalized pages Map from pre-normalized content\n const normalizedPages = new Map<number, NormalizedPage>();\n for (let i = 0; i < pages.length; i++) {\n const content = normalizedContent[i];\n normalizedPages.set(pages[i].id, { content, index: i, length: content.length });\n }\n\n // OPTIMIZATION: Pre-compute cumulative offsets for O(1) window size calculation\n const cumulativeOffsets: 
number[] = [0];\n let totalOffset = 0;\n for (let i = 0; i < pageIds.length; i++) {\n const pageData = normalizedPages.get(pageIds[i]);\n totalOffset += pageData ? pageData.length : 0;\n if (i < pageIds.length - 1) {\n totalOffset += 1; // separator between pages\n }\n cumulativeOffsets.push(totalOffset);\n }\n\n // Use extracted helper to expand breakpoints\n // Create pattern processor function for breakpoint-utils\n const patternProcessor = (p: string) => processPattern(p, false).pattern;\n const expandedBreakpoints = expandBreakpoints(breakpoints, patternProcessor);\n\n const result: Segment[] = [];\n\n logger?.info?.('Starting breakpoint processing', { maxPages, segmentCount: segments.length });\n\n for (const segment of segments) {\n const fromIdx = pageIdToIndex.get(segment.from) ?? -1;\n const toIdx = segment.to !== undefined ? (pageIdToIndex.get(segment.to) ?? fromIdx) : fromIdx;\n\n logger?.debug?.('Processing segment', {\n contentLength: segment.content.length,\n contentPreview: segment.content.slice(0, 100),\n from: segment.from,\n fromIdx,\n to: segment.to,\n toIdx,\n });\n\n // Calculate span using actual page IDs (not array indices)\n const segmentSpan = (segment.to ?? segment.from) - segment.from;\n // If segment span is within limit AND no pages are excluded, keep as-is\n // Check if any page in this segment is excluded by any breakpoint\n const hasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, fromIdx, toIdx),\n );\n\n if (segmentSpan <= maxPages && !hasExclusions) {\n logger?.trace?.('Segment within limit, keeping as-is');\n\n result.push(segment);\n continue;\n }\n\n logger?.debug?.('Segment exceeds limit or has exclusions, breaking it up');\n\n // Rebuild content for this segment from individual pages\n // We need to work with the actual page content, not the merged segment content\n\n // Process this segment, potentially breaking it into multiple\n let remainingContent = segment.content;\n let currentFromIdx = fromIdx;\n let isFirstPiece = true;\n let iterationCount = 0;\n const maxIterations = 10000; // Safety limit\n\n while (currentFromIdx <= toIdx) {\n iterationCount++;\n if (iterationCount > maxIterations) {\n logger?.error?.('INFINITE LOOP DETECTED! Breaking out, you should report this bug', {\n iterationCount: maxIterations,\n });\n logger?.error?.('Loop state', {\n currentFromIdx,\n remainingContentLength: remainingContent.length,\n toIdx,\n });\n break;\n }\n\n // Calculate remaining span using actual page IDs (not array indices)\n const remainingSpan = pageIds[toIdx] - pageIds[currentFromIdx];\n\n logger?.trace?.('Loop iteration', {\n currentFromIdx,\n currentPageId: pageIds[currentFromIdx],\n iterationCount,\n remainingContentLength: remainingContent.length,\n remainingContentPreview: remainingContent.slice(0, 80),\n remainingSpan,\n toIdx,\n toPageId: pageIds[toIdx],\n });\n\n // Check if any page in remaining segment is excluded\n const remainingHasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, toIdx),\n );\n\n // If remaining span is within limit AND no exclusions, output and done\n if (remainingSpan <= maxPages && !remainingHasExclusions) {\n logger?.debug?.('Remaining span within limit, outputting final segment');\n\n const finalSeg = createSegment(\n remainingContent,\n pageIds[currentFromIdx],\n currentFromIdx !== toIdx ? pageIds[toIdx] : undefined,\n isFirstPiece ? 
segment.meta : undefined,\n );\n if (finalSeg) {\n result.push(finalSeg);\n }\n break;\n }\n\n // Need to break within maxPages window (based on page IDs, not indices)\n // Find the last page index where pageId <= currentPageId + maxPages\n const currentPageId = pageIds[currentFromIdx];\n const maxWindowPageId = currentPageId + maxPages;\n let windowEndIdx = currentFromIdx;\n for (let i = currentFromIdx; i <= toIdx; i++) {\n if (pageIds[i] <= maxWindowPageId) {\n windowEndIdx = i;\n } else {\n break;\n }\n }\n\n logger?.trace?.('Window calculation', {\n currentPageId,\n maxWindowPageId,\n windowEndIdx,\n windowEndPageId: pageIds[windowEndIdx],\n });\n\n // Special case: if we have exclusions IN THE CURRENT WINDOW, handle them\n // Check if any page in the WINDOW (not entire segment) is excluded\n const windowHasExclusions = expandedBreakpoints.some((bp) =>\n hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, windowEndIdx),\n );\n\n let breakPosition = -1;\n\n if (windowHasExclusions) {\n logger?.trace?.('Window has exclusions, finding exclusion break position');\n\n breakPosition = findExclusionBreakPosition(\n currentFromIdx,\n windowEndIdx,\n toIdx,\n pageIds,\n expandedBreakpoints,\n cumulativeOffsets,\n );\n\n logger?.trace?.('Exclusion break position', { breakPosition });\n }\n\n // If no exclusion-based split found, use normal breakpoint finding\n if (breakPosition <= 0) {\n // Use extracted helper to find break position\n const breakpointCtx: BreakpointContext = {\n cumulativeOffsets,\n expandedBreakpoints,\n normalizedPages,\n pageIds,\n prefer,\n };\n\n logger?.trace?.('Finding break position using patterns...');\n\n breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, breakpointCtx);\n\n logger?.trace?.('Pattern break position', { breakPosition });\n }\n\n if (breakPosition <= 0) {\n logger?.debug?.('No pattern matched, falling back to page boundary');\n\n // No pattern matched - fallback to page boundary split\n // If only one page in window, output it and continue to next page\n if (windowEndIdx === currentFromIdx) {\n logger?.trace?.('Single page window, outputting page and advancing');\n\n // Output this single page as a segment\n const pageContent =\n cumulativeOffsets[currentFromIdx + 1] !== undefined\n ? remainingContent.slice(\n 0,\n cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx],\n )\n : remainingContent;\n const pageSeg = createSegment(\n pageContent.trim(),\n pageIds[currentFromIdx],\n undefined,\n isFirstPiece ? 
segment.meta : undefined,\n );\n if (pageSeg) {\n result.push(pageSeg);\n }\n // Move to next page\n remainingContent = remainingContent.slice(pageContent.length).trim();\n currentFromIdx++;\n isFirstPiece = false;\n\n logger?.trace?.('After single page', {\n currentFromIdx,\n remainingContentLength: remainingContent.length,\n });\n\n continue;\n }\n // Multi-page window with no pattern match - output entire window and continue\n breakPosition = cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx];\n logger?.trace?.('Multi-page window, using full window break position', { breakPosition });\n }\n\n const pieceContent = remainingContent.slice(0, breakPosition).trim();\n\n logger?.trace?.('Piece extracted', {\n breakPosition,\n pieceContentLength: pieceContent.length,\n pieceContentPreview: pieceContent.slice(0, 80),\n });\n\n // Find the actual starting and ending pages for this piece content\n // currentFromIdx might not be the actual starting page if content was split across pages\n const actualStartIdx = pieceContent\n ? findActualStartPage(pieceContent, currentFromIdx, toIdx, pageIds, normalizedPages)\n : currentFromIdx;\n const actualEndIdx = pieceContent\n ? findActualEndPage(pieceContent, actualStartIdx, windowEndIdx, pageIds, normalizedPages)\n : currentFromIdx;\n\n logger?.trace?.('Actual page indices', {\n actualEndIdx,\n actualStartIdx,\n pieceHasContent: !!pieceContent,\n });\n\n if (pieceContent) {\n const pieceSeg = createSegment(\n pieceContent,\n pageIds[actualStartIdx],\n actualEndIdx > actualStartIdx ? pageIds[actualEndIdx] : undefined,\n isFirstPiece ? segment.meta : undefined,\n );\n if (pieceSeg) {\n result.push(pieceSeg);\n\n logger?.debug?.('Created segment', {\n contentLength: pieceSeg.content.length,\n from: pieceSeg.from,\n to: pieceSeg.to,\n });\n }\n }\n\n // Update for next iteration\n const prevRemainingLength = remainingContent.length;\n remainingContent = remainingContent.slice(breakPosition).trim();\n\n logger?.trace?.('After slicing remainingContent', {\n newLength: remainingContent.length,\n prevLength: prevRemainingLength,\n slicedAmount: breakPosition,\n });\n\n // If no remaining content, we're done with this segment\n if (!remainingContent) {\n logger?.debug?.('No remaining content, breaking out of loop');\n break;\n }\n\n // Find which page the remaining content actually starts on\n // The next piece starts from actualEndIdx OR the next page if the break was at a page boundary\n let nextFromIdx = actualEndIdx;\n\n // Check if remaining content starts with content from the next page\n if (remainingContent && actualEndIdx + 1 <= toIdx) {\n const nextPageData = normalizedPages.get(pageIds[actualEndIdx + 1]);\n if (nextPageData) {\n const nextPrefix = nextPageData.content.slice(0, Math.min(30, nextPageData.length));\n if (nextPrefix && remainingContent.startsWith(nextPrefix)) {\n nextFromIdx = actualEndIdx + 1;\n logger?.trace?.('Content starts with next page prefix', { advancingTo: nextFromIdx });\n }\n }\n }\n\n logger?.trace?.('End of iteration', {\n nextFromIdx,\n prevCurrentFromIdx: currentFromIdx,\n willAdvance: nextFromIdx !== currentFromIdx,\n });\n\n currentFromIdx = nextFromIdx;\n isFirstPiece = false;\n }\n }\n\n logger?.info?.('Breakpoint processing completed', { resultCount: result.length });\n\n return result;\n};\n\n/**\n * Segments pages of content based on pattern-matching rules.\n *\n * This is the main entry point for the segmentation engine. 
It takes an array\n * of pages and applies the provided rules to identify split points, producing\n * an array of segments with content, page references, and metadata.\n *\n * @param pages - Array of pages with id and content\n * @param options - Segmentation options including splitting rules\n * @returns Array of segments with content, from/to page references, and optional metadata\n *\n * @example\n * // Split markdown by headers\n * const segments = segmentPages(pages, {\n * rules: [\n * { lineStartsWith: ['## '], split: 'at', meta: { type: 'chapter' } }\n * ]\n * });\n *\n * @example\n * // Split Arabic hadith text with number extraction\n * const segments = segmentPages(pages, {\n * rules: [\n * {\n * lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '],\n * split: 'at',\n * fuzzy: true,\n * meta: { type: 'hadith' }\n * }\n * ]\n * });\n *\n * @example\n * // Multiple rules with page constraints\n * const segments = segmentPages(pages, {\n * rules: [\n * { lineStartsWith: ['{{kitab}}'], split: 'at', meta: { type: 'book' } },\n * { lineStartsWith: ['{{bab}}'], split: 'at', min: 10, meta: { type: 'chapter' } },\n * { regex: '^[٠-٩]+ - ', split: 'at', meta: { type: 'hadith' } }\n * ]\n * });\n */\nexport const segmentPages = (pages: Page[], options: SegmentationOptions): Segment[] => {\n const { rules = [], maxPages, breakpoints, prefer = 'longer', logger } = options;\n if (!pages.length) {\n return [];\n }\n\n const { content: matchContent, normalizedPages: normalizedContent, pageMap } = buildPageMap(pages);\n const splitPoints: SplitPoint[] = [];\n\n // Process rules to find structural split points\n for (const rule of rules) {\n const { regex, usesCapture, captureNames, usesLineStartsAfter } = buildRuleRegex(rule);\n const allMatches = findMatches(matchContent, regex, usesCapture, captureNames);\n\n // Filter matches by page ID constraints\n const constrainedMatches = filterByConstraints(allMatches, rule, pageMap.getId);\n\n // Apply occurrence filtering (global)\n const finalMatches = filterByOccurrence(constrainedMatches, rule.occurrence);\n\n for (const m of finalMatches) {\n // For lineStartsAfter: we want to exclude the marker from content.\n // - Split at m.start so previous segment doesn't include the marker\n // - Set contentStartOffset to skip the marker when slicing this segment\n const isLineStartsAfter = usesLineStartsAfter && m.captured !== undefined;\n const markerLength = isLineStartsAfter ? m.end - m.captured!.length - m.start : 0;\n\n splitPoints.push({\n // lineStartsAfter: DON'T use capturedContent, let normal slicing extend to next split\n capturedContent: isLineStartsAfter ? undefined : m.captured,\n // lineStartsAfter: skip the marker when slicing content\n contentStartOffset: isLineStartsAfter ? markerLength : undefined,\n index: (rule.split ?? 'at') === 'at' ? 
m.start : m.end,\n meta: rule.meta,\n namedCaptures: m.namedCaptures,\n });\n }\n }\n\n // Deduplicate split points by index, preferring ones with more information\n // (contentStartOffset or meta over plain splits)\n const byIndex = new Map<number, SplitPoint>();\n for (const p of splitPoints) {\n const existing = byIndex.get(p.index);\n if (!existing) {\n byIndex.set(p.index, p);\n } else {\n // Prefer split with contentStartOffset (for lineStartsAfter stripping)\n // or with meta over one without\n const hasMoreInfo =\n (p.contentStartOffset !== undefined && existing.contentStartOffset === undefined) ||\n (p.meta !== undefined && existing.meta === undefined);\n if (hasMoreInfo) {\n byIndex.set(p.index, p);\n }\n }\n }\n const unique = [...byIndex.values()];\n unique.sort((a, b) => a.index - b.index);\n\n // Build initial segments from structural rules\n let segments = buildSegments(unique, matchContent, pageMap, rules);\n\n // Handle case where no rules or no split points - create one segment from all content\n // This allows breakpoints to still process the content\n if (segments.length === 0 && pages.length > 0) {\n const firstPage = pages[0];\n const lastPage = pages[pages.length - 1];\n // OPTIMIZATION: Reuse pre-normalized content from buildPageMap instead of re-normalizing\n const allContent = normalizedContent.join('\\n');\n const initialSeg: Segment = {\n content: allContent.trim(),\n from: firstPage.id,\n };\n if (lastPage.id !== firstPage.id) {\n initialSeg.to = lastPage.id;\n }\n if (initialSeg.content) {\n segments = [initialSeg];\n }\n }\n\n // Apply breakpoints post-processing for oversized segments\n if (maxPages !== undefined && maxPages >= 0 && breakpoints?.length) {\n return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer, logger);\n }\n\n return segments;\n};\n\n/**\n * Creates segment objects from split points.\n *\n * Handles segment creation including:\n * - Content extraction (with captured content for `lineStartsAfter`)\n * - Page break conversion to spaces\n * - From/to page reference calculation\n * - Metadata merging (static + named captures)\n *\n * @param splitPoints - Sorted, unique split points\n * @param content - Full concatenated content string\n * @param pageMap - Page mapping utilities\n * @param rules - Original rules (for constraint checking on first segment)\n * @returns Array of segment objects\n */\nconst buildSegments = (splitPoints: SplitPoint[], content: string, pageMap: PageMap, rules: SplitRule[]): Segment[] => {\n /**\n * Creates a single segment from a content range.\n */\n const createSegment = (\n start: number,\n end: number,\n meta?: Record<string, unknown>,\n capturedContent?: string,\n namedCaptures?: Record<string, string>,\n contentStartOffset?: number,\n ): Segment | null => {\n // For lineStartsAfter, skip the marker by using contentStartOffset\n const actualStart = start + (contentStartOffset ?? 0);\n // For lineStartsAfter (contentStartOffset set), trim leading whitespace after marker\n // For other rules, only trim trailing whitespace to preserve intentional leading spaces\n const sliced = content.slice(actualStart, end);\n let text = capturedContent?.trim() ?? (contentStartOffset ? sliced.trim() : sliced.replace(/[\\s\\n]+$/, ''));\n if (!text) {\n return null;\n }\n if (!capturedContent) {\n text = convertPageBreaks(text, actualStart, pageMap.pageBreaks);\n }\n const from = pageMap.getId(actualStart);\n const to = capturedContent ? 
pageMap.getId(end - 1) : pageMap.getId(actualStart + text.length - 1);\n const seg: Segment = { content: text, from };\n if (to !== from) {\n seg.to = to;\n }\n if (meta || namedCaptures) {\n seg.meta = { ...meta, ...namedCaptures };\n }\n return seg;\n };\n\n /**\n * Creates segments from an array of split points.\n */\n const createSegmentsFromSplitPoints = (): Segment[] => {\n const result: Segment[] = [];\n for (let i = 0; i < splitPoints.length; i++) {\n const sp = splitPoints[i];\n const end = i < splitPoints.length - 1 ? splitPoints[i + 1].index : content.length;\n const s = createSegment(\n sp.index,\n end,\n sp.meta,\n sp.capturedContent,\n sp.namedCaptures,\n sp.contentStartOffset,\n );\n if (s) {\n result.push(s);\n }\n }\n return result;\n };\n\n const segments: Segment[] = [];\n\n // Handle case with no split points\n if (!splitPoints.length) {\n const firstId = pageMap.getId(0);\n if (anyRuleAllowsId(rules, firstId)) {\n const s = createSegment(0, content.length);\n if (s) {\n segments.push(s);\n }\n }\n return segments;\n }\n\n // Add first segment if there's content before first split\n if (splitPoints[0].index > 0) {\n const firstId = pageMap.getId(0);\n if (anyRuleAllowsId(rules, firstId)) {\n const s = createSegment(0, splitPoints[0].index);\n if (s) {\n segments.push(s);\n }\n }\n }\n\n // Create segments from split points using extracted utility\n return [...segments, ...createSegmentsFromSplitPoints()];\n};\n","/**\n * Pattern detection utilities for recognizing template tokens in Arabic text.\n * Used to auto-detect patterns from user-highlighted text in the segmentation dialog.\n *\n * @module pattern-detection\n */\n\nimport { getAvailableTokens, TOKEN_PATTERNS } from './segmentation/tokens.js';\n\n/**\n * Result of detecting a token pattern in text\n */\nexport type DetectedPattern = {\n /** Token name from TOKEN_PATTERNS (e.g., 'raqms', 'dash') */\n token: string;\n /** The matched text */\n match: string;\n /** Start index in the original text */\n index: number;\n /** End index (exclusive) */\n endIndex: number;\n};\n\n/**\n * Token detection order - more specific patterns first to avoid partial matches.\n * Example: 'raqms' before 'raqm' so \"٣٤\" matches 'raqms' not just the first digit.\n *\n * Tokens not in this list are appended in alphabetical order from TOKEN_PATTERNS.\n */\nconst TOKEN_PRIORITY_ORDER: string[] = [\n 'basmalah', // Most specific - full phrase\n 'kitab',\n 'bab',\n 'fasl',\n 'naql',\n 'numbered', // Composite: raqms + dash\n 'raqms', // Multiple digits before single digit\n 'raqm',\n 'tarqim',\n 'bullet',\n 'dash',\n 'harf',\n];\n\n/**\n * Gets the token detection priority order.\n * Returns tokens in priority order, with any TOKEN_PATTERNS not in the priority list appended.\n */\nconst getTokenPriority = (): string[] => {\n const allTokens = getAvailableTokens();\n const prioritized = TOKEN_PRIORITY_ORDER.filter((t) => allTokens.includes(t));\n const remaining = allTokens.filter((t) => !TOKEN_PRIORITY_ORDER.includes(t)).sort();\n return [...prioritized, ...remaining];\n};\n\n/**\n * Analyzes text and returns all detected token patterns with their positions.\n * Patterns are detected in priority order to avoid partial matches.\n *\n * @param text - The text to analyze for token patterns\n * @returns Array of detected patterns sorted by position\n *\n * @example\n * detectTokenPatterns(\"٣٤ - حدثنا\")\n * // Returns: [\n * // { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },\n * // { token: 'dash', match: '-', index: 3, endIndex: 4 
},\n * // { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }\n * // ]\n */\nexport const detectTokenPatterns = (text: string): DetectedPattern[] => {\n if (!text) {\n return [];\n }\n\n const results: DetectedPattern[] = [];\n const coveredRanges: Array<[number, number]> = [];\n\n // Check if a position is already covered by a detected pattern\n const isPositionCovered = (start: number, end: number): boolean => {\n return coveredRanges.some(\n ([s, e]) => (start >= s && start < e) || (end > s && end <= e) || (start <= s && end >= e),\n );\n };\n\n // Process tokens in priority order\n for (const tokenName of getTokenPriority()) {\n const pattern = TOKEN_PATTERNS[tokenName];\n if (!pattern) {\n continue;\n }\n\n try {\n // Create a global regex to find all matches\n const regex = new RegExp(`(${pattern})`, 'gu');\n let match: RegExpExecArray | null;\n\n // biome-ignore lint/suspicious/noAssignInExpressions: standard regex exec loop pattern\n while ((match = regex.exec(text)) !== null) {\n const startIndex = match.index;\n const endIndex = startIndex + match[0].length;\n\n // Skip if this range overlaps with an already detected pattern\n if (isPositionCovered(startIndex, endIndex)) {\n continue;\n }\n\n results.push({ endIndex, index: startIndex, match: match[0], token: tokenName });\n\n coveredRanges.push([startIndex, endIndex]);\n }\n } catch {}\n }\n\n return results.sort((a, b) => a.index - b.index);\n};\n\n/**\n * Generates a template pattern from text using detected tokens.\n * Replaces matched portions with {{token}} syntax.\n *\n * @param text - Original text\n * @param detected - Array of detected patterns from detectTokenPatterns\n * @returns Template string with tokens, e.g., \"{{raqms}} {{dash}} \"\n *\n * @example\n * const detected = detectTokenPatterns(\"٣٤ - \");\n * generateTemplateFromText(\"٣٤ - \", detected);\n * // Returns: \"{{raqms}} {{dash}} \"\n */\nexport const generateTemplateFromText = (text: string, detected: DetectedPattern[]): string => {\n if (!text || detected.length === 0) {\n return text;\n }\n\n // Build template by replacing detected patterns with tokens\n // Process in reverse order to preserve indices\n let template = text;\n const sortedByIndexDesc = [...detected].sort((a, b) => b.index - a.index);\n\n for (const d of sortedByIndexDesc) {\n template = `${template.slice(0, d.index)}{{${d.token}}}${template.slice(d.endIndex)}`;\n }\n\n return template;\n};\n\n/**\n * Determines the best pattern type for auto-generated rules based on detected patterns.\n *\n * @param detected - Array of detected patterns\n * @returns Suggested pattern type and whether to use fuzzy matching\n */\nexport const suggestPatternConfig = (\n detected: DetectedPattern[],\n): { patternType: 'lineStartsWith' | 'lineStartsAfter'; fuzzy: boolean; metaType?: string } => {\n // Check if the detected patterns suggest a structural marker (chapter, book, etc.)\n const hasStructuralToken = detected.some((d) => ['basmalah', 'kitab', 'bab', 'fasl'].includes(d.token));\n\n // Check if the pattern is numbered (hadith-style)\n const hasNumberedPattern = detected.some((d) => ['raqms', 'raqm', 'numbered'].includes(d.token));\n\n // If it starts with a structural token, use lineStartsWith (keep marker in content)\n if (hasStructuralToken) {\n return {\n fuzzy: true,\n metaType: detected.find((d) => ['kitab', 'bab', 'fasl'].includes(d.token))?.token || 'chapter',\n patternType: 'lineStartsWith',\n };\n }\n\n // If it's a numbered pattern (like hadith numbers), use lineStartsAfter (strip 
prefix)\n if (hasNumberedPattern) {\n return { fuzzy: false, metaType: 'hadith', patternType: 'lineStartsAfter' };\n }\n\n // Default: use lineStartsAfter without fuzzy\n return { fuzzy: false, patternType: 'lineStartsAfter' };\n};\n\n/**\n * Analyzes text and generates a complete suggested rule configuration.\n *\n * @param text - Highlighted text from the page\n * @returns Suggested rule configuration or null if no patterns detected\n */\nexport const analyzeTextForRule = (\n text: string,\n): {\n template: string;\n patternType: 'lineStartsWith' | 'lineStartsAfter';\n fuzzy: boolean;\n metaType?: string;\n detected: DetectedPattern[];\n} | null => {\n const detected = detectTokenPatterns(text);\n\n if (detected.length === 0) {\n return null;\n }\n\n const template = generateTemplateFromText(text, detected);\n const config = suggestPatternConfig(detected);\n\n return { detected, template, ...config };\n};\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA+BA,MAAM,mBAAmB;;;;;;;;;;;;;;;AAgBzB,MAAMA,eAA2B;CAC7B;EAAC;EAAU;EAAU;EAAU;EAAS;CACxC,CAAC,KAAU,IAAS;CACpB,CAAC,KAAU,IAAS;CACvB;;;;;;;;;;;;;;AAeD,MAAa,eAAe,MAAsB,EAAE,QAAQ,uBAAuB,OAAO;;;;;;;;;;;;;;;;;;AAmB1F,MAAM,iBAAiB,OAAuB;AAC1C,MAAK,MAAM,SAAS,aAChB,KAAI,MAAM,SAAS,GAAG,CAElB,QAAO,IAAI,MAAM,KAAK,MAAM,YAAY,EAAE,CAAC,CAAC,KAAK,GAAG,CAAC;AAI7D,QAAO,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;AAyB1B,MAAM,wBAAwB,QAAgB;AAC1C,QAAO,IACF,UAAU,MAAM,CAChB,QAAQ,mBAAmB,GAAG,CAC9B,QAAQ,QAAQ,IAAI,CACpB,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAsCf,MAAa,4BAA4B,SAAiB;CACtD,MAAM,oBAAoB,GAAG,iBAAiB;CAC9C,MAAM,OAAO,qBAAqB,KAAK;AAEvC,QAAO,MAAM,KAAK,KAAK,CAClB,KAAK,OAAO,cAAc,GAAG,GAAG,kBAAkB,CAClD,KAAK,GAAG;;;;;;;;;;;;;;;;;;;AC9IjB,MAAa,uBAAuB,OAAoC,OAAO,OAAO,WAAW,EAAE,SAAS,IAAI,GAAG;;;;;;;;;;;;;;;;;;AAmBnH,MAAa,kBAAkB,QAAgB,gBAAkD;AAC7F,KAAI,CAAC,eAAe,YAAY,WAAW,EACvC,QAAO;AAEX,MAAK,MAAM,QAAQ,YACf,KAAI,OAAO,SAAS,UAChB;MAAI,WAAW,KACX,QAAO;QAER;EACH,MAAM,CAAC,MAAM,MAAM;AACnB,MAAI,UAAU,QAAQ,UAAU,GAC5B,QAAO;;AAInB,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAa,uBAAuB,QAAgB,SAAkC;AAClF,KAAI,KAAK,QAAQ,UAAa,SAAS,KAAK,IACxC,QAAO;AAEX,KAAI,KAAK,QAAQ,UAAa,SAAS,KAAK,IACxC,QAAO;AAEX,QAAO,CAAC,eAAe,QAAQ,KAAK,QAAQ;;;;;;;;;;;;;;;;;;AAmBhD,MAAa,mBAAmB,gBAAsD;CAClF,MAAM,6BAAa,IAAI,KAAa;AACpC,MAAK,MAAM,QAAQ,eAAe,EAAE,CAChC,KAAI,OAAO,SAAS,SAChB,YAAW,IAAI,KAAK;KAEpB,MAAK,IAAI,IAAI,KAAK,IAAI,KAAK,KAAK,IAAI,IAChC,YAAW,IAAI,EAAE;AAI7B,QAAO;;;;;;;;;;;;;;;;;;;AAoBX,MAAa,iBACT,SACA,YACA,UACA,SACiB;CACjB,MAAM,UAAU,QAAQ,MAAM;AAC9B,KAAI,CAAC,QACD,QAAO;CAEX,MAAMC,MAAe;EAAE,SAAS;EAAS,MAAM;EAAY;AAC3D,KAAI,aAAa,UAAa,aAAa,WACvC,KAAI,KAAK;AAEb,KAAI,KACA,KAAI,OAAO;AAEf,QAAO;;;;;;;;;;;;;;AA0BX,MAAa,qBAAqB,aAA2B,qBACzD,YAAY,KAAK,OAAO;CACpB,MAAM,OAAO,oBAAoB,GAAG;CACpC,MAAM,aAAa,gBAAgB,KAAK,QAAQ;CAChD,MAAM,gBACF,KAAK,aAAa,gBACL;EACH,MAAM,eAAeC,iBAAe,KAAK,SAAS;AAClD,MAAI;AACA,UAAO,IAAI,OAAO,cAAc,KAAK;WAChC,OAAO;GACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,SAAM,IAAI,MAAM,sCAAsC,KAAK,SAAS,aAAa,UAAU;;KAE/F,GACJ;AACV,KAAI,KAAK,YAAY,GACjB,QAAO;EAAE;EAAY,OAAO;EAAM;EAAM;EAAe;CAE3D,MAAM,WAAWA,iBAAe,KAAK,QAAQ;AAC7C,KAAI;AACA,SAAO;GAAE;GAAY,OAAO,IAAI,OAAO,UAAU,MAAM;GAAE;GAAM;GAAe;UACzE,OAAO;EACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,QAAM,IAAI,MAAM,6BAA6B,KAAK,QAAQ,aAAa,UAAU;;EAEvF;;;;;;;;;;;;AAgBN,MAAa,qBACT,cACA,gBACA,OACA,SACA,oBACS;AACT,MAAK,IAAI,KAAK,OAAO,KAAK,gBAAgB,MAAM;EAC5C,MAAM,WAAW,gBAAgB,IAAI,QAAQ,IAAI;AACjD,MAAI,UAAU;GACV,MAAM,eAAe,SAAS,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,SAAS,OAAO,CAAC;AAC7E,OAAI,aAAa,SAAS,KAAK,aAAa,QAAQ,aAAa,GAAG,EAChE,QAAO;;;AAInB,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAa,uBACT,cACA,gBACA,OACA,SACA,oBA
CS;CACT,MAAM,eAAe,aAAa,WAAW;AAC7C,KAAI,CAAC,aACD,QAAO;AAIX,MAAK,IAAI,KAAK,gBAAgB,MAAM,OAAO,MAAM;EAC7C,MAAM,WAAW,gBAAgB,IAAI,QAAQ,IAAI;AACjD,MAAI,UAAU;GACV,MAAM,aAAa,SAAS,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,SAAS,OAAO,CAAC,CAAC,MAAM;GAClF,MAAM,cAAc,aAAa,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AAK5E,OAAI,WAAW,SAAS,GAAG;AACvB,QAAI,aAAa,WAAW,WAAW,CACnC,QAAO;AAEX,QAAI,SAAS,QAAQ,WAAW,CAAC,WAAW,YAAY,CACpD,QAAO;;;;AAKvB,QAAO;;;;;;;;;;;AAqBX,MAAa,0BACT,YACA,SACA,SACA,UACU;AACV,KAAI,WAAW,SAAS,EACpB,QAAO;AAEX,MAAK,IAAI,UAAU,SAAS,WAAW,OAAO,UAC1C,KAAI,WAAW,IAAI,QAAQ,SAAS,CAChC,QAAO;AAGf,QAAO;;;;;;;;;;AAWX,MAAa,wBAAwB,kBAA0B,iBAAyC;CACpG,MAAM,eAAe,aAAa,QAAQ,MAAM,CAAC,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AAC5F,KAAI,aAAa,WAAW,EACxB,QAAO;CAEX,MAAM,MAAM,iBAAiB,QAAQ,aAAa;AAClD,QAAO,MAAM,IAAI,MAAM;;;;;;;;;;AAW3B,MAAa,4BACT,eACA,OACA,WACS;CAGT,IAAIC;CACJ,IAAIC;AACJ,MAAK,MAAM,KAAK,cAAc,SAAS,MAAM,EAAE;EAC3C,MAAM,QAAQ;GAAE,OAAO,EAAE;GAAO,QAAQ,EAAE,GAAG;GAAQ;AACrD,MAAI,CAAC,MACD,SAAQ;AAEZ,SAAO;;AAEX,KAAI,CAAC,MACD,QAAO;CAEX,MAAM,WAAW,WAAW,WAAW,OAAQ;AAC/C,QAAO,SAAS,QAAQ,SAAS;;;;;;;;;;;;;AAcrC,MAAa,qBACT,kBACA,gBACA,OACA,cACA,QACS;CACT,MAAM,EAAE,SAAS,iBAAiB,mBAAmB,qBAAqB,WAAW;AAErF,MAAK,MAAM,EAAE,MAAM,OAAO,YAAY,mBAAmB,qBAAqB;AAE1E,MAAI,CAAC,oBAAoB,QAAQ,iBAAiB,KAAK,CACnD;AAIJ,MAAI,uBAAuB,YAAY,SAAS,gBAAgB,aAAa,CACzE;AAIJ,MAAI,eAAe,KAAK,iBAAiB,CACrC;AAIJ,MAAI,UAAU,MAAM;GAChB,MAAM,cAAc,eAAe;AACnC,OAAI,eAAe,OAAO;IACtB,MAAM,eAAe,gBAAgB,IAAI,QAAQ,aAAa;AAC9D,QAAI,cAAc;KACd,MAAM,MAAM,qBAAqB,kBAAkB,aAAa;AAChE,SAAI,MAAM,EACN,QAAO;;;AAKnB,UAAO,KAAK,IACR,kBAAkB,eAAe,KAAK,kBAAkB,iBACxD,iBAAiB,OACpB;;EAIL,MAAM,oBAAoB,KAAK,IAC3B,kBAAkB,eAAe,KAAK,kBAAkB,iBACxD,iBAAiB,OACpB;EAED,MAAM,WAAW,yBADK,iBAAiB,MAAM,GAAG,kBAAkB,EACT,OAAO,OAAO;AACvE,MAAI,WAAW,EACX,QAAO;;AAIf,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACvWX,MAAa,wBACT,QACA,iBACqC;AACrC,KAAI,CAAC,UAAU,aAAa,WAAW,EACnC;CAGJ,MAAMC,gBAAwC,EAAE;AAChD,MAAK,MAAM,QAAQ,aACf,KAAI,OAAO,UAAU,OACjB,eAAc,QAAQ,OAAO;AAIrC,QAAO,OAAO,KAAK,cAAc,CAAC,SAAS,IAAI,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;AAwBnE,MAAa,4BAA4B,UAA+C;AACpF,KAAI,MAAM,UAAU,EAChB;AAGJ,MAAK,IAAI,IAAI,MAAM,SAAS,GAAG,KAAK,GAAG,IACnC,KAAI,MAAM,OAAO,OACb,QAAO,MAAM;;;;;;;;;;;;;;;;;;;;;;AA0BzB,MAAa,uBACT,SACA,MACA,UACgB;AAChB,QAAO,QAAQ,QAAQ,MAAM;EACzB,MAAM,KAAK,MAAM,EAAE,MAAM;AACzB,MAAI,KAAK,QAAQ,UAAa,KAAK,KAAK,IACpC,QAAO;AAEX,MAAI,KAAK,QAAQ,UAAa,KAAK,KAAK,IACpC,QAAO;AAEX,MAAI,eAAe,IAAI,KAAK,QAAQ,CAChC,QAAO;AAEX,SAAO;GACT;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA8BN,MAAa,sBAAsB,SAAwB,eAAyD;AAChH,KAAI,CAAC,QAAQ,OACT,QAAO,EAAE;AAEb,KAAI,eAAe,QACf,QAAO,CAAC,QAAQ,GAAG;AAEvB,KAAI,eAAe,OACf,QAAO,CAAC,QAAQ,QAAQ,SAAS,GAAG;AAExC,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4IX,MAAa,mBAAmB,OAAyC,WAA4B;AACjG,QAAO,MAAM,MAAM,MAAM;EACrB,MAAM,QAAQ,EAAE,QAAQ,UAAa,UAAU,EAAE;EACjD,MAAM,QAAQ,EAAE,QAAQ,UAAa,UAAU,EAAE;AACjD,SAAO,SAAS;GAClB;;;;;;;;;;;AC1VN,MAAa,iBAAiB,SAAyB;AACnD,QAAO,KAAK,QAAQ,YAAY,GAAG;;;;;;;;;;;AAavC,MAAa,wBAAwB,YACjC,QAAQ,SAAS,KAAK,GAAG,QAAQ,QAAQ,UAAU,KAAK,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AC+C/D,MAAa,0BAA0B,YAA4B;AAG/D,QAAO,QAAQ,QAAQ,+BAA+B,OAAO,OAAO,YAAY;AAC5E,MAAI,MACA,QAAO;AAEX,SAAO,KAAK;GACd;;;;;;;;;;AAeN,MAAMC,cAAsC;CAQxC,KAAK;CAUL,UAAU,CAAC,YAAY,IAAI,CAAC,KAAK,IAAI;CASrC,QAAQ;CAaR,MAAM;CAMN,MAAM,CAAC,SAAS,MAAM,CAAC,KAAK,IAAI;CAUhC,MAAM;CASN,OAAO;CAeP,MAAM;EAAC;EAAS;EAAW;EAAS;EAAQ;EAAU;EAAU;EAAS,CAAC,KAAK,IAAI;CAUnF,MAAM;CAUN,OAAO;CAMP,QAAQ;CACX;;;;;;;;;;;;AAkBD,MAAMC,mBAA2C,EAoB7C,UAAU,uBACb;;;;;;;;;AAUD,MAAM,oBAAoB,aAA6B;AACnD,QAAO,SAAS,QAAQ,mBAAmB,GAAG,cAAc;AACxD,SAAO,YAAY,cAAc,KAAK,UAAU;GAClD;;;;;;;;;;;;;;;;;;;;;;;;;;;AA4BN,M
AAaC,iBAAyC;CAClD,GAAG;CAEH,GAAG,OAAO,YAAY,OAAO,QAAQ,iBAAiB,CAAC,KAAK,CAAC,GAAG,OAAO,CAAC,GAAG,iBAAiB,EAAE,CAAC,CAAC,CAAC;CACpG;;;;;;;;;;;AAYD,MAAM,2BAA2B;;;;;;;;;AAUjC,MAAM,qBAAqB;;;;;;;;;;;;;;;;AAiB3B,MAAa,kBAAkB,UAA2B;AACtD,oBAAmB,YAAY;AAC/B,QAAO,mBAAmB,KAAK,MAAM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAoEzC,MAAa,4BAA4B,OAAe,mBAA+D;CACnH,MAAMC,eAAyB,EAAE;CAEjC,MAAM,oCAAoB,IAAI,KAAqB;;;;;CAMnD,MAAM,wBAAwB,aAA6B;EACvD,MAAM,QAAQ,kBAAkB,IAAI,SAAS,IAAI;AACjD,oBAAkB,IAAI,UAAU,QAAQ,EAAE;AAC1C,SAAO,UAAU,IAAI,WAAW,GAAG,SAAS,GAAG,QAAQ;;CAI3D,MAAMC,WAA6D,EAAE;CACrE,IAAI,YAAY;AAChB,0BAAyB,YAAY;CACrC,IAAIC;AAGJ,SAAQ,QAAQ,yBAAyB,KAAK,MAAM,MAAM,MAAM;AAE5D,MAAI,MAAM,QAAQ,UACd,UAAS,KAAK;GAAE,MAAM;GAAQ,OAAO,MAAM,MAAM,WAAW,MAAM,MAAM;GAAE,CAAC;AAG/E,WAAS,KAAK;GAAE,MAAM;GAAS,OAAO,MAAM;GAAI,CAAC;AACjD,cAAY,MAAM,QAAQ,MAAM,GAAG;;AAGvC,KAAI,YAAY,MAAM,OAClB,UAAS,KAAK;EAAE,MAAM;EAAQ,OAAO,MAAM,MAAM,UAAU;EAAE,CAAC;CAIlE,MAAM,iBAAiB,SAAS,KAAK,YAAY;AAC7C,MAAI,QAAQ,SAAS,QAAQ;AAEzB,OAAI,kBAAkB,kBAAkB,KAAK,QAAQ,MAAM,CACvD,QAAO,eAAe,QAAQ,MAAM;AAExC,UAAO,QAAQ;;AAInB,2BAAyB,YAAY;EACrC,MAAM,aAAa,yBAAyB,KAAK,QAAQ,MAAM;AAC/D,MAAI,CAAC,WACD,QAAO,QAAQ;EAGnB,MAAM,GAAG,WAAW,eAAe;AAGnC,MAAI,CAAC,aAAa,aAAa;GAC3B,MAAM,aAAa,qBAAqB,YAAY;AACpD,gBAAa,KAAK,WAAW;AAC7B,UAAO,MAAM,WAAW;;EAI5B,IAAI,eAAe,eAAe;AAClC,MAAI,CAAC,aAED,QAAO,QAAQ;AAInB,MAAI,eAEA,gBAAe,aACV,MAAM,IAAI,CACV,KAAK,SAAU,kBAAkB,KAAK,KAAK,GAAG,eAAe,KAAK,GAAG,KAAM,CAC3E,KAAK,IAAI;AAIlB,MAAI,aAAa;GACb,MAAM,aAAa,qBAAqB,YAAY;AACpD,gBAAa,KAAK,WAAW;AAC7B,UAAO,MAAM,WAAW,GAAG,aAAa;;AAI5C,SAAO;GACT;AAEF,QAAO;EACH;EACA,aAAa,aAAa,SAAS;EACnC,SAAS,eAAe,KAAK,GAAG;EACnC;;;;;;;;;;;;;;;;;;;;;AAsBL,MAAa,gBAAgB,UAAkB,yBAAyB,MAAM,CAAC;;;;;;;;;;;;;;;;;;;;;;AAuB/E,MAAa,mBAAmB,aAAqB;CACjD,MAAM,WAAW,aAAa,SAAS;AACvC,KAAI;AACA,SAAO,IAAI,OAAO,UAAU,IAAI;SAC5B;AACJ,SAAO;;;;;;;;;;;;;;;AAgBf,MAAa,2BAA2B,OAAO,KAAK,eAAe;;;;;;;;;;;;;;;AAgBnE,MAAa,mBAAmB,cAA0C,eAAe;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACpgBzF,MAAM,qBAAqB,YAA6B;AAEpD,QAAO,WAAW,KAAK,QAAQ;;;;;;;;;;;;;;;;;;;;;AAgCnC,MAAM,kBAAkB,SAAiB,UAAqC;CAK1E,MAAM,EAAE,SAAS,UAAU,iBAAiB,yBAH5B,uBAAuB,QAAQ,EAExB,QAAQ,2BAA2B,OACmC;AAC7F,QAAO;EAAE;EAAc,SAAS;EAAU;;;;;;;;;;;;;;;AA8B9C,MAAM,kBAAkB,SAA+B;CACnD,MAAMC,IAMF,EAAE,GAAG,MAAM;CAEf,MAAM,QAAS,KAA6B,SAAS;CACrD,IAAIC,kBAA4B,EAAE;;;;;;;;;CAUlC,MAAM,gBAAgB,YAA4B;AAC9C,MAAI;AACA,UAAO,IAAI,OAAO,SAAS,MAAM;WAC5B,OAAO;GACZ,MAAM,UAAU,iBAAiB,QAAQ,MAAM,UAAU,OAAO,MAAM;AACtE,SAAM,IAAI,MAAM,0BAA0B,QAAQ,aAAa,UAAU;;;AAKjF,KAAI,EAAE,iBAAiB,QAAQ;EAC3B,MAAM,YAAY,EAAE,gBAAgB,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACxE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAE1D,IAAE,QAAQ,OAAO,SAAS;AAC1B,SAAO;GACH,cAAc;GACd,OAAO,aAAa,EAAE,MAAM;GAC5B,aAAa;GACb,qBAAqB;GACxB;;AAGL,KAAI,EAAE,gBAAgB,QAAQ;EAC1B,MAAM,YAAY,EAAE,eAAe,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACvE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAC1D,IAAE,QAAQ,OAAO,SAAS;;AAE9B,KAAI,EAAE,cAAc,QAAQ;EACxB,MAAM,YAAY,EAAE,aAAa,KAAK,MAAM,eAAe,GAAG,MAAM,CAAC;EACrE,MAAM,WAAW,UAAU,KAAK,MAAM,EAAE,QAAQ,CAAC,KAAK,IAAI;AAC1D,oBAAkB,UAAU,SAAS,MAAM,EAAE,aAAa;AAC1D,IAAE,QAAQ,MAAM,SAAS;;AAE7B,KAAI,EAAE,UAAU;EAGZ,MAAM,EAAE,SAAS,iBAAiB,yBADlB,uBAAuB,EAAE,SAAS,CACiB;AACnE,IAAE,QAAQ;AACV,oBAAkB,CAAC,GAAG,iBAAiB,GAAG,aAAa;;AAG3D,KAAI,CAAC,EAAE,MACH,OAAM,IAAI,MACN,gHACH;CAGL,MAAM,cAAc,kBAAkB,EAAE,MAAM,IAAI,gBAAgB,SAAS;AAC3E,QAAO;EACH,cAAc;EACd,OAAO,aAAa,EAAE,MAAM;EAC5B;EACA,qBAAqB;EACxB;;;;;;;;;;;;;;;;;;;;;;AAsDL,MAAM,gBAAgB,UAAoF;CACtG,MAAMC,aAA6B,EAAE;CACrC,MAAMC,aAAuB,EAAE;CAC/B,IAAI,SAAS;CACb,MAAMC,QAAkB,EAAE;AAE1B,MAAK,IAAI,IAAI,GAAG
,IAAI,MAAM,QAAQ,KAAK;EACnC,MAAM,aAAa,qBAAqB,MAAM,GAAG,QAAQ;AACzD,aAAW,KAAK;GAAE,KAAK,SAAS,WAAW;GAAQ,IAAI,MAAM,GAAG;GAAI,OAAO;GAAQ,CAAC;AACpF,QAAM,KAAK,WAAW;AACtB,MAAI,IAAI,MAAM,SAAS,GAAG;AACtB,cAAW,KAAK,SAAS,WAAW,OAAO;AAC3C,aAAU,WAAW,SAAS;QAE9B,WAAU,WAAW;;;;;;;;;CAW7B,MAAM,gBAAgB,QAA0C;EAC5D,IAAI,KAAK;EACT,IAAI,KAAK,WAAW,SAAS;AAE7B,SAAO,MAAM,IAAI;GACb,MAAM,MAAO,KAAK,OAAQ;GAC1B,MAAM,IAAI,WAAW;AACrB,OAAI,MAAM,EAAE,MACR,MAAK,MAAM;YACJ,MAAM,EAAE,IACf,MAAK,MAAM;OAEX,QAAO;;AAIf,SAAO,WAAW,WAAW,SAAS;;AAG1C,QAAO;EACH,SAAS,MAAM,KAAK,KAAK;EACzB,iBAAiB;EACjB,SAAS;GACL;GACA,QAAQ,QAAgB,aAAa,IAAI,EAAE,MAAM;GACjD;GACA,SAAS,WAAW,KAAK,MAAM,EAAE,GAAG;GACvC;EACJ;;;;;;;;;;;AAiCL,MAAM,eAAe,SAAiB,OAAe,aAAsB,iBAA2B;CAClG,MAAMC,UAAyB,EAAE;AACjC,OAAM,YAAY;CAClB,IAAI,IAAI,MAAM,KAAK,QAAQ;AAE3B,QAAO,MAAM,MAAM;EACf,MAAMC,SAAsB;GAAE,KAAK,EAAE,QAAQ,EAAE,GAAG;GAAQ,OAAO,EAAE;GAAO;AAG1E,SAAO,gBAAgB,qBAAqB,EAAE,QAAQ,aAAa;AAGnE,MAAI,YACA,QAAO,WAAW,yBAAyB,EAAE;AAGjD,UAAQ,KAAK,OAAO;AAEpB,MAAI,EAAE,GAAG,WAAW,EAChB,OAAM;AAEV,MAAI,MAAM,KAAK,QAAQ;;AAG3B,QAAO;;;;;;;;;;;AAYX,MAAM,qBAAqB,aAAqB,WAAmB,iBAA2B;AAC1F,KAAI,aAAa,WAAW,EACxB,QAAO,EAAE;CAIb,IAAI,KAAK;CACT,IAAI,KAAK,aAAa;AACtB,QAAO,KAAK,IAAI;EACZ,MAAM,MAAO,KAAK,OAAQ;AAC1B,MAAI,aAAa,OAAO,YACpB,MAAK,MAAM;MAEX,MAAK;;CAKb,MAAMC,SAAmB,EAAE;AAC3B,MAAK,IAAI,IAAI,IAAI,IAAI,aAAa,UAAU,aAAa,KAAK,WAAW,IACrE,QAAO,KAAK,aAAa,KAAK,YAAY;AAE9C,QAAO;;;;;;;;;;;;;;;;AAiBX,MAAM,qBAAqB,SAAiB,aAAqB,eAAiC;AAE9F,KAAI,CAAC,WAAW,CAAC,QAAQ,SAAS,KAAK,CACnC,QAAO;CAIX,MAAM,gBAAgB,kBAAkB,aADtB,cAAc,QAAQ,QACwB,WAAW;AAG3E,KAAI,cAAc,WAAW,EACzB,QAAO;CAOX,MAAM,WAAW,IAAI,IAAI,cAAc;AACvC,QAAO,QAAQ,QAAQ,QAAQ,OAAO,WAAoB,SAAS,IAAI,OAAO,GAAG,MAAM,MAAO;;;;;;;;;;;;;;;;AAiBlG,MAAM,oBACF,UACA,OACA,mBACA,UACA,aACA,QACA,WACY;CACZ,MAAM,8BACF,gBACA,cACA,OACA,WACA,uBACA,wBACS;EACT,MAAM,iBAAiBC,UAAQ;AAE/B,MAD6BC,sBAAoB,MAAM,OAAO,GAAG,WAAW,IAAI,eAAe,CAAC,IACpE,iBAAiB,MAEzC,QAAOC,oBAAkB,iBAAiB,KAAKA,oBAAkB;AAIrE,OAAK,IAAI,UAAU,iBAAiB,GAAG,WAAW,cAAc,WAAW;GACvE,MAAM,SAASF,UAAQ;AAEvB,OADmBC,sBAAoB,MAAM,OAAO,GAAG,WAAW,IAAI,OAAO,CAAC,CAE1E,QAAOC,oBAAkB,WAAWA,oBAAkB;;AAG9D,SAAO;;CAIX,MAAM,UAAU,MAAM,KAAK,MAAM,EAAE,GAAG;CAGtC,MAAM,gBAAgB,IAAI,IAAI,QAAQ,KAAK,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;CAG9D,MAAM,kCAAkB,IAAI,KAA6B;AACzD,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACnC,MAAM,UAAU,kBAAkB;AAClC,kBAAgB,IAAI,MAAM,GAAG,IAAI;GAAE;GAAS,OAAO;GAAG,QAAQ,QAAQ;GAAQ,CAAC;;CAInF,MAAMC,oBAA8B,CAAC,EAAE;CACvC,IAAI,cAAc;AAClB,MAAK,IAAI,IAAI,GAAG,IAAI,QAAQ,QAAQ,KAAK;EACrC,MAAM,WAAW,gBAAgB,IAAI,QAAQ,GAAG;AAChD,iBAAe,WAAW,SAAS,SAAS;AAC5C,MAAI,IAAI,QAAQ,SAAS,EACrB,gBAAe;AAEnB,oBAAkB,KAAK,YAAY;;CAKvC,MAAM,oBAAoB,MAAc,eAAe,GAAG,MAAM,CAAC;CACjE,MAAM,sBAAsB,kBAAkB,aAAa,iBAAiB;CAE5E,MAAMC,SAAoB,EAAE;AAE5B,SAAQ,OAAO,kCAAkC;EAAE;EAAU,cAAc,SAAS;EAAQ,CAAC;AAE7F,MAAK,MAAM,WAAW,UAAU;EAC5B,MAAM,UAAU,cAAc,IAAI,QAAQ,KAAK,IAAI;EACnD,MAAM,QAAQ,QAAQ,OAAO,SAAa,cAAc,IAAI,QAAQ,GAAG,IAAI,UAAW;AAEtF,UAAQ,QAAQ,sBAAsB;GAClC,eAAe,QAAQ,QAAQ;GAC/B,gBAAgB,QAAQ,QAAQ,MAAM,GAAG,IAAI;GAC7C,MAAM,QAAQ;GACd;GACA,IAAI,QAAQ;GACZ;GACH,CAAC;EAGF,MAAM,eAAe,QAAQ,MAAM,QAAQ,QAAQ,QAAQ;EAG3D,MAAM,gBAAgB,oBAAoB,MAAM,OAC5C,uBAAuB,GAAG,YAAY,SAAS,SAAS,MAAM,CACjE;AAED,MAAI,eAAe,YAAY,CAAC,eAAe;AAC3C,WAAQ,QAAQ,sCAAsC;AAEtD,UAAO,KAAK,QAAQ;AACpB;;AAGJ,UAAQ,QAAQ,0DAA0D;EAM1E,IAAI,mBAAmB,QAAQ;EAC/B,IAAI,iBAAiB;EACrB,IAAI,eAAe;EACnB,IAAI,iBAAiB;EACrB,MAAM,gBAAgB;AAEtB,SAAO,kBAAkB,OAAO;AAC5B;AACA,OAAI,iBAAiB,eAAe;AAChC,YAAQ,QAAQ,oEAAoE,EAChF,gBAAgB,eACnB,CAAC;AACF,YAAQ,QAAQ,cAAc;KAC1B;KACA,wBAAwB,iBAAiB;KACzC;KACH,CAAC;AACF;;GAIJ,MAAM,gBAAgB,QAAQ,SAAS,QAAQ;AAE/C,WAAQ,QAAQ,kBAAkB;IAC9B;IACA,eAAe,QAAQ;IACvB;IACA,wBAAwB,i
BAAiB;IACzC,yBAAyB,iBAAiB,MAAM,GAAG,GAAG;IACtD;IACA;IACA,UAAU,QAAQ;IACrB,CAAC;GAGF,MAAM,yBAAyB,oBAAoB,MAAM,OACrD,uBAAuB,GAAG,YAAY,SAAS,gBAAgB,MAAM,CACxE;AAGD,OAAI,iBAAiB,YAAY,CAAC,wBAAwB;AACtD,YAAQ,QAAQ,wDAAwD;IAExE,MAAM,WAAW,cACb,kBACA,QAAQ,iBACR,mBAAmB,QAAQ,QAAQ,SAAS,QAC5C,eAAe,QAAQ,OAAO,OACjC;AACD,QAAI,SACA,QAAO,KAAK,SAAS;AAEzB;;GAKJ,MAAM,gBAAgB,QAAQ;GAC9B,MAAM,kBAAkB,gBAAgB;GACxC,IAAI,eAAe;AACnB,QAAK,IAAI,IAAI,gBAAgB,KAAK,OAAO,IACrC,KAAI,QAAQ,MAAM,gBACd,gBAAe;OAEf;AAIR,WAAQ,QAAQ,sBAAsB;IAClC;IACA;IACA;IACA,iBAAiB,QAAQ;IAC5B,CAAC;GAIF,MAAM,sBAAsB,oBAAoB,MAAM,OAClD,uBAAuB,GAAG,YAAY,SAAS,gBAAgB,aAAa,CAC/E;GAED,IAAI,gBAAgB;AAEpB,OAAI,qBAAqB;AACrB,YAAQ,QAAQ,0DAA0D;AAE1E,oBAAgB,2BACZ,gBACA,cACA,OACA,SACA,qBACA,kBACH;AAED,YAAQ,QAAQ,4BAA4B,EAAE,eAAe,CAAC;;AAIlE,OAAI,iBAAiB,GAAG;IAEpB,MAAMC,gBAAmC;KACrC;KACA;KACA;KACA;KACA;KACH;AAED,YAAQ,QAAQ,2CAA2C;AAE3D,oBAAgB,kBAAkB,kBAAkB,gBAAgB,OAAO,cAAc,cAAc;AAEvG,YAAQ,QAAQ,0BAA0B,EAAE,eAAe,CAAC;;AAGhE,OAAI,iBAAiB,GAAG;AACpB,YAAQ,QAAQ,oDAAoD;AAIpE,QAAI,iBAAiB,gBAAgB;AACjC,aAAQ,QAAQ,oDAAoD;KAGpE,MAAM,cACF,kBAAkB,iBAAiB,OAAO,SACpC,iBAAiB,MACb,GACA,kBAAkB,iBAAiB,KAAK,kBAAkB,gBAC7D,GACD;KACV,MAAM,UAAU,cACZ,YAAY,MAAM,EAClB,QAAQ,iBACR,QACA,eAAe,QAAQ,OAAO,OACjC;AACD,SAAI,QACA,QAAO,KAAK,QAAQ;AAGxB,wBAAmB,iBAAiB,MAAM,YAAY,OAAO,CAAC,MAAM;AACpE;AACA,oBAAe;AAEf,aAAQ,QAAQ,qBAAqB;MACjC;MACA,wBAAwB,iBAAiB;MAC5C,CAAC;AAEF;;AAGJ,oBAAgB,kBAAkB,eAAe,KAAK,kBAAkB;AACxE,YAAQ,QAAQ,uDAAuD,EAAE,eAAe,CAAC;;GAG7F,MAAM,eAAe,iBAAiB,MAAM,GAAG,cAAc,CAAC,MAAM;AAEpE,WAAQ,QAAQ,mBAAmB;IAC/B;IACA,oBAAoB,aAAa;IACjC,qBAAqB,aAAa,MAAM,GAAG,GAAG;IACjD,CAAC;GAIF,MAAM,iBAAiB,eACjB,oBAAoB,cAAc,gBAAgB,OAAO,SAAS,gBAAgB,GAClF;GACN,MAAM,eAAe,eACf,kBAAkB,cAAc,gBAAgB,cAAc,SAAS,gBAAgB,GACvF;AAEN,WAAQ,QAAQ,uBAAuB;IACnC;IACA;IACA,iBAAiB,CAAC,CAAC;IACtB,CAAC;AAEF,OAAI,cAAc;IACd,MAAM,WAAW,cACb,cACA,QAAQ,iBACR,eAAe,iBAAiB,QAAQ,gBAAgB,QACxD,eAAe,QAAQ,OAAO,OACjC;AACD,QAAI,UAAU;AACV,YAAO,KAAK,SAAS;AAErB,aAAQ,QAAQ,mBAAmB;MAC/B,eAAe,SAAS,QAAQ;MAChC,MAAM,SAAS;MACf,IAAI,SAAS;MAChB,CAAC;;;GAKV,MAAM,sBAAsB,iBAAiB;AAC7C,sBAAmB,iBAAiB,MAAM,cAAc,CAAC,MAAM;AAE/D,WAAQ,QAAQ,kCAAkC;IAC9C,WAAW,iBAAiB;IAC5B,YAAY;IACZ,cAAc;IACjB,CAAC;AAGF,OAAI,CAAC,kBAAkB;AACnB,YAAQ,QAAQ,6CAA6C;AAC7D;;GAKJ,IAAI,cAAc;AAGlB,OAAI,oBAAoB,eAAe,KAAK,OAAO;IAC/C,MAAM,eAAe,gBAAgB,IAAI,QAAQ,eAAe,GAAG;AACnE,QAAI,cAAc;KACd,MAAM,aAAa,aAAa,QAAQ,MAAM,GAAG,KAAK,IAAI,IAAI,aAAa,OAAO,CAAC;AACnF,SAAI,cAAc,iBAAiB,WAAW,WAAW,EAAE;AACvD,oBAAc,eAAe;AAC7B,cAAQ,QAAQ,wCAAwC,EAAE,aAAa,aAAa,CAAC;;;;AAKjG,WAAQ,QAAQ,oBAAoB;IAChC;IACA,oBAAoB;IACpB,aAAa,gBAAgB;IAChC,CAAC;AAEF,oBAAiB;AACjB,kBAAe;;;AAIvB,SAAQ,OAAO,mCAAmC,EAAE,aAAa,OAAO,QAAQ,CAAC;AAEjF,QAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA6CX,MAAa,gBAAgB,OAAe,YAA4C;CACpF,MAAM,EAAE,QAAQ,EAAE,EAAE,UAAU,aAAa,SAAS,UAAU,WAAW;AACzE,KAAI,CAAC,MAAM,OACP,QAAO,EAAE;CAGb,MAAM,EAAE,SAAS,cAAc,iBAAiB,mBAAmB,YAAY,aAAa,MAAM;CAClG,MAAMC,cAA4B,EAAE;AAGpC,MAAK,MAAM,QAAQ,OAAO;EACtB,MAAM,EAAE,OAAO,aAAa,cAAc,wBAAwB,eAAe,KAAK;EAOtF,MAAM,eAAe,mBAHM,oBAHR,YAAY,cAAc,OAAO,aAAa,aAAa,EAGnB,MAAM,QAAQ,MAAM,EAGnB,KAAK,WAAW;AAE5E,OAAK,MAAM,KAAK,cAAc;GAI1B,MAAM,oBAAoB,uBAAuB,EAAE,aAAa;GAChE,MAAM,eAAe,oBAAoB,EAAE,MAAM,EAAE,SAAU,SAAS,EAAE,QAAQ;AAEhF,eAAY,KAAK;IAEb,iBAAiB,oBAAoB,SAAY,EAAE;IAEnD,oBAAoB,oBAAoB,eAAe;IACvD,QAAQ,KAAK,SAAS,UAAU,OAAO,EAAE,QAAQ,EAAE;IACnD,MAAM,KAAK;IACX,eAAe,EAAE;IACpB,CAAC;;;CAMV,MAAM,0BAAU,IAAI,KAAyB;AAC7C,MAAK,MAAM,KAAK,aAAa;EACzB,MAAM,WAAW,QAAQ,IAAI,EAAE,MAAM;AACrC,MAAI,CAAC,SACD,SAAQ,IAAI,EAAE,OAAO,EAAE;WAKlB,EAAE,uBAAuB,UAAa,SAAS,uBAAuB,UACtE,EAAE,SAAS,UAAa,SAAS,SAAS,OAE3C,SAAQ,IAAI,EAAE,OAAO,EAAE;;CAInC,MAAM,SAAS,CAAC,GAAG,
QAAQ,QAAQ,CAAC;AACpC,QAAO,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;CAGxC,IAAI,WAAW,cAAc,QAAQ,cAAc,SAAS,MAAM;AAIlE,KAAI,SAAS,WAAW,KAAK,MAAM,SAAS,GAAG;EAC3C,MAAM,YAAY,MAAM;EACxB,MAAM,WAAW,MAAM,MAAM,SAAS;EAGtC,MAAMC,aAAsB;GACxB,SAFe,kBAAkB,KAAK,KAAK,CAEvB,MAAM;GAC1B,MAAM,UAAU;GACnB;AACD,MAAI,SAAS,OAAO,UAAU,GAC1B,YAAW,KAAK,SAAS;AAE7B,MAAI,WAAW,QACX,YAAW,CAAC,WAAW;;AAK/B,KAAI,aAAa,UAAa,YAAY,KAAK,aAAa,OACxD,QAAO,iBAAiB,UAAU,OAAO,mBAAmB,UAAU,aAAa,QAAQ,OAAO;AAGtG,QAAO;;;;;;;;;;;;;;;;;AAkBX,MAAM,iBAAiB,aAA2B,SAAiB,SAAkB,UAAkC;;;;CAInH,MAAMC,mBACF,OACA,KACA,MACA,iBACA,eACA,uBACiB;EAEjB,MAAM,cAAc,SAAS,sBAAsB;EAGnD,MAAM,SAAS,QAAQ,MAAM,aAAa,IAAI;EAC9C,IAAI,OAAO,iBAAiB,MAAM,KAAK,qBAAqB,OAAO,MAAM,GAAG,OAAO,QAAQ,YAAY,GAAG;AAC1G,MAAI,CAAC,KACD,QAAO;AAEX,MAAI,CAAC,gBACD,QAAO,kBAAkB,MAAM,aAAa,QAAQ,WAAW;EAEnE,MAAM,OAAO,QAAQ,MAAM,YAAY;EACvC,MAAM,KAAK,kBAAkB,QAAQ,MAAM,MAAM,EAAE,GAAG,QAAQ,MAAM,cAAc,KAAK,SAAS,EAAE;EAClG,MAAMC,MAAe;GAAE,SAAS;GAAM;GAAM;AAC5C,MAAI,OAAO,KACP,KAAI,KAAK;AAEb,MAAI,QAAQ,cACR,KAAI,OAAO;GAAE,GAAG;GAAM,GAAG;GAAe;AAE5C,SAAO;;;;;CAMX,MAAM,sCAAiD;EACnD,MAAML,SAAoB,EAAE;AAC5B,OAAK,IAAI,IAAI,GAAG,IAAI,YAAY,QAAQ,KAAK;GACzC,MAAM,KAAK,YAAY;GACvB,MAAM,MAAM,IAAI,YAAY,SAAS,IAAI,YAAY,IAAI,GAAG,QAAQ,QAAQ;GAC5E,MAAM,IAAII,gBACN,GAAG,OACH,KACA,GAAG,MACH,GAAG,iBACH,GAAG,eACH,GAAG,mBACN;AACD,OAAI,EACA,QAAO,KAAK,EAAE;;AAGtB,SAAO;;CAGX,MAAME,WAAsB,EAAE;AAG9B,KAAI,CAAC,YAAY,QAAQ;AAErB,MAAI,gBAAgB,OADJ,QAAQ,MAAM,EAAE,CACG,EAAE;GACjC,MAAM,IAAIF,gBAAc,GAAG,QAAQ,OAAO;AAC1C,OAAI,EACA,UAAS,KAAK,EAAE;;AAGxB,SAAO;;AAIX,KAAI,YAAY,GAAG,QAAQ,GAEvB;MAAI,gBAAgB,OADJ,QAAQ,MAAM,EAAE,CACG,EAAE;GACjC,MAAM,IAAIA,gBAAc,GAAG,YAAY,GAAG,MAAM;AAChD,OAAI,EACA,UAAS,KAAK,EAAE;;;AAM5B,QAAO,CAAC,GAAG,UAAU,GAAG,+BAA+B,CAAC;;;;;;;;;;;;;;;;;ACx+B5D,MAAMG,uBAAiC;CACnC;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACH;;;;;AAMD,MAAM,yBAAmC;CACrC,MAAM,YAAY,oBAAoB;CACtC,MAAM,cAAc,qBAAqB,QAAQ,MAAM,UAAU,SAAS,EAAE,CAAC;CAC7E,MAAM,YAAY,UAAU,QAAQ,MAAM,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC,MAAM;AACnF,QAAO,CAAC,GAAG,aAAa,GAAG,UAAU;;;;;;;;;;;;;;;;;AAkBzC,MAAa,uBAAuB,SAAoC;AACpE,KAAI,CAAC,KACD,QAAO,EAAE;CAGb,MAAMC,UAA6B,EAAE;CACrC,MAAMC,gBAAyC,EAAE;CAGjD,MAAM,qBAAqB,OAAe,QAAyB;AAC/D,SAAO,cAAc,MAChB,CAAC,GAAG,OAAQ,SAAS,KAAK,QAAQ,KAAO,MAAM,KAAK,OAAO,KAAO,SAAS,KAAK,OAAO,EAC3F;;AAIL,MAAK,MAAM,aAAa,kBAAkB,EAAE;EACxC,MAAM,UAAU,eAAe;AAC/B,MAAI,CAAC,QACD;AAGJ,MAAI;GAEA,MAAM,QAAQ,IAAI,OAAO,IAAI,QAAQ,IAAI,KAAK;GAC9C,IAAIC;AAGJ,WAAQ,QAAQ,MAAM,KAAK,KAAK,MAAM,MAAM;IACxC,MAAM,aAAa,MAAM;IACzB,MAAM,WAAW,aAAa,MAAM,GAAG;AAGvC,QAAI,kBAAkB,YAAY,SAAS,CACvC;AAGJ,YAAQ,KAAK;KAAE;KAAU,OAAO;KAAY,OAAO,MAAM;KAAI,OAAO;KAAW,CAAC;AAEhF,kBAAc,KAAK,CAAC,YAAY,SAAS,CAAC;;UAE1C;;AAGZ,QAAO,QAAQ,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;;;;;;;;;;;;;;;AAgBpD,MAAa,4BAA4B,MAAc,aAAwC;AAC3F,KAAI,CAAC,QAAQ,SAAS,WAAW,EAC7B,QAAO;CAKX,IAAI,WAAW;CACf,MAAM,oBAAoB,CAAC,GAAG,SAAS,CAAC,MAAM,GAAG,MAAM,EAAE,QAAQ,EAAE,MAAM;AAEzE,MAAK,MAAM,KAAK,kBACZ,YAAW,GAAG,SAAS,MAAM,GAAG,EAAE,MAAM,CAAC,IAAI,EAAE,MAAM,IAAI,SAAS,MAAM,EAAE,SAAS;AAGvF,QAAO;;;;;;;;AASX,MAAa,wBACT,aAC2F;CAE3F,MAAM,qBAAqB,SAAS,MAAM,MAAM;EAAC;EAAY;EAAS;EAAO;EAAO,CAAC,SAAS,EAAE,MAAM,CAAC;CAGvG,MAAM,qBAAqB,SAAS,MAAM,MAAM;EAAC;EAAS;EAAQ;EAAW,CAAC,SAAS,EAAE,MAAM,CAAC;AAGhG,KAAI,mBACA,QAAO;EACH,OAAO;EACP,UAAU,SAAS,MAAM,MAAM;GAAC;GAAS;GAAO;GAAO,CAAC,SAAS,EAAE,MAAM,CAAC,EAAE,SAAS;EACrF,aAAa;EAChB;AAIL,KAAI,mBACA,QAAO;EAAE,OAAO;EAAO,UAAU;EAAU,aAAa;EAAmB;AAI/E,QAAO;EAAE,OAAO;EAAO,aAAa;EAAmB;;;;;;;;AAS3D,MAAa,sBACT,SAOQ;CACR,MAAM,WAAW,oBAAoB,KAAK;AAE1C,KAAI,SAAS,WAAW,EACpB,QAAO;AAMX,QAAO;EAAE;EAAU,UAHF,yBAAyB,MAAM,SAAS;EAG5B,GAFd,qBAAqB,SAAS;EAEL"}
package/package.json CHANGED
@@ -7,7 +7,7 @@
  "devDependencies": {
  "@biomejs/biome": "2.3.8",
  "@types/bun": "^1.3.4",
- "tsdown": "^0.17.3",
+ "tsdown": "^0.17.4",
  "typescript": "^5.9.3"
  },
  "engines": {
@@ -49,5 +49,5 @@
  },
  "type": "module",
  "types": "./dist/index.d.mts",
- "version": "2.2.1"
+ "version": "2.2.2"
  }