flappa-doormal 2.3.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -967,7 +967,7 @@ type ExpandResult = {
  * expandTokensWithCaptures('{{bab}}', makeDiacriticInsensitive)
  * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }
  */
- declare const expandTokensWithCaptures: (query: string, fuzzyTransform?: (pattern: string) => string) => ExpandResult;
+ declare const expandTokensWithCaptures: (query: string, fuzzyTransform?: (pattern: string) => string, capturePrefix?: string) => ExpandResult;
  /**
  * Expands template tokens in a query string to their regex equivalents.
  *
@@ -1 +1 @@
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EY,cDzaC,WCyaqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;;ACrjBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA,KA9VK,YAAA,GA8VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AA6NA,KHvOK,eAAA,GG2OJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAmHA;AAuBA;AAqBA;AAgBA;;;;ACrjBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwCC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;;;;AEhtBX;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;;ACrjBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cHgNa,sBAAuB,iBAAiB,wBAAsB;;;;AFxU3E;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;cA7fa;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cDuGa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAmH1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;;AJ/fb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cIlmBN,mBJkmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIlmBgC,eJkmBhC,EAAA;AAiBnB;;;;AC/SA;;;;;;;;ACpYA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AA6NA;AA2CA;AAWY,cC9LC,oBD8LW,EAAA,CAAA,QAAA,EC7LV,eD6LU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBA8FZ,GAAA,iBA9FsG;EAmH1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;ACrjBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EY,cDzaC,WCyaqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA,KA9VK,YAAA,GA8VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA,KHvOK,eAAA,GG2OJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwCC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;;;;AEhtBX;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cHgaa,sBAAuB,iBAAiB,wBAAsB;;;;AFxhB3E;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;cAngBa;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cDuGa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,mHAIV;;;;;;;;;;;;;;;;;;;;cAqHU;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;;AJrgBb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cIlmBN,mBJkmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIlmBgC,eJkmBhC,EAAA;AAiBnB;;;;AC/FA;;;;;;;;ACplBA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AA6NA;AA2CA;AAWY,cC9LC,oBD8LW,EAAA,CAAA,QAAA,EC7LV,eD6LU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAoGZ,GAAA,iBAhGE;EAqHU,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC3jBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}
package/dist/index.mjs CHANGED
@@ -732,171 +732,6 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
  return result;
  };
 
- //#endregion
- //#region src/segmentation/match-utils.ts
- /**
- * Utility functions for regex matching and result processing.
- *
- * These functions were extracted from `segmenter.ts` to reduce complexity
- * and enable independent testing. They handle match filtering, capture
- * extraction, and occurrence-based selection.
- *
- * @module match-utils
- */
- /**
- * Extracts named capture groups from a regex match.
- *
- * Only includes groups that are in the `captureNames` list and have
- * defined values. This filters out positional captures and ensures
- * only explicitly requested named captures are returned.
- *
- * @param groups - The `match.groups` object from `RegExp.exec()`
- * @param captureNames - List of capture names to extract (from `{{token:name}}` syntax)
- * @returns Object with capture name → value pairs, or `undefined` if none found
- *
- * @example
- * const match = /(?<num>[٠-٩]+) -/.exec('٦٦٩٦ - text');
- * extractNamedCaptures(match.groups, ['num'])
- * // → { num: '٦٦٩٦' }
- *
- * @example
- * // No matching captures
- * extractNamedCaptures({}, ['num'])
- * // → undefined
- *
- * @example
- * // Undefined groups
- * extractNamedCaptures(undefined, ['num'])
- * // → undefined
- */
- const extractNamedCaptures = (groups, captureNames) => {
- if (!groups || captureNames.length === 0) return;
- const namedCaptures = {};
- for (const name of captureNames) if (groups[name] !== void 0) namedCaptures[name] = groups[name];
- return Object.keys(namedCaptures).length > 0 ? namedCaptures : void 0;
- };
- /**
- * Gets the last defined positional capture group from a match array.
- *
- * Used for `lineStartsAfter` patterns where the content capture (`.*`)
- * is always at the end of the pattern. Named captures may shift the
- * positional indices, so we iterate backward to find the actual content.
- *
- * @param match - RegExp exec result array
- * @returns The last defined capture group value, or `undefined` if none
- *
- * @example
- * // Pattern: ^(?:(?<num>[٠-٩]+) - )(.*)
- * // Match array: ['٦٦٩٦ - content', '٦٦٩٦', 'content']
- * getLastPositionalCapture(match)
- * // → 'content'
- *
- * @example
- * // No captures
- * getLastPositionalCapture(['full match'])
- * // → undefined
- */
- const getLastPositionalCapture = (match) => {
- if (match.length <= 1) return;
- for (let i = match.length - 1; i >= 1; i--) if (match[i] !== void 0) return match[i];
- };
- /**
- * Filters matches to only include those within page ID constraints.
- *
- * Applies the `min`, `max`, and `exclude` constraints from a rule to filter out
- * matches that occur on pages outside the allowed range or explicitly excluded.
- *
- * @param matches - Array of match results to filter
- * @param rule - Rule containing `min`, `max`, and/or `exclude` page constraints
- * @param getId - Function that returns the page ID for a given offset
- * @returns Filtered array containing only matches within constraints
- *
- * @example
- * const matches = [
- * { start: 0, end: 10 }, // Page 1
- * { start: 100, end: 110 }, // Page 5
- * { start: 200, end: 210 }, // Page 10
- * ];
- * filterByConstraints(matches, { min: 3, max: 8 }, getId)
- * // → [{ start: 100, end: 110 }] (only page 5 match)
- */
- const filterByConstraints = (matches, rule, getId) => {
- return matches.filter((m) => {
- const id = getId(m.start);
- if (rule.min !== void 0 && id < rule.min) return false;
- if (rule.max !== void 0 && id > rule.max) return false;
- if (isPageExcluded(id, rule.exclude)) return false;
- return true;
- });
- };
- /**
- * Filters matches based on occurrence setting (first, last, or all).
- *
- * Applies occurrence-based selection to a list of matches:
- * - `'all'` or `undefined`: Return all matches (default)
- * - `'first'`: Return only the first match
- * - `'last'`: Return only the last match
- *
- * @param matches - Array of match results to filter
- * @param occurrence - Which occurrence(s) to keep
- * @returns Filtered array based on occurrence setting
- *
- * @example
- * const matches = [{ start: 0 }, { start: 10 }, { start: 20 }];
- *
- * filterByOccurrence(matches, 'first')
- * // → [{ start: 0 }]
- *
- * filterByOccurrence(matches, 'last')
- * // → [{ start: 20 }]
- *
- * filterByOccurrence(matches, 'all')
- * // → [{ start: 0 }, { start: 10 }, { start: 20 }]
- *
- * filterByOccurrence(matches, undefined)
- * // → [{ start: 0 }, { start: 10 }, { start: 20 }] (default: all)
- */
- const filterByOccurrence = (matches, occurrence) => {
- if (!matches.length) return [];
- if (occurrence === "first") return [matches[0]];
- if (occurrence === "last") return [matches[matches.length - 1]];
- return matches;
- };
- /**
- * Checks if any rule in the list allows the given page ID.
- *
- * A rule allows an ID if it falls within the rule's `min`/`max` constraints.
- * Rules without constraints allow all page IDs.
- *
- * This is used to determine whether to create a segment for content
- * that appears before any split points (the "first segment").
- *
- * @param rules - Array of rules with optional `min` and `max` constraints
- * @param pageId - Page ID to check
- * @returns `true` if at least one rule allows the page ID
- *
- * @example
- * const rules = [
- * { min: 5, max: 10 }, // Allows pages 5-10
- * { min: 20 }, // Allows pages 20+
- * ];
- *
- * anyRuleAllowsId(rules, 7) // → true (first rule allows)
- * anyRuleAllowsId(rules, 3) // → false (no rule allows)
- * anyRuleAllowsId(rules, 25) // → true (second rule allows)
- *
- * @example
- * // Rules without constraints allow everything
- * anyRuleAllowsId([{}], 999) // → true
- */
- const anyRuleAllowsId = (rules, pageId) => {
- return rules.some((r) => {
- const minOk = r.min === void 0 || pageId >= r.min;
- const maxOk = r.max === void 0 || pageId <= r.max;
- return minOk && maxOk;
- });
- };
-
  //#endregion
  //#region src/segmentation/tokens.ts
  /**
@@ -1127,7 +962,7 @@ const containsTokens = (query) => {
  * expandTokensWithCaptures('{{bab}}', makeDiacriticInsensitive)
  * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }
  */
- const expandTokensWithCaptures = (query, fuzzyTransform) => {
+ const expandTokensWithCaptures = (query, fuzzyTransform, capturePrefix) => {
  const captureNames = [];
  const captureNameCounts = /* @__PURE__ */ new Map();
  /**
@@ -1169,16 +1004,18 @@ const expandTokensWithCaptures = (query, fuzzyTransform) => {
  const [, tokenName, captureName] = tokenMatch;
  if (!tokenName && captureName) {
  const uniqueName = getUniqueCaptureName(captureName);
- captureNames.push(uniqueName);
- return `(?<${uniqueName}>.+)`;
+ const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
+ captureNames.push(prefixedName);
+ return `(?<${prefixedName}>.+)`;
  }
  let tokenPattern = TOKEN_PATTERNS[tokenName];
  if (!tokenPattern) return segment.value;
  if (fuzzyTransform) tokenPattern = tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/.test(part) ? fuzzyTransform(part) : part).join("|");
  if (captureName) {
  const uniqueName = getUniqueCaptureName(captureName);
- captureNames.push(uniqueName);
- return `(?<${uniqueName}>${tokenPattern})`;
+ const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
+ captureNames.push(prefixedName);
+ return `(?<${prefixedName}>${tokenPattern})`;
  }
  return tokenPattern;
  });
@@ -1266,6 +1103,224 @@ const getAvailableTokens = () => Object.keys(TOKEN_PATTERNS);
  */
  const getTokenPattern = (tokenName) => TOKEN_PATTERNS[tokenName];
 
+ //#endregion
+ //#region src/segmentation/fast-fuzzy-prefix.ts
+ /**
+ * Fast-path fuzzy prefix matching for common Arabic line-start markers.
+ *
+ * This exists to avoid running expensive fuzzy-expanded regex alternations over
+ * a giant concatenated string. Instead, we match only at known line-start
+ * offsets and perform a small deterministic comparison:
+ * - Skip Arabic diacritics in the CONTENT
+ * - Treat common equivalence groups as equal (ا/آ/أ/إ, ة/ه, ى/ي)
+ *
+ * This module is intentionally conservative: it only supports "literal"
+ * token patterns (plain text alternation via `|`), not general regex.
+ */
+ const isArabicDiacriticCode = (code) => code >= 1611 && code <= 1618;
+ const equivKey = (ch) => {
+ switch (ch) {
+ case "آ":
+ case "أ":
+ case "إ": return "ا";
+ case "ه": return "ة";
+ case "ي": return "ى";
+ default: return ch;
+ }
+ };
+ /**
+ * Match a fuzzy literal prefix at a given offset.
+ *
+ * - Skips diacritics in the content
+ * - Applies equivalence groups on both content and literal
+ *
+ * @returns endOffset (exclusive) in CONTENT if matched; otherwise null.
+ */
+ const matchFuzzyLiteralPrefixAt = (content, offset, literal) => {
+ let i = offset;
+ while (i < content.length && isArabicDiacriticCode(content.charCodeAt(i))) i++;
+ for (let j = 0; j < literal.length; j++) {
+ const litCh = literal[j];
+ while (i < content.length && isArabicDiacriticCode(content.charCodeAt(i))) i++;
+ if (i >= content.length) return null;
+ const cCh = content[i];
+ if (equivKey(cCh) !== equivKey(litCh)) return null;
+ i++;
+ }
+ while (i < content.length && isArabicDiacriticCode(content.charCodeAt(i))) i++;
+ return i;
+ };
+ const isLiteralOnly = (s) => {
+ return !/[\\[\]{}()^$.*+?]/.test(s);
+ };
+ const compileLiteralAlternation = (pattern) => {
+ if (!pattern) return null;
+ if (!isLiteralOnly(pattern)) return null;
+ const alternatives = pattern.split("|").map((s) => s.trim()).filter(Boolean);
+ if (!alternatives.length) return null;
+ return { alternatives };
+ };
+ /**
+ * Attempt to compile a fast fuzzy rule from a single-token pattern like `{{kitab}}`.
+ * Returns null if not eligible.
+ */
+ const compileFastFuzzyTokenRule = (tokenTemplate) => {
+ const m = tokenTemplate.match(/^\{\{(\w+)\}\}$/);
+ if (!m) return null;
+ const token = m[1];
+ const tokenPattern = getTokenPattern(token);
+ if (!tokenPattern) return null;
+ const compiled = compileLiteralAlternation(tokenPattern);
+ if (!compiled) return null;
+ return {
+ alternatives: compiled.alternatives,
+ token
+ };
+ };
+ /**
+ * Try matching any alternative for a compiled token at a line-start offset.
+ * Returns endOffset (exclusive) on match, else null.
+ */
+ const matchFastFuzzyTokenAt = (content, offset, compiled) => {
+ for (const alt of compiled.alternatives) {
+ const end = matchFuzzyLiteralPrefixAt(content, offset, alt);
+ if (end !== null) return end;
+ }
+ return null;
+ };
+
+ //#endregion
+ //#region src/segmentation/match-utils.ts
+ /**
+ * Utility functions for regex matching and result processing.
+ *
+ * These functions were extracted from `segmenter.ts` to reduce complexity
+ * and enable independent testing. They handle match filtering, capture
+ * extraction, and occurrence-based selection.
+ *
+ * @module match-utils
+ */
+ /**
+ * Extracts named capture groups from a regex match.
+ *
+ * Only includes groups that are in the `captureNames` list and have
+ * defined values. This filters out positional captures and ensures
+ * only explicitly requested named captures are returned.
+ *
+ * @param groups - The `match.groups` object from `RegExp.exec()`
+ * @param captureNames - List of capture names to extract (from `{{token:name}}` syntax)
+ * @returns Object with capture name → value pairs, or `undefined` if none found
+ *
+ * @example
+ * const match = /(?<num>[٠-٩]+) -/.exec('٦٦٩٦ - text');
+ * extractNamedCaptures(match.groups, ['num'])
+ * // → { num: '٦٦٩٦' }
+ *
+ * @example
+ * // No matching captures
+ * extractNamedCaptures({}, ['num'])
+ * // → undefined
+ *
+ * @example
+ * // Undefined groups
+ * extractNamedCaptures(undefined, ['num'])
+ * // → undefined
+ */
+ const extractNamedCaptures = (groups, captureNames) => {
+ if (!groups || captureNames.length === 0) return;
+ const namedCaptures = {};
+ for (const name of captureNames) if (groups[name] !== void 0) namedCaptures[name] = groups[name];
+ return Object.keys(namedCaptures).length > 0 ? namedCaptures : void 0;
+ };
+ /**
+ * Gets the last defined positional capture group from a match array.
+ *
+ * Used for `lineStartsAfter` patterns where the content capture (`.*`)
+ * is always at the end of the pattern. Named captures may shift the
+ * positional indices, so we iterate backward to find the actual content.
+ *
+ * @param match - RegExp exec result array
+ * @returns The last defined capture group value, or `undefined` if none
+ *
+ * @example
+ * // Pattern: ^(?:(?<num>[٠-٩]+) - )(.*)
+ * // Match array: ['٦٦٩٦ - content', '٦٦٩٦', 'content']
+ * getLastPositionalCapture(match)
+ * // → 'content'
+ *
+ * @example
+ * // No captures
+ * getLastPositionalCapture(['full match'])
+ * // → undefined
+ */
+ const getLastPositionalCapture = (match) => {
+ if (match.length <= 1) return;
+ for (let i = match.length - 1; i >= 1; i--) if (match[i] !== void 0) return match[i];
+ };
+ /**
+ * Filters matches to only include those within page ID constraints.
+ *
+ * Applies the `min`, `max`, and `exclude` constraints from a rule to filter out
+ * matches that occur on pages outside the allowed range or explicitly excluded.
+ *
+ * @param matches - Array of match results to filter
+ * @param rule - Rule containing `min`, `max`, and/or `exclude` page constraints
+ * @param getId - Function that returns the page ID for a given offset
+ * @returns Filtered array containing only matches within constraints
+ *
+ * @example
+ * const matches = [
+ * { start: 0, end: 10 }, // Page 1
+ * { start: 100, end: 110 }, // Page 5
+ * { start: 200, end: 210 }, // Page 10
+ * ];
+ * filterByConstraints(matches, { min: 3, max: 8 }, getId)
+ * // → [{ start: 100, end: 110 }] (only page 5 match)
+ */
+ const filterByConstraints = (matches, rule, getId) => {
+ return matches.filter((m) => {
+ const id = getId(m.start);
+ if (rule.min !== void 0 && id < rule.min) return false;
+ if (rule.max !== void 0 && id > rule.max) return false;
+ if (isPageExcluded(id, rule.exclude)) return false;
+ return true;
+ });
+ };
+ /**
+ * Checks if any rule in the list allows the given page ID.
+ *
+ * A rule allows an ID if it falls within the rule's `min`/`max` constraints.
+ * Rules without constraints allow all page IDs.
+ *
+ * This is used to determine whether to create a segment for content
+ * that appears before any split points (the "first segment").
+ *
+ * @param rules - Array of rules with optional `min` and `max` constraints
+ * @param pageId - Page ID to check
+ * @returns `true` if at least one rule allows the page ID
+ *
+ * @example
+ * const rules = [
+ * { min: 5, max: 10 }, // Allows pages 5-10
+ * { min: 20 }, // Allows pages 20+
+ * ];
+ *
+ * anyRuleAllowsId(rules, 7) // → true (first rule allows)
+ * anyRuleAllowsId(rules, 3) // → false (no rule allows)
+ * anyRuleAllowsId(rules, 25) // → true (second rule allows)
+ *
+ * @example
+ * // Rules without constraints allow everything
+ * anyRuleAllowsId([{}], 999) // → true
+ */
+ const anyRuleAllowsId = (rules, pageId) => {
+ return rules.some((r) => {
+ const minOk = r.min === void 0 || pageId >= r.min;
+ const maxOk = r.max === void 0 || pageId <= r.max;
+ return minOk && maxOk;
+ });
+ };
+
  //#endregion
  //#region src/segmentation/rule-regex.ts
  /**
@@ -1319,39 +1374,42 @@ const compileRuleRegex = (pattern) => {
  *
  * Brackets `()[]` outside `{{tokens}}` are auto-escaped.
  */
- const processPattern = (pattern, fuzzy) => {
- const { pattern: expanded, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(pattern), fuzzy ? makeDiacriticInsensitive : void 0);
+ const processPattern = (pattern, fuzzy, capturePrefix) => {
+ const { pattern: expanded, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(pattern), fuzzy ? makeDiacriticInsensitive : void 0, capturePrefix);
  return {
  captureNames,
  pattern: expanded
  };
  };
- const buildLineStartsAfterRegexSource = (patterns, fuzzy) => {
- const processed = patterns.map((p) => processPattern(p, fuzzy));
+ const buildLineStartsAfterRegexSource = (patterns, fuzzy, capturePrefix) => {
+ const processed = patterns.map((p) => processPattern(p, fuzzy, capturePrefix));
  const union = processed.map((p) => p.pattern).join("|");
+ const captureNames = processed.flatMap((p) => p.captureNames);
+ const contentCapture = capturePrefix ? `(?<${capturePrefix}content>.*)` : "(.*)";
+ if (capturePrefix) captureNames.push(`${capturePrefix}content`);
  return {
- captureNames: processed.flatMap((p) => p.captureNames),
- regex: `^(?:${union})(.*)`
+ captureNames,
+ regex: `^(?:${union})${contentCapture}`
  };
  };
- const buildLineStartsWithRegexSource = (patterns, fuzzy) => {
- const processed = patterns.map((p) => processPattern(p, fuzzy));
+ const buildLineStartsWithRegexSource = (patterns, fuzzy, capturePrefix) => {
+ const processed = patterns.map((p) => processPattern(p, fuzzy, capturePrefix));
  const union = processed.map((p) => p.pattern).join("|");
  return {
  captureNames: processed.flatMap((p) => p.captureNames),
  regex: `^(?:${union})`
  };
  };
- const buildLineEndsWithRegexSource = (patterns, fuzzy) => {
- const processed = patterns.map((p) => processPattern(p, fuzzy));
+ const buildLineEndsWithRegexSource = (patterns, fuzzy, capturePrefix) => {
+ const processed = patterns.map((p) => processPattern(p, fuzzy, capturePrefix));
  const union = processed.map((p) => p.pattern).join("|");
  return {
  captureNames: processed.flatMap((p) => p.captureNames),
  regex: `(?:${union})$`
  };
  };
- const buildTemplateRegexSource = (template) => {
- const { pattern, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(template));
+ const buildTemplateRegexSource = (template, capturePrefix) => {
+ const { pattern, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(template), void 0, capturePrefix);
  return {
  captureNames,
  regex: pattern
@@ -1363,12 +1421,12 @@ const determineUsesCapture = (regexSource, _captureNames) => hasCapturingGroup(r
  *
  * Behavior mirrors the previous implementation in `segmenter.ts`.
  */
- const buildRuleRegex = (rule) => {
+ const buildRuleRegex = (rule, capturePrefix) => {
  const s = { ...rule };
  const fuzzy = rule.fuzzy ?? false;
  let allCaptureNames = [];
  if (s.lineStartsAfter?.length) {
- const { regex, captureNames } = buildLineStartsAfterRegexSource(s.lineStartsAfter, fuzzy);
+ const { regex, captureNames } = buildLineStartsAfterRegexSource(s.lineStartsAfter, fuzzy, capturePrefix);
  allCaptureNames = captureNames;
  return {
  captureNames: allCaptureNames,
@@ -1378,17 +1436,17 @@ const buildRuleRegex = (rule) => {
  };
  }
  if (s.lineStartsWith?.length) {
- const { regex, captureNames } = buildLineStartsWithRegexSource(s.lineStartsWith, fuzzy);
+ const { regex, captureNames } = buildLineStartsWithRegexSource(s.lineStartsWith, fuzzy, capturePrefix);
  s.regex = regex;
  allCaptureNames = captureNames;
  }
  if (s.lineEndsWith?.length) {
- const { regex, captureNames } = buildLineEndsWithRegexSource(s.lineEndsWith, fuzzy);
+ const { regex, captureNames } = buildLineEndsWithRegexSource(s.lineEndsWith, fuzzy, capturePrefix);
  s.regex = regex;
  allCaptureNames = captureNames;
  }
  if (s.template) {
- const { regex, captureNames } = buildTemplateRegexSource(s.template);
+ const { regex, captureNames } = buildTemplateRegexSource(s.template, capturePrefix);
  s.regex = regex;
  allCaptureNames = [...allCaptureNames, ...captureNames];
  }
@@ -1544,9 +1602,120 @@ const ensureFallbackSegment = (segments, pages, normalizedContent, pageJoiner) =
  return [initialSeg];
  };
  const collectSplitPointsFromRules = (rules, matchContent, pageMap) => {
- const collectSplitPointsFromRule = (rule) => {
+ const combinableRules = [];
+ const standaloneRules = [];
+ const fastFuzzyRules = [];
+ rules.forEach((rule, index) => {
+ let isCombinable = true;
+ if (rule.fuzzy && "lineStartsWith" in rule && Array.isArray(rule.lineStartsWith)) {
+ const compiled = rule.lineStartsWith.length === 1 ? compileFastFuzzyTokenRule(rule.lineStartsWith[0]) : null;
+ if (compiled) {
+ fastFuzzyRules.push({
+ compiled,
+ rule,
+ ruleIndex: index
+ });
+ return;
+ }
+ }
+ if ("regex" in rule && rule.regex) {
+ const hasNamedCaptures = extractNamedCaptureNames(rule.regex).length > 0;
+ const hasBackreferences = /\\[1-9]/.test(rule.regex);
+ const hasAnonymousCaptures = hasCapturingGroup(rule.regex);
+ if (hasNamedCaptures || hasBackreferences || hasAnonymousCaptures) isCombinable = false;
+ }
+ if (isCombinable) combinableRules.push({
+ index,
+ prefix: `r${index}_`,
+ rule
+ });
+ else standaloneRules.push(rule);
+ });
+ const splitPointsByRule = /* @__PURE__ */ new Map();
+ if (fastFuzzyRules.length > 0) {
+ let boundaryIdx = 0;
+ let currentBoundary = pageMap.boundaries[boundaryIdx];
+ const advanceBoundaryTo = (offset) => {
+ while (currentBoundary && offset > currentBoundary.end && boundaryIdx < pageMap.boundaries.length - 1) {
+ boundaryIdx++;
+ currentBoundary = pageMap.boundaries[boundaryIdx];
+ }
+ };
+ const recordSplitPoint = (ruleIndex, sp) => {
+ if (!splitPointsByRule.has(ruleIndex)) splitPointsByRule.set(ruleIndex, []);
+ splitPointsByRule.get(ruleIndex).push(sp);
+ };
+ for (let lineStart = 0; lineStart <= matchContent.length;) {
+ advanceBoundaryTo(lineStart);
+ const pageId = currentBoundary?.id ?? 0;
+ if (lineStart >= matchContent.length) break;
+ for (const { compiled, rule, ruleIndex } of fastFuzzyRules) {
+ if (!((rule.min === void 0 || pageId >= rule.min) && (rule.max === void 0 || pageId <= rule.max) && !isPageExcluded(pageId, rule.exclude))) continue;
+ const end = matchFastFuzzyTokenAt(matchContent, lineStart, compiled);
+ if (end === null) continue;
+ recordSplitPoint(ruleIndex, {
+ index: (rule.split ?? "at") === "at" ? lineStart : end,
+ meta: rule.meta
+ });
+ }
+ const nextNl = matchContent.indexOf("\n", lineStart);
+ if (nextNl === -1) break;
+ lineStart = nextNl + 1;
+ }
+ }
+ if (combinableRules.length > 0) {
+ const ruleRegexes = combinableRules.map(({ rule, prefix }) => {
+ const built = buildRuleRegex(rule, prefix);
+ return {
+ prefix,
+ source: `(?<${prefix}>${built.regex.source})`,
+ ...built
+ };
+ });
+ const combinedSource = ruleRegexes.map((r) => r.source).join("|");
+ const combinedRegex = new RegExp(combinedSource, "gm");
+ combinedRegex.lastIndex = 0;
+ let m = combinedRegex.exec(matchContent);
+ while (m !== null) {
+ const matchedRuleIndex = combinableRules.findIndex(({ prefix }) => m?.groups?.[prefix] !== void 0);
+ if (matchedRuleIndex !== -1) {
+ const { rule, prefix, index: originalIndex } = combinableRules[matchedRuleIndex];
+ const ruleInfo = ruleRegexes[matchedRuleIndex];
+ const namedCaptures = {};
+ if (m.groups) {
+ for (const prefixedName of ruleInfo.captureNames) if (m.groups[prefixedName] !== void 0) {
+ const cleanName = prefixedName.slice(prefix.length);
+ namedCaptures[cleanName] = m.groups[prefixedName];
+ }
+ }
+ let capturedContent;
+ let contentStartOffset;
+ if (ruleInfo.usesLineStartsAfter) {
+ capturedContent = m.groups?.[`${prefix}content`];
+ if (capturedContent !== void 0) contentStartOffset = (m.groups?.[prefix] || m[0]).length - capturedContent.length;
+ }
+ const start = m.index;
+ const end = m.index + m[0].length;
+ const pageId = pageMap.getId(start);
+ if ((rule.min === void 0 || pageId >= rule.min) && (rule.max === void 0 || pageId <= rule.max) && !isPageExcluded(pageId, rule.exclude)) {
+ const sp = {
+ capturedContent: void 0,
+ contentStartOffset,
+ index: (rule.split ?? "at") === "at" ? start : end,
+ meta: rule.meta,
+ namedCaptures: Object.keys(namedCaptures).length > 0 ? namedCaptures : void 0
+ };
+ if (!splitPointsByRule.has(originalIndex)) splitPointsByRule.set(originalIndex, []);
+ splitPointsByRule.get(originalIndex).push(sp);
+ }
+ }
+ if (m[0].length === 0) combinedRegex.lastIndex++;
+ m = combinedRegex.exec(matchContent);
+ }
+ }
+ const collectSplitPointsFromRule = (rule, ruleIndex) => {
  const { regex, usesCapture, captureNames, usesLineStartsAfter } = buildRuleRegex(rule);
- return filterByOccurrence(filterByConstraints(findMatches(matchContent, regex, usesCapture, captureNames), rule, pageMap.getId), rule.occurrence).map((m) => {
+ const points = filterByConstraints(findMatches(matchContent, regex, usesCapture, captureNames), rule, pageMap.getId).map((m) => {
  const isLineStartsAfter = usesLineStartsAfter && m.captured !== void 0;
  const markerLength = isLineStartsAfter ? m.end - m.captured.length - m.start : 0;
  return {
@@ -1557,8 +1726,22 @@ const collectSplitPointsFromRules = (rules, matchContent, pageMap) => {
  namedCaptures: m.namedCaptures
  };
  });
+ if (!splitPointsByRule.has(ruleIndex)) splitPointsByRule.set(ruleIndex, []);
+ splitPointsByRule.get(ruleIndex).push(...points);
  };
- return rules.flatMap(collectSplitPointsFromRule);
+ standaloneRules.forEach((rule) => {
+ collectSplitPointsFromRule(rule, rules.indexOf(rule));
+ });
+ const finalSplitPoints = [];
+ rules.forEach((rule, index) => {
+ const points = splitPointsByRule.get(index);
+ if (!points || points.length === 0) return;
+ let filtered = points;
+ if (rule.occurrence === "first") filtered = [points[0]];
+ else if (rule.occurrence === "last") filtered = [points[points.length - 1]];
+ finalSplitPoints.push(...filtered);
+ });
+ return finalSplitPoints;
  };
  /**
  * Executes a regex against content and extracts match results with capture information.