flappa-doormal 2.5.1 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -777,6 +777,23 @@ const rule = {
777
777
  };
778
778
  ```
779
779
 
780
+ ### Expanding composite tokens (for adding named captures)
781
+
782
+ Some tokens are **composites** (e.g. `{{numbered}}`), which are great for quick signatures but less convenient when you want to add named captures (e.g. to capture the number).
783
+
784
+ You can expand composites back into their underlying template form:
785
+
786
+ ```typescript
787
+ import { expandCompositeTokensInTemplate } from 'flappa-doormal';
788
+
789
+ const base = expandCompositeTokensInTemplate('{{numbered}}');
790
+ // base === '{{raqms}} {{dash}} '
791
+
792
+ // Now you can add a named capture:
793
+ const withCapture = base.replace('{{raqms}}', '{{raqms:num}}');
794
+ // withCapture === '{{raqms:num}} {{dash}} '
795
+ ```
796
+
780
797
  ## Types
781
798
 
782
799
  ### `SplitRule`
package/dist/index.d.mts CHANGED
@@ -864,6 +864,21 @@ declare const segmentPages: (pages: Page[], options: SegmentationOptions) => Seg
864
864
  * // → '{{harf}}' (unchanged - no brackets outside tokens)
865
865
  */
866
866
  declare const escapeTemplateBrackets: (pattern: string) => string;
867
+ /**
868
+ * Expands any *composite* tokens (like `{{numbered}}`) into their underlying template form
869
+ * (like `{{raqms}} {{dash}} `).
870
+ *
871
+ * This is useful when you want to take a signature produced by `analyzeCommonLineStarts()`
872
+ * and turn it into an editable template where you can add named captures, e.g.:
873
+ *
874
+ * - `{{numbered}}` → `{{raqms}} {{dash}} `
875
+ * - then: `{{raqms:num}} {{dash}} ` to capture the number
876
+ *
877
+ * Notes:
878
+ * - This only expands the plain `{{token}}` form (not `{{token:name}}`).
879
+ * - Expansion is repeated (at most 10 passes, stopping as soon as a pass changes nothing) to support nested composites.
880
+ */
881
+ declare const expandCompositeTokensInTemplate: (template: string) => string;
867
882
  /**
868
883
  * Token definitions mapping human-readable token names to regex patterns.
869
884
  *
@@ -1198,5 +1213,5 @@ declare const analyzeTextForRule: (text: string) => {
1198
1213
  detected: DetectedPattern[];
1199
1214
  } | null;
1200
1215
  //#endregion
1201
- export { type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type DetectedPattern, type ExpandResult, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
1216
+ export { type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type DetectedPattern, type ExpandResult, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
1202
1217
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/tokens.ts","../src/analysis.ts","../src/detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EY,cD/bC,WC+bqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAgQA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;ACtmBA;AAkEA;AAEA;AAuRA;;AAEa,cJhMA,wBIgMA,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;AJ/Rb;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA,KApXK,YAAA,GAoXW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAgQA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA,KF3gBK,eAAA,GEkhBJ;EAcY;EAgBA,QAAA,EAAA,MAAA;;;;ACtmBb;AAkEA;AAEA;AAuRA;;;;;;;;ACnVA;AA+EA;AAgEA;AAuBA;AAiCA;;;;;;;;KJ3HK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8DC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;AA1VX;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAgQA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;ACtmBA;AAkEA;AAEA;AAuRA;;;;;;;;ACnVA;AA+EA;AAgEA;AAuBA;AAiCA;;;;;;cH2Qa,sBAAuB,iBAAiB,wBAAsB;;;;AF5Z3E;AA+FA;;;;;ACnIiB;AA4BG;AA8BM
;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAgQA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;ACtmBA;AAkEA;AAEA;AAuRA;AACW,cD5RE,sBC4RF,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;;;;ACpVX;AA+EA;AAgEA;AAuBA;AAiCA;;;;;;;;;;;;;;;cFiHa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,mHAIV;;;;;;;;;;;;;;;;;;;;cAqHU;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;AHxiBA,KI9DD,wBAAA,GJ8D8E;EA+F7E;;;;ECnIR;EA4BA,aAAA,CAAA,EAAA,MAAe;EA8Bf;EAiCA,QAAA,CAAA,EAAA,MAAA;EAwBA;EAeA,WAAA,CAAA,EAAW,MAAA;EACV;;;;EAIA,wBAAA,CAAA,EAAA,OAAA;EAAmB;AAAA;AA2FzB;AAAkD;AAgIlD;;;;EAAqE,yBAAA,CAAA,EAAA,OAAA;EAkBzD;AAqCZ;AA0EA;AA8BA;AAiDA;;EAsDkB,MAAA,CAAA,EAAA,aAAA,GAAA,OAAA;EAwDL;;AAiBb;;;;ACjPA;;;;EAAkF,UAAA,CAAA,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,MAAA,EAAA,MAAA,EAAA,GAAA,OAAA;;;;AC1ZlF;AAgQA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;mBC9iBqB;;AAxDrB;AAkEA;AAEA;AAuRA;;EAEa,UAAA,CAAA,EAAA,OAAA,GAAA,OAAA;CACV;AAAsB,KA5Rb,uBAAA,GA4Ra;;;;ACtVb,KD4DA,sBAAA,GC5De;EA+Ed,OAAA,EAAA,MAAA;EAgEA,KAAA,EAAA,MAAA;EAuBA,QAAA,EDvGC,uBCgIb,EAxBa;AAgCd,CAAA;;;;;;;cD4Ia,iCACF,kBACE,6BACV;;;;AJhSH;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cInmBN,mBJmmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GInmB6B,eJmmB7B,EAAA;AAiBnB;;;;ACjPA;;;;;;;;AC1ZA;AAgQa,cEzKA,wBFyKsB,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EEzK8B,eFyK9B,EAAA,EAAA,GAAA,MAAA;AA2CnC;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBa,cExbA,oBFwbsF,EAAA,CAAA,QAAA,EEvbrF,eFubqF,EAAA,EAAA,GAAA;;;;ACtmBnG,CAAA;AAkEA;AAEA;AAuRA;;;;AAGyB,cC/IZ,kBD+IY,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA;;;;ECtVb,QAAA,CAAA,EAAA,MAAA;EA+EC,QAAA,EA+HC,eA9Eb,EAAA;AAeD,CAAA,GAAa,IAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/tokens.ts","../src/analysis.ts","../src/detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EY,cD/bC,WC+bqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAuOA;AAsDA;AA2CA;AAWA;AAuKA;AA6CA;AAuBA;AAqBA;AAgBA;;;;ACnqBA;AAkEA;AAEA;AAuRA;AACW,cJ/LE,wBI+LF,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;AJ9RX;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA,KApXK,YAAA,GAoXW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAuOA;AAsDA;AA2CA;AAWA;AAuKA;AA6CA,KFjjBK,eAAA,GEijBiF;EAuBzE;EAqBA,QAAA,EAAA,MAAA;AAgBb,CAAA;;;;ACnqBA;AAkEA;AAEA;AAuRA;;;;;;;;ACnVA;AA+EA;AAgEA;AAuBA;AAiCA;;;;;;;KJ3HK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8DC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;AA1VX;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAuOA;AAsDA;AA2CA;AAWA;AAuKA;AA6CA;AAuBA;AAqBA;AAgBA;;;;ACnqBA;AAkEA;AAEA;AAuRA;;;;;;;;ACnVA;AA+EA;AAgEA;AAuBA;AAiCA;;;;;cH2Qa,sBAAuB,iBAAiB,wBAAsB;;;;AF5Z3E;AA+FA
;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;ACjPA;;;;;;;;AC1ZA;AAuOA;AAsDA;AA2CA;AAWA;AAuKA;AA6CA;AAuBA;AAqBA;AAgBA;;;;ACnqBA;AAkEA;AAEA;AAuRa,cD3RA,sBC8WZ,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;;;;;ACtaD;AA+EA;AAgEA;AAuBA;AAiCA;;;cFwFa;;;;;;;;;;;;;;;;;;;;;;;;;;cAsDA,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cAuKC,mHAIV;;;;;;;;;;;;;;;;;;;;cAyCU;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;AHrmBA,KI9DD,wBAAA,GJ8D8E;EA+F7E;;;;ECnIR;EA4BA,aAAA,CAAA,EAAA,MAAe;EA8Bf;EAiCA,QAAA,CAAA,EAAA,MAAA;EAwBA;EAeA,WAAA,CAAA,EAAW,MAAA;EACV;;;;EAIA,wBAAA,CAAA,EAAA,OAAA;EAAmB;AAAA;AA2FzB;AAAkD;AAgIlD;;;;EAAqE,yBAAA,CAAA,EAAA,OAAA;EAkBzD;AAqCZ;AA0EA;AA8BA;AAiDA;;EAsDkB,MAAA,CAAA,EAAA,aAAA,GAAA,OAAA;EAwDL;;AAiBb;;;;ACjPA;;;;EAAkF,UAAA,CAAA,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,MAAA,EAAA,MAAA,EAAA,GAAA,OAAA;;;;AC1ZlF;AAuOA;AAsDA;AA2CA;AAWA;AAuKA;AA6CA;AAuBA;AAqBA;AAgBA;mBC3mBqB;;;AAxDrB;AAkEA;AAEA;AAuRA;EACW,UAAA,CAAA,EAAA,OAAA,GAAA,OAAA;CACE;AACV,KA5RS,uBAAA,GA4RT;EAAsB,IAAA,EAAA,MAAA;;;KA1Rb,sBAAA;EC5DA,OAAA,EAAA,MAAA;EA+EC,KAAA,EAAA,MAAA;EAgEA,QAAA,EDhFC,uBC+Fb,EAAA;AAQD,CAAA;AAiCA;;;;;;cD4Ia,iCACF,kBACE,6BACV;;;;AJhSH;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AAgIlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cInmBN,mBJmmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GInmB6B,eJmmB7B,EAAA;AAiBnB;;;;ACjPA;;;;;;;;AC1ZA;AAuOa,cEhJA,wBF6JZ,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE7JgE,eF6JhE,EAAA,EAAA,GAAA,MAAA;AAyCD;AA2CA;AAWA;AAuKA;AA6CA;AAuBA;AAqBa,cEreA,oBFqesD,EAAA,CAAA,QAAA,EEperD,eFoeqD,EAAA,EAAA,GAAA;EAgBtD,WAAA,EAAA,gBAAsF,GAAA,iBAAA;;;;ACnqBnG;AAkEA;AAEA;AAuRA;;;AAGG,cC/IU,kBD+IV,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA;EAAsB,QAAA,EAAA,MAAA;;;;ECtVb,QAAA,EA8ME,eA9Ma,EAAA;AA+E3B,CAAA,GAAa,IAAA"}
package/dist/index.mjs CHANGED
@@ -974,7 +974,7 @@ const escapeTemplateBrackets = (pattern) => {
974
974
  return `\\${bracket}`;
975
975
  });
976
976
  };
977
- const RUMUZ_ATOM = `(?:خت|خغ|بخ|عخ|مق|مت|عس|سي|كن|مد|قد|خد|فد|دل|كد|غد|صد|دت|تم|فق|دق|[خرزيمنصسدفلتقع]|(?<![\\u0660-\\u0669])٤(?![\\u0660-\\u0669]))`;
977
+ const RUMUZ_ATOM = `(?:خت|خغ|بخ|عخ|مق|مت|عس|سي|سن|كن|مد|قد|خد|فد|دل|كد|غد|صد|دت|تم|فق|دق|[خرزيمنصسدفلتقع]|(?<![\\u0660-\\u0669])٤(?![\\u0660-\\u0669]))`;
978
978
  const RUMUZ_BLOCK = `${RUMUZ_ATOM}(?:\\s+${RUMUZ_ATOM})*`;
979
979
  const BASE_TOKENS = {
980
980
  bab: "باب",
@@ -1012,6 +1012,31 @@ const BASE_TOKENS = {
1012
1012
  */
1013
1013
  const COMPOSITE_TOKENS = { numbered: "{{raqms}} {{dash}} " };
1014
1014
  /**
1015
+ * Expands any *composite* tokens (like `{{numbered}}`) into their underlying template form
1016
+ * (like `{{raqms}} {{dash}} `).
1017
+ *
1018
+ * This is useful when you want to take a signature produced by `analyzeCommonLineStarts()`
1019
+ * and turn it into an editable template where you can add named captures, e.g.:
1020
+ *
1021
+ * - `{{numbered}}` → `{{raqms}} {{dash}} `
1022
+ * - then: `{{raqms:num}} {{dash}} ` to capture the number
1023
+ *
1024
+ * Notes:
1025
+ * - This only expands the plain `{{token}}` form (not `{{token:name}}`).
1026
+ * - Expansion is repeated (at most 10 passes, stopping as soon as a pass changes nothing) to support nested composites.
1027
+ */
1028
+ const expandCompositeTokensInTemplate = (template) => {
1029
+ let out = template;
1030
+ for (let i = 0; i < 10; i++) {
1031
+ const next = out.replace(/\{\{(\w+)\}\}/g, (m, tokenName) => {
1032
+ return COMPOSITE_TOKENS[tokenName] ?? m;
1033
+ });
1034
+ if (next === out) break;
1035
+ out = next;
1036
+ }
1037
+ return out;
1038
+ };
1039
+ /**
1015
1040
  * Expands base tokens in a template string.
1016
1041
  * Used internally to pre-expand composite tokens.
1017
1042
  *
@@ -1092,6 +1117,73 @@ const containsTokens = (query) => {
1092
1117
  SIMPLE_TOKEN_REGEX.lastIndex = 0;
1093
1118
  return SIMPLE_TOKEN_REGEX.test(query);
1094
1119
  };
1120
+ const splitTemplateIntoSegments = (query) => {
1121
+ const segments = [];
1122
+ let lastIndex = 0;
1123
+ TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
1124
+ let match;
1125
+ while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {
1126
+ if (match.index > lastIndex) segments.push({
1127
+ type: "text",
1128
+ value: query.slice(lastIndex, match.index)
1129
+ });
1130
+ segments.push({
1131
+ type: "token",
1132
+ value: match[0]
1133
+ });
1134
+ lastIndex = match.index + match[0].length;
1135
+ }
1136
+ if (lastIndex < query.length) segments.push({
1137
+ type: "text",
1138
+ value: query.slice(lastIndex)
1139
+ });
1140
+ return segments;
1141
+ };
1142
+ const maybeApplyFuzzyToText = (text, fuzzyTransform) => {
1143
+ if (fuzzyTransform && /[\u0600-\u06FF]/u.test(text)) return fuzzyTransform(text);
1144
+ return text;
1145
+ };
1146
+ const maybeApplyFuzzyToTokenPattern = (tokenPattern, fuzzyTransform) => {
1147
+ if (!fuzzyTransform) return tokenPattern;
1148
+ return tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/u.test(part) ? fuzzyTransform(part) : part).join("|");
1149
+ };
1150
+ const parseTokenLiteral = (literal) => {
1151
+ TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
1152
+ const tokenMatch = TOKEN_WITH_CAPTURE_REGEX.exec(literal);
1153
+ if (!tokenMatch) return null;
1154
+ const [, tokenName, captureName] = tokenMatch;
1155
+ return {
1156
+ captureName,
1157
+ tokenName
1158
+ };
1159
+ };
1160
+ const createCaptureRegistry = (capturePrefix) => {
1161
+ const captureNames = [];
1162
+ const captureNameCounts = /* @__PURE__ */ new Map();
1163
+ const register = (baseName) => {
1164
+ const count = captureNameCounts.get(baseName) ?? 0;
1165
+ captureNameCounts.set(baseName, count + 1);
1166
+ const uniqueName = count === 0 ? baseName : `${baseName}_${count + 1}`;
1167
+ const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
1168
+ captureNames.push(prefixedName);
1169
+ return prefixedName;
1170
+ };
1171
+ return {
1172
+ captureNames,
1173
+ register
1174
+ };
1175
+ };
1176
+ const expandTokenLiteral = (literal, opts) => {
1177
+ const parsed = parseTokenLiteral(literal);
1178
+ if (!parsed) return literal;
1179
+ const { tokenName, captureName } = parsed;
1180
+ if (!tokenName && captureName) return `(?<${opts.registerCapture(captureName)}>.+)`;
1181
+ let tokenPattern = TOKEN_PATTERNS[tokenName];
1182
+ if (!tokenPattern) return literal;
1183
+ tokenPattern = maybeApplyFuzzyToTokenPattern(tokenPattern, opts.fuzzyTransform);
1184
+ if (captureName) return `(?<${opts.registerCapture(captureName)}>${tokenPattern})`;
1185
+ return tokenPattern;
1186
+ };
1095
1187
  /**
1096
1188
  * Expands template tokens with support for named captures.
1097
1189
  *
@@ -1129,65 +1221,19 @@ const containsTokens = (query) => {
1129
1221
  * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }
1130
1222
  */
1131
1223
  const expandTokensWithCaptures = (query, fuzzyTransform, capturePrefix) => {
1132
- const captureNames = [];
1133
- const captureNameCounts = /* @__PURE__ */ new Map();
1134
- /**
1135
- * Gets a unique capture name, appending _2, _3, etc. for duplicates.
1136
- * This prevents invalid regex with duplicate named groups.
1137
- */
1138
- const getUniqueCaptureName = (baseName) => {
1139
- const count = captureNameCounts.get(baseName) ?? 0;
1140
- captureNameCounts.set(baseName, count + 1);
1141
- return count === 0 ? baseName : `${baseName}_${count + 1}`;
1142
- };
1143
- const segments = [];
1144
- let lastIndex = 0;
1145
- TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
1146
- let match;
1147
- while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {
1148
- if (match.index > lastIndex) segments.push({
1149
- type: "text",
1150
- value: query.slice(lastIndex, match.index)
1151
- });
1152
- segments.push({
1153
- type: "token",
1154
- value: match[0]
1155
- });
1156
- lastIndex = match.index + match[0].length;
1157
- }
1158
- if (lastIndex < query.length) segments.push({
1159
- type: "text",
1160
- value: query.slice(lastIndex)
1161
- });
1224
+ const segments = splitTemplateIntoSegments(query);
1225
+ const registry = createCaptureRegistry(capturePrefix);
1162
1226
  const processedParts = segments.map((segment) => {
1163
- if (segment.type === "text") {
1164
- if (fuzzyTransform && /[\u0600-\u06FF]/.test(segment.value)) return fuzzyTransform(segment.value);
1165
- return segment.value;
1166
- }
1167
- TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
1168
- const tokenMatch = TOKEN_WITH_CAPTURE_REGEX.exec(segment.value);
1169
- if (!tokenMatch) return segment.value;
1170
- const [, tokenName, captureName] = tokenMatch;
1171
- if (!tokenName && captureName) {
1172
- const uniqueName = getUniqueCaptureName(captureName);
1173
- const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
1174
- captureNames.push(prefixedName);
1175
- return `(?<${prefixedName}>.+)`;
1176
- }
1177
- let tokenPattern = TOKEN_PATTERNS[tokenName];
1178
- if (!tokenPattern) return segment.value;
1179
- if (fuzzyTransform) tokenPattern = tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/.test(part) ? fuzzyTransform(part) : part).join("|");
1180
- if (captureName) {
1181
- const uniqueName = getUniqueCaptureName(captureName);
1182
- const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
1183
- captureNames.push(prefixedName);
1184
- return `(?<${prefixedName}>${tokenPattern})`;
1185
- }
1186
- return tokenPattern;
1227
+ if (segment.type === "text") return maybeApplyFuzzyToText(segment.value, fuzzyTransform);
1228
+ return expandTokenLiteral(segment.value, {
1229
+ capturePrefix,
1230
+ fuzzyTransform,
1231
+ registerCapture: registry.register
1232
+ });
1187
1233
  });
1188
1234
  return {
1189
- captureNames,
1190
- hasCaptures: captureNames.length > 0,
1235
+ captureNames: registry.captureNames,
1236
+ hasCaptures: registry.captureNames.length > 0,
1191
1237
  pattern: processedParts.join("")
1192
1238
  };
1193
1239
  };
@@ -2449,5 +2495,5 @@ const analyzeTextForRule = (text) => {
2449
2495
  };
2450
2496
 
2451
2497
  //#endregion
2452
- export { TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
2498
+ export { TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
2453
2499
  //# sourceMappingURL=index.mjs.map