flappa-doormal 2.5.1 → 2.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -0
- package/dist/index.d.mts +16 -1
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +104 -58
- package/dist/index.mjs.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -777,6 +777,23 @@ const rule = {
 };
 ```
 
+### Expanding composite tokens (for adding named captures)
+
+Some tokens are **composites** (e.g. `{{numbered}}`), which are great for quick signatures but less convenient when you want to add named captures (e.g. capture the number).
+
+You can expand composites back into their underlying template form:
+
+```typescript
+import { expandCompositeTokensInTemplate } from 'flappa-doormal';
+
+const base = expandCompositeTokensInTemplate('{{numbered}}');
+// base === '{{raqms}} {{dash}} '
+
+// Now you can add a named capture:
+const withCapture = base.replace('{{raqms}}', '{{raqms:num}}');
+// withCapture === '{{raqms:num}} {{dash}} '
+```
+
 ## Types
 
 ### `SplitRule`
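The README example stops at editing the template string. Below is a minimal sketch of the follow-on step: compiling the edited template with `expandTokensWithCaptures`, whose `{ pattern, captureNames, hasCaptures }` return shape appears later in this diff. The sample line, the exact patterns behind `{{raqms}}`/`{{dash}}`, and the omission of the optional arguments are assumptions, not taken from the package docs.

```typescript
import { expandCompositeTokensInTemplate, expandTokensWithCaptures } from 'flappa-doormal';

// Expand the composite, then opt into a named capture on the number token.
const template = expandCompositeTokensInTemplate('{{numbered}}') // '{{raqms}} {{dash}} '
	.replace('{{raqms}}', '{{raqms:num}}');                        // '{{raqms:num}} {{dash}} '

// Compile to a regex source string with named groups.
const { pattern, captureNames, hasCaptures } = expandTokensWithCaptures(template);
// hasCaptures === true, captureNames === ['num']

// Illustrative only: assumes {{raqms}} matches Arabic-Indic numerals and {{dash}} a dash.
const match = '١٢ - حدثنا يحيى'.match(new RegExp(pattern, 'u'));
console.log(match?.groups?.num); // expected '١٢'
```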
package/dist/index.d.mts
CHANGED
@@ -864,6 +864,21 @@ declare const segmentPages: (pages: Page[], options: SegmentationOptions) => Seg
  * // → '{{harf}}' (unchanged - no brackets outside tokens)
  */
 declare const escapeTemplateBrackets: (pattern: string) => string;
+/**
+ * Expands any *composite* tokens (like `{{numbered}}`) into their underlying template form
+ * (like `{{raqms}} {{dash}} `).
+ *
+ * This is useful when you want to take a signature produced by `analyzeCommonLineStarts()`
+ * and turn it into an editable template where you can add named captures, e.g.:
+ *
+ * - `{{numbered}}` → `{{raqms}} {{dash}} `
+ * - then: `{{raqms:num}} {{dash}} ` to capture the number
+ *
+ * Notes:
+ * - This only expands the plain `{{token}}` form (not `{{token:name}}`).
+ * - Expansion is repeated a few times to support nested composites.
+ */
+declare const expandCompositeTokensInTemplate: (template: string) => string;
 /**
  * Token definitions mapping human-readable token names to regex patterns.
  *
@@ -1198,5 +1213,5 @@ declare const analyzeTextForRule: (text: string) => {
   detected: DetectedPattern[];
 } | null;
 //#endregion
-export { type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type DetectedPattern, type ExpandResult, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
+export { type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type DetectedPattern, type ExpandResult, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.d.mts.map
package/dist/index.d.mts.map
CHANGED
@@ -1 +1 @@
(single-line generated source map: the "mappings" string was regenerated; blob not reproduced)
package/dist/index.mjs
CHANGED
@@ -974,7 +974,7 @@ const escapeTemplateBrackets = (pattern) => {
 		return `\\${bracket}`;
 	});
 };
-const RUMUZ_ATOM = `(
+const RUMUZ_ATOM = `(?:خت|خغ|بخ|عخ|مق|مت|عس|سي|سن|كن|مد|قد|خد|فد|دل|كد|غد|صد|دت|تم|فق|دق|[خرزيمنصسدفلتقع]|(?<![\\u0660-\\u0669])٤(?![\\u0660-\\u0669]))`;
 const RUMUZ_BLOCK = `${RUMUZ_ATOM}(?:\\s+${RUMUZ_ATOM})*`;
 const BASE_TOKENS = {
 	bab: "باب",
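No prose accompanies the `RUMUZ_ATOM` change, but the added lookarounds read as: the Arabic-Indic digit `٤` now counts as a rumuz symbol only when it stands alone, not when it is part of a longer number. A quick check of that reading (my interpretation, not taken from the package):

```typescript
// The lookaround pair added around ٤, tested in isolation.
const standaloneFour = /(?<![\u0660-\u0669])٤(?![\u0660-\u0669])/u;

standaloneFour.test('٤');  // true  - bare ٤
standaloneFour.test('١٤'); // false - ٤ is part of the number ١٤
standaloneFour.test('٤٢'); // false - ٤ is part of the number ٤٢
```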
@@ -1012,6 +1012,31 @@ const BASE_TOKENS = {
  */
 const COMPOSITE_TOKENS = { numbered: "{{raqms}} {{dash}} " };
 /**
+ * Expands any *composite* tokens (like `{{numbered}}`) into their underlying template form
+ * (like `{{raqms}} {{dash}} `).
+ *
+ * This is useful when you want to take a signature produced by `analyzeCommonLineStarts()`
+ * and turn it into an editable template where you can add named captures, e.g.:
+ *
+ * - `{{numbered}}` → `{{raqms}} {{dash}} `
+ * - then: `{{raqms:num}} {{dash}} ` to capture the number
+ *
+ * Notes:
+ * - This only expands the plain `{{token}}` form (not `{{token:name}}`).
+ * - Expansion is repeated a few times to support nested composites.
+ */
+const expandCompositeTokensInTemplate = (template) => {
+	let out = template;
+	for (let i = 0; i < 10; i++) {
+		const next = out.replace(/\{\{(\w+)\}\}/g, (m, tokenName) => {
+			return COMPOSITE_TOKENS[tokenName] ?? m;
+		});
+		if (next === out) break;
+		out = next;
+	}
+	return out;
+};
+/**
  * Expands base tokens in a template string.
  * Used internally to pre-expand composite tokens.
  *
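A quick check of the note above that only the plain `{{token}}` form is expanded: the replacement regex `/\{\{(\w+)\}\}/g` has no branch for a `:name` suffix, so a composite written with a capture name should come back untouched (the `:full` capture name below is only an illustration).

```typescript
import { expandCompositeTokensInTemplate } from 'flappa-doormal';

expandCompositeTokensInTemplate('{{numbered}}');      // → '{{raqms}} {{dash}} '
expandCompositeTokensInTemplate('{{numbered:full}}'); // → '{{numbered:full}}' (left as-is)
```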
@@ -1092,6 +1117,73 @@ const containsTokens = (query) => {
 	SIMPLE_TOKEN_REGEX.lastIndex = 0;
 	return SIMPLE_TOKEN_REGEX.test(query);
 };
+const splitTemplateIntoSegments = (query) => {
+	const segments = [];
+	let lastIndex = 0;
+	TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
+	let match;
+	while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {
+		if (match.index > lastIndex) segments.push({
+			type: "text",
+			value: query.slice(lastIndex, match.index)
+		});
+		segments.push({
+			type: "token",
+			value: match[0]
+		});
+		lastIndex = match.index + match[0].length;
+	}
+	if (lastIndex < query.length) segments.push({
+		type: "text",
+		value: query.slice(lastIndex)
+	});
+	return segments;
+};
+const maybeApplyFuzzyToText = (text, fuzzyTransform) => {
+	if (fuzzyTransform && /[\u0600-\u06FF]/u.test(text)) return fuzzyTransform(text);
+	return text;
+};
+const maybeApplyFuzzyToTokenPattern = (tokenPattern, fuzzyTransform) => {
+	if (!fuzzyTransform) return tokenPattern;
+	return tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/u.test(part) ? fuzzyTransform(part) : part).join("|");
+};
+const parseTokenLiteral = (literal) => {
+	TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
+	const tokenMatch = TOKEN_WITH_CAPTURE_REGEX.exec(literal);
+	if (!tokenMatch) return null;
+	const [, tokenName, captureName] = tokenMatch;
+	return {
+		captureName,
+		tokenName
+	};
+};
+const createCaptureRegistry = (capturePrefix) => {
+	const captureNames = [];
+	const captureNameCounts = /* @__PURE__ */ new Map();
+	const register = (baseName) => {
+		const count = captureNameCounts.get(baseName) ?? 0;
+		captureNameCounts.set(baseName, count + 1);
+		const uniqueName = count === 0 ? baseName : `${baseName}_${count + 1}`;
+		const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
+		captureNames.push(prefixedName);
+		return prefixedName;
+	};
+	return {
+		captureNames,
+		register
+	};
+};
+const expandTokenLiteral = (literal, opts) => {
+	const parsed = parseTokenLiteral(literal);
+	if (!parsed) return literal;
+	const { tokenName, captureName } = parsed;
+	if (!tokenName && captureName) return `(?<${opts.registerCapture(captureName)}>.+)`;
+	let tokenPattern = TOKEN_PATTERNS[tokenName];
+	if (!tokenPattern) return literal;
+	tokenPattern = maybeApplyFuzzyToTokenPattern(tokenPattern, opts.fuzzyTransform);
+	if (captureName) return `(?<${opts.registerCapture(captureName)}>${tokenPattern})`;
+	return tokenPattern;
+};
 /**
  * Expands template tokens with support for named captures.
  *
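The helpers above are internal (they are not in the export list), so they can only be illustrated indirectly. Here is a small standalone sketch of what `splitTemplateIntoSegments` produces for a mixed template, mirroring the logic shown in this hunk; the exact shape of `TOKEN_WITH_CAPTURE_REGEX` is an assumption.

```typescript
type Segment = { type: 'text' | 'token'; value: string };

// Assumed shape of TOKEN_WITH_CAPTURE_REGEX: matches {{token}}, {{token:name}} and {{:name}}.
const TOKEN_WITH_CAPTURE = /\{\{(\w+)?(?::(\w+))?\}\}/g;

const split = (query: string): Segment[] => {
	const segments: Segment[] = [];
	let last = 0;
	for (const m of query.matchAll(TOKEN_WITH_CAPTURE)) {
		const idx = m.index ?? 0;
		if (idx > last) segments.push({ type: 'text', value: query.slice(last, idx) });
		segments.push({ type: 'token', value: m[0] });
		last = idx + m[0].length;
	}
	if (last < query.length) segments.push({ type: 'text', value: query.slice(last) });
	return segments;
};

split('{{raqms:num}} - باب');
// → [ { type: 'token', value: '{{raqms:num}}' }, { type: 'text', value: ' - باب' } ]
```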
@@ -1129,65 +1221,19 @@ const containsTokens = (query) => {
  * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }
  */
 const expandTokensWithCaptures = (query, fuzzyTransform, capturePrefix) => {
-	const
-	const
-	/**
-	 * Gets a unique capture name, appending _2, _3, etc. for duplicates.
-	 * This prevents invalid regex with duplicate named groups.
-	 */
-	const getUniqueCaptureName = (baseName) => {
-		const count = captureNameCounts.get(baseName) ?? 0;
-		captureNameCounts.set(baseName, count + 1);
-		return count === 0 ? baseName : `${baseName}_${count + 1}`;
-	};
-	const segments = [];
-	let lastIndex = 0;
-	TOKEN_WITH_CAPTURE_REGEX.lastIndex = 0;
-	let match;
-	while ((match = TOKEN_WITH_CAPTURE_REGEX.exec(query)) !== null) {
-		if (match.index > lastIndex) segments.push({
-			type: "text",
-			value: query.slice(lastIndex, match.index)
-		});
-		segments.push({
-			type: "token",
-			value: match[0]
-		});
-		lastIndex = match.index + match[0].length;
-	}
-	if (lastIndex < query.length) segments.push({
-		type: "text",
-		value: query.slice(lastIndex)
-	});
+	const segments = splitTemplateIntoSegments(query);
+	const registry = createCaptureRegistry(capturePrefix);
 	const processedParts = segments.map((segment) => {
-		if (segment.type === "text")
-
-
-
-
-
-		if (!tokenMatch) return segment.value;
-		const [, tokenName, captureName] = tokenMatch;
-		if (!tokenName && captureName) {
-			const uniqueName = getUniqueCaptureName(captureName);
-			const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
-			captureNames.push(prefixedName);
-			return `(?<${prefixedName}>.+)`;
-		}
-		let tokenPattern = TOKEN_PATTERNS[tokenName];
-		if (!tokenPattern) return segment.value;
-		if (fuzzyTransform) tokenPattern = tokenPattern.split("|").map((part) => /[\u0600-\u06FF]/.test(part) ? fuzzyTransform(part) : part).join("|");
-		if (captureName) {
-			const uniqueName = getUniqueCaptureName(captureName);
-			const prefixedName = capturePrefix ? `${capturePrefix}${uniqueName}` : uniqueName;
-			captureNames.push(prefixedName);
-			return `(?<${prefixedName}>${tokenPattern})`;
-		}
-		return tokenPattern;
+		if (segment.type === "text") return maybeApplyFuzzyToText(segment.value, fuzzyTransform);
+		return expandTokenLiteral(segment.value, {
+			capturePrefix,
+			fuzzyTransform,
+			registerCapture: registry.register
+		});
 	});
 	return {
-		captureNames,
-		hasCaptures: captureNames.length > 0,
+		captureNames: registry.captureNames,
+		hasCaptures: registry.captureNames.length > 0,
 		pattern: processedParts.join("")
 	};
 };
@@ -2449,5 +2495,5 @@ const analyzeTextForRule = (text) => {
 };
 
 //#endregion
-export { TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
+export { TOKEN_PATTERNS, analyzeCommonLineStarts, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, segmentPages, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.mjs.map