flappa-doormal 2.18.0 → 2.19.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -285,6 +285,7 @@ const rules = [{
285
285
  | `lineEndsWith` | ✅ Included | Match patterns at end of line |
286
286
  | `template` | Depends | Custom pattern with full control |
287
287
  | `regex` | Depends | Raw regex for complex cases |
288
+ | `dictionaryEntry` | ✅ Included | Serializable Arabic dictionary headword rule |
288
289
 
289
290
  #### Building UIs with Pattern Type Keys
290
291
 
@@ -293,7 +294,7 @@ The library exports `PATTERN_TYPE_KEYS` (a const array) and `PatternTypeKey` (a
293
294
  ```typescript
294
295
  import { PATTERN_TYPE_KEYS, type PatternTypeKey } from 'flappa-doormal';
295
296
 
296
- // PATTERN_TYPE_KEYS = ['lineStartsWith', 'lineStartsAfter', 'lineEndsWith', 'template', 'regex']
297
+ // PATTERN_TYPE_KEYS = ['lineStartsWith', 'lineStartsAfter', 'lineEndsWith', 'template', 'regex', 'dictionaryEntry']
297
298
 
298
299
  // Build a dropdown/select
299
300
  PATTERN_TYPE_KEYS.map(key => <option value={key}>{key}</option>)
@@ -351,7 +352,9 @@ the stoplist guard is skipped and the page-start match is allowed.
351
352
  #### Arabic Dictionary Helper
352
353
 
353
354
  Use `createArabicDictionaryEntryRule()` to build a conservative rule for Arabic
354
- dictionaries with lemma capture, stopword filtering, and page-wrap protection:
355
+ dictionaries with lemma capture, stopword filtering, and page-wrap protection.
356
+ The helper now returns a serializable native `dictionaryEntry` rule rather than
357
+ an eagerly-compiled regex blob:
355
358
 
356
359
  ```typescript
357
360
  import { createArabicDictionaryEntryRule, segmentPages } from 'flappa-doormal';
@@ -364,16 +367,35 @@ const rule = createArabicDictionaryEntryRule({
364
367
  allowParenthesized: true, // e.g. (عنبر) :
365
368
  allowWhitespaceBeforeColon: true, // e.g. عنبر :
366
369
  allowCommaSeparated: true, // e.g. سبد، دبس:
370
+ midLineSubentries: false, // line/page starts only
367
371
  });
368
372
 
369
373
  const segments = segmentPages(pages, { rules: [rule] });
370
374
  ```
371
375
 
376
+ Equivalent direct JSON-authored rule:
377
+
378
+ ```typescript
379
+ const rule = {
380
+ dictionaryEntry: {
381
+ stopWords: ['وقيل', 'ويقال', 'قال', 'العجاج', 'أخاك'],
382
+ allowParenthesized: true,
383
+ allowWhitespaceBeforeColon: true,
384
+ allowCommaSeparated: true,
385
+ midLineSubentries: false,
386
+ },
387
+ pageStartPrevWordStoplist: ['قال', 'وقيل', 'ويقال'],
388
+ samePagePrevWordStoplist: ['جل'],
389
+ meta: { type: 'entry' },
390
+ };
391
+ ```
392
+
372
393
  Behavior:
373
394
  - Keeps the lemma marker in `segment.content`
374
395
  - Stores the matched lemma in `segment.meta.lemma`
375
396
  - Matches root entries at true line/page starts like `عز:` and `لع:`
376
397
  - Matches mid-line subentries conservatively when they begin with `و`
398
+ - Supports disabling mid-line subentries entirely with `midLineSubentries: false`
377
399
  - Can match parenthesized headwords like `(عنبر) :` when enabled
378
400
  - Can match comma-separated headword lists like `سبد، دبس:` when enabled
379
401
  - Can suppress same-page false positives like `جلّ وعزّ:` with `samePagePrevWordStoplist`
package/dist/index.d.mts CHANGED
@@ -259,6 +259,73 @@ type LineStartsAfterPattern = {
259
259
  type LineEndsWithPattern = {
260
260
  /** Array of patterns that mark line endings. Brackets `()[]` are auto-escaped. */lineEndsWith: string[];
261
261
  };
262
+ /**
263
+ * Dictionary entry pattern options for Arabic lexicon-style headword matching.
264
+ *
265
+ * This captures authoring intent in a serializable shape and is compiled into
266
+ * a regex internally by the rule compiler.
267
+ */
268
+ interface DictionaryEntryPatternOptions {
269
+ /**
270
+ * Words that should never be treated as lemmas when followed by a colon.
271
+ *
272
+ * Matching is Arabic-normalized, diacritic-insensitive, and exact. Callers
273
+ * should provide canonical forms only; vocalized variants do not need to be
274
+ * listed separately.
275
+ */
276
+ stopWords: string[];
277
+ /**
278
+ * Allow balanced parenthesized headwords like `(عنبر):` or `(عنبر) :`.
279
+ * @default false
280
+ */
281
+ allowParenthesized?: boolean;
282
+ /**
283
+ * Allow optional whitespace before the trailing colon.
284
+ * @default false
285
+ */
286
+ allowWhitespaceBeforeColon?: boolean;
287
+ /**
288
+ * Allow comma-separated headword lists like `سبد، دبس:`.
289
+ * @default false
290
+ */
291
+ allowCommaSeparated?: boolean;
292
+ /**
293
+ * Allow conservative mid-line subentries that begin with `و`.
294
+ * Disable this when the rule should only split true line/page starts.
295
+ * @default true
296
+ */
297
+ midLineSubentries?: boolean;
298
+ /**
299
+ * Named capture key for the matched lemma metadata.
300
+ * @default 'lemma'
301
+ */
302
+ captureName?: string;
303
+ /**
304
+ * Minimum number of Arabic base letters in a lemma.
305
+ * @default 2
306
+ */
307
+ minLetters?: number;
308
+ /**
309
+ * Maximum number of Arabic base letters in a lemma.
310
+ * @default 10
311
+ */
312
+ maxLetters?: number;
313
+ }
314
+ /**
315
+ * Arabic dictionary entry pattern rule - serializable headword matcher compiled internally.
316
+ *
317
+ * @example
318
+ * {
319
+ * dictionaryEntry: {
320
+ * stopWords: ['قال', 'وقيل'],
321
+ * allowCommaSeparated: true,
322
+ * },
323
+ * meta: { type: 'entry' }
324
+ * }
325
+ */
326
+ type DictionaryEntryPattern = {
327
+ dictionaryEntry: DictionaryEntryPatternOptions;
328
+ };
262
329
  /**
263
330
  * Union of all pattern types for split rules.
264
331
  *
@@ -268,8 +335,9 @@ type LineEndsWithPattern = {
268
335
  * - `lineStartsWith` - Match line beginnings (marker included)
269
336
  * - `lineStartsAfter` - Match line beginnings (marker excluded)
270
337
  * - `lineEndsWith` - Match line endings
338
+ * - `dictionaryEntry` - Arabic dictionary headword matching
271
339
  */
272
- type PatternType = RegexPattern | TemplatePattern | LineStartsWithPattern | LineStartsAfterPattern | LineEndsWithPattern;
340
+ type PatternType = RegexPattern | TemplatePattern | LineStartsWithPattern | LineStartsAfterPattern | LineEndsWithPattern | DictionaryEntryPattern;
273
341
  /**
274
342
  * Pattern type key names for split rules.
275
343
  *
@@ -285,7 +353,7 @@ type PatternType = RegexPattern | TemplatePattern | LineStartsWithPattern | Line
285
353
  * const validateKey = (k: string): k is PatternTypeKey =>
286
354
  * (PATTERN_TYPE_KEYS as readonly string[]).includes(k);
287
355
  */
288
- declare const PATTERN_TYPE_KEYS: readonly ["lineStartsWith", "lineStartsAfter", "lineEndsWith", "template", "regex"];
356
+ declare const PATTERN_TYPE_KEYS: readonly ["lineStartsWith", "lineStartsAfter", "lineEndsWith", "template", "regex", "dictionaryEntry"];
289
357
  /**
290
358
  * String union of pattern type key names.
291
359
  *
@@ -417,7 +485,7 @@ type RuleConstraints = PageRangeConstraintWithExclude & {
417
485
  *
418
486
  * Each rule must specify:
419
487
  * - **Pattern** (exactly one): `regex`, `template`, `lineStartsWith`,
420
- * `lineStartsAfter`, or `lineEndsWith`
488
+ * `lineStartsAfter`, `lineEndsWith`, or `dictionaryEntry`
421
489
  * - **Split behavior**: `split` (optional, defaults to `'at'`), `occurrence`, `fuzzy`
422
490
  * - **Constraints** (optional): `min`, `max`, `meta`
423
491
  *
@@ -1122,30 +1190,7 @@ declare const fixTrailingWaw: (text: string) => string;
1122
1190
  declare const applyPreprocessToPage: (content: string, pageId: number, transforms: PreprocessTransform[]) => string;
1123
1191
  //#endregion
1124
1192
  //#region src/segmentation/arabic-dictionary-rule.d.ts
1125
- interface ArabicDictionaryEntryRuleOptions {
1126
- /**
1127
- * Words that should never be treated as lemmas when followed by a colon.
1128
- *
1129
- * Matching is Arabic-normalized, diacritic-insensitive, and exact. Callers
1130
- * should provide canonical forms only; vocalized variants do not need to be
1131
- * listed separately.
1132
- */
1133
- stopWords: string[];
1134
- /**
1135
- * Allow balanced parenthesized headwords like `(عنبر):` or `(عنبر) :`.
1136
- * @default false
1137
- */
1138
- allowParenthesized?: boolean;
1139
- /**
1140
- * Allow optional whitespace before the trailing colon.
1141
- * @default false
1142
- */
1143
- allowWhitespaceBeforeColon?: boolean;
1144
- /**
1145
- * Allow comma-separated headword lists like `سبد، دبس:`.
1146
- * @default false
1147
- */
1148
- allowCommaSeparated?: boolean;
1193
+ interface ArabicDictionaryEntryRuleOptions extends DictionaryEntryPatternOptions {
1149
1194
  /**
1150
1195
  * Suppress page-start matches when the previous page's last Arabic word
1151
1196
  * is in this stoplist, unless that page ends with strong sentence punctuation.
@@ -1156,21 +1201,6 @@ interface ArabicDictionaryEntryRuleOptions {
1156
1201
  * on the same page is in this stoplist.
1157
1202
  */
1158
1203
  samePagePrevWordStoplist?: string[];
1159
- /**
1160
- * Named capture key for the matched lemma.
1161
- * @default 'lemma'
1162
- */
1163
- captureName?: string;
1164
- /**
1165
- * Minimum number of Arabic base letters in a lemma.
1166
- * @default 2
1167
- */
1168
- minLetters?: number;
1169
- /**
1170
- * Maximum number of Arabic base letters in a lemma.
1171
- * @default 10
1172
- */
1173
- maxLetters?: number;
1174
1204
  /**
1175
1205
  * Static metadata merged into matching segments.
1176
1206
  */
@@ -1179,13 +1209,9 @@ interface ArabicDictionaryEntryRuleOptions {
1179
1209
  /**
1180
1210
  * Creates a reusable split rule for Arabic dictionary entries.
1181
1211
  *
1182
- * The generated rule:
1183
- * - keeps the lemma marker in `segment.content`
1184
- * - stores the lemma in `segment.meta[captureName]`
1185
- * - matches root entries at true line/page starts
1186
- * - matches mid-line subentries conservatively when they begin with `و`
1187
- * - can optionally support parenthesized headwords like `(عنبر) :`
1188
- * - can optionally support comma-separated headword lists like `سبد، دبس:`
1212
+ * The returned rule preserves authoring intent as a serializable
1213
+ * `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
1214
+ * regex string.
1189
1215
  *
1190
1216
  * @example
1191
1217
  * createArabicDictionaryEntryRule({
@@ -1208,6 +1234,7 @@ declare const createArabicDictionaryEntryRule: ({
1208
1234
  captureName,
1209
1235
  maxLetters,
1210
1236
  meta,
1237
+ midLineSubentries,
1211
1238
  minLetters,
1212
1239
  pageStartPrevWordStoplist,
1213
1240
  samePagePrevWordStoplist,
@@ -1269,7 +1296,7 @@ declare const getSegmentDebugReason: (segment: Segment, options?: DebugReasonOpt
1269
1296
  /**
1270
1297
  * Types of validation issues that can be detected.
1271
1298
  */
1272
- type ValidationIssueType = 'missing_braces' | 'unknown_token' | 'duplicate' | 'empty_pattern' | 'invalid_regex';
1299
+ type ValidationIssueType = 'missing_braces' | 'unknown_token' | 'duplicate' | 'empty_pattern' | 'invalid_regex' | 'invalid_option';
1273
1300
  /**
1274
1301
  * A validation issue found in a pattern.
1275
1302
  */
@@ -1290,6 +1317,7 @@ type RuleValidationResult = {
1290
1317
  lineEndsWith?: (ValidationIssue | undefined)[];
1291
1318
  template?: ValidationIssue;
1292
1319
  regex?: ValidationIssue;
1320
+ dictionaryEntry?: Partial<Record<keyof DictionaryEntryPatternOptions, ValidationIssue>>;
1293
1321
  };
1294
1322
  /**
1295
1323
  * Validates split rules for common pattern issues.
@@ -1756,5 +1784,5 @@ type ValidationOptions = {
1756
1784
  */
1757
1785
  declare const validateSegments: (pages: Page[], options: SegmentationOptions, segments: Segment[], validationOptions?: ValidationOptions) => SegmentValidationReport;
1758
1786
  //#endregion
1759
- export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, type ArabicDictionaryEntryRuleOptions, type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type CondenseEllipsisRule, type DetectedPattern, type ExpandResult, type FixTrailingWawRule, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type OptimizeResult, PATTERN_TYPE_KEYS, type Page, type PageRange, type PageRangeConstraint, type PageRangeConstraintWithExclude, type PatternProcessor, type PatternTypeKey, type PreprocessTransform, type RemoveZeroWidthRule, type RepeatingSequenceExample, type RepeatingSequenceOptions, type RepeatingSequencePattern, type RuleValidationResult, type Segment, type SegmentValidationIssue, type SegmentValidationIssueSeverity, type SegmentValidationIssueType, type SegmentValidationReport, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, Token, type TokenKey, type TokenMapping, type TokenPatternName, type ValidationIssue, type ValidationIssueType, type ValidationOptions, analyzeCommonLineStarts, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateRules, validateSegments, withCapture };
1787
+ export { ARABIC_BASE_LETTER_CLASS, ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN, ARABIC_MARKS_CLASS, ARABIC_WORD_WITH_OPTIONAL_MARKS_PATTERN, type ArabicDictionaryEntryRuleOptions, type Breakpoint, type BreakpointRule, type CommonLineStartPattern, type CondenseEllipsisRule, type DetectedPattern, type DictionaryEntryPatternOptions, type ExpandResult, type FixTrailingWawRule, type LineStartAnalysisOptions, type LineStartPatternExample, type Logger, type OptimizeResult, PATTERN_TYPE_KEYS, type Page, type PageRange, type PageRangeConstraint, type PageRangeConstraintWithExclude, type PatternProcessor, type PatternTypeKey, type PreprocessTransform, type RemoveZeroWidthRule, type RepeatingSequenceExample, type RepeatingSequenceOptions, type RepeatingSequencePattern, type RuleValidationResult, type Segment, type SegmentValidationIssue, type SegmentValidationIssueSeverity, type SegmentValidationIssueType, type SegmentValidationReport, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, Token, type TokenKey, type TokenMapping, type TokenPatternName, type ValidationIssue, type ValidationIssueType, type ValidationOptions, analyzeCommonLineStarts, analyzeRepeatingSequences, analyzeTextForRule, applyPreprocessToPage, applyTokenMappings, condenseEllipsis, containsTokens, createArabicDictionaryEntryRule, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, escapeWordsOutsideTokens, expandCompositeTokensInTemplate, expandTokens, expandTokensWithCaptures, fixTrailingWaw, formatValidationReport, generateTemplateFromText, getAvailableTokens, getDebugReason, getSegmentDebugReason, getTokenPattern, makeDiacriticInsensitive, normalizeArabicForComparison, optimizeRules, removeZeroWidth, segmentPages, shouldDefaultToFuzzy, stripTokenMappings, suggestPatternConfig, templateToRegex, validateRules, validateSegments, withCapture };
1760
1788
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/breakpoints.ts","../src/types/rules.ts","../src/types/options.ts","../src/types/validation.ts","../src/types/index.ts","../src/analysis/line-starts.ts","../src/analysis/repeating-sequences.ts","../src/detection.ts","../src/optimization/optimize-rules.ts","../src/preprocessing/transforms.ts","../src/segmentation/arabic-dictionary-rule.ts","../src/segmentation/breakpoint-utils.ts","../src/segmentation/debug-meta.ts","../src/segmentation/pattern-validator.ts","../src/segmentation/segmenter.ts","../src/segmentation/tokens.ts","../src/utils/textUtils.ts","../src/validation/validate-segments.ts"],"mappings":";AAqBA;;;;;;;;;;;;AA2GA;;;;;;;AA3GA,KAAY,cAAA,GAAiB,8BAAA;ECOxB;;;;;AAEI;;;;;AA4BG;EDzBR,OAAA;;;;ACuDc;;;;;AAiCC;;;;;AAwBH;EDhGZ,KAAA;;;;;;;;;;;;;;;;ACmIJ;;;;;AAOA;;;;;AAAgE;;;ED5G5D,KAAA;EC6HA;;;;;AAyBK;;;;;ED1IL,KAAA;ECiKO;;;;;;AA8FX;;;;;;;;;;;;ED3OI,QAAA;AAAA;;;AErFJ;;;;;;;;;AAgCA;;;KFsEY,UAAA,YAAsB,cAAA;;;AA3GlC;;;;;;;;;;;;AA2GA;;;;;;;;AChIiE;;;;;AA8BxD;ADTT,KCOK,YAAA;wEAED,KAAA;AAAA;AA4BQ;;;;;AA8BM;;;;;AAiCC;;;;;AAwBH;;;;;;;;AAvFJ,KAFP,eAAA;EA2GoB,2GAzGrB,QAAA;AAAA;;;;;;AA0HJ;;;;;AAOA;;;;;AAAgE;;;;;;;;;AA0CvD;KA/IJ,qBAAA;oHAED,cAAA;AAAA;;;;;;;;AAkQJ;;;;;;;;;;;;;;;;AChUA;;;;;KD6FK,sBAAA;ECpFD,oHDsFA,eAAA;AAAA;AC/DJ;;;;;AAyBA;;;;;AAsBA;;;;;;;;;AA/CA,KDqFK,mBAAA;EChCC,kFDkCF,YAAA;AAAA;ACRJ;;;;;;;;;;AAAA,KDqBK,WAAA,GACC,YAAA,GACA,eAAA,GACA,qBAAA,GACA,sBAAA,GACA,mBAAA;;;;;;;;;;;ACmBN;;;;;cDFa,iBAAA;;;;;;KAOD,cAAA,WAAyB,iBAAA;;;;;;;KAUhC,aAAA;ECsEa;;;;;;ED/Dd,KAAA;ECiJgC;;;;;AC7VpC;;;EFsNI,UAAA;EEtNsC;AAE1C;;;;;AAMA;;;;;;;EF6NI,KAAA;AAAA;;;;;;;;;KAWC,eAAA,GAAkB,8BAAA;EEzNf;;;;;;;;;AAWR;;EF0NI,IAAA,GAAO,MAAA;EEjNuB;;;;;;;;;;;;;;;AC7BlC;;;;;EHoQI,cAAA;EGhPA;;;;;AAwBJ;;;;;AA2BA;;;;;AAgBA;;;;EHkMI,yBAAA;EG1KQ;;;;;;;;;;;;AC1GZ;;;;;;EJwSI,wBAAA;AAAA;;;;;;;;;;;;;AI1RJ;;;;;AAEA;;;;;;;;;KJuTY,SAAA,GAAY,WAAA,GAAc,aAAA,GAAgB,eAAA;;;;;;;;;;;;;AD1NtD;;;;;;;;AChIiE;;;;KC0BrD,mBAAA,GAAsB,mBAAA;EAC9B,IAAA;ED6BgB;;;;AAER;;;ECvBR,IAAA;AAAA;ADqDc;;;;;AAiCC;;;;;AAwBH;;;;;;;;;;AAzDE,KC9BN,oBAAA,GAAuB,mBAAA;EAC/B,IAAA;AAAA;;;;;ADyHJ;;;;;AAOA;;;;;AAAgE;;;;;;;KCxGpD,kBAAA,GAAqB,mBAAA;EAC7B,IAAA;AAAA;;;;;;;;;;;;ADsQJ;;;;;;;KCjPY,mBAAA,+DAIN,mBAAA,GACA,oBAAA,GACA,kBAAA;;;;;;;;;AArFN;;;;;;;;;AAgCA;;;;;AAyBA;;UAsDiB,MAAA;EAtDgB;EAwD7B,KAAA,IAAS,OAAA,aAAoB,IAAA;EAlCrB;EAoCR,KAAA,IAAS,OAAA,aAAoB,IAAA;;EAE7B,IAAA,IAAQ,OAAA,aAAoB,IAAA;EAjC1B;EAmCF,KAAA,IAAS,OAAA,aAAoB,IAAA;EAlCT;EAoCpB,IAAA,IAAQ,OAAA,aAAoB,IAAA;AAAA;;;;;AAVhC;;;;;;;;;;;;;;;;;;;;;AA6CA;;;;;;;KAAY,mBAAA;EAuKwB;;;;;;;EA/JhC,KAAA,GAAQ,SAAA;EAkCR;;;;;;;;;EAvBA,KAAA;IAoJgC,4DAhJtB,OAAA;IAEA,OAAA,GAAU,KAAA;EAAA;EC/MZ;;;;;AAEZ;;;;;AAMA;;;;EDwNI,QAAA;ECvNM;;;;;;;;;;;EDoON,gBAAA;ECvNI;;;;;;;;;;AAYR;;;;;;;;;;;;;;;;;;EDyOI,WAAA,GAAc,UAAA;EE7PC;;;;;;;EFsQf,MAAA;EEzOa;;AAejB;;;;;AA2BA;;;;;EF6MI,UAAA;EE7L2B;;;;AAwB/B;;;;;;;;;;;;AC1GA;;;;;;;;;;;;;;;EHgTI,MAAA,GAAS,MAAA;EGtSQ;;;;AAIrB;;;;;AAEA;;;;;;;;;;AA0QA;;;;;EHgDI,UAAA,GAAa,mBAAA;AAAA;;;KC7VL,8BAAA;AAAA,KAEA,0BAAA;AAAA,KAMA,sBAAA;EACR,IAAA,EAAM,0BAAA;EACN,QAAA,EAAU,8BAAA;EACV,YAAA;EACA,OAAA;IACI,IAAA;IACA,EAAA;IACA,cAAA;EAAA;EAEJ,QAAA;IACI,IAAA;IACA,EAAA;EAAA;EAEJ,MAAA;IACI,IAAA;IACA,EAAA;EAAA;EAEJ,WAAA;IACI,MAAA;IACA,WAAA;IACA,UAAA;EAAA;EAEJ,QAAA;EACA,IAAA;AAAA;AAAA,KAGQ,uBAAA;EACR,EAAA;EACA,OAAA;IACI,YAAA;IACA,SAAA;IACA,MAAA;IACA,MAAA;IACA,QAAA;EAAA;EAEJ,MAAA,EAAQ,sBAAA;AAAA;;;;AHtBZ;;;;;;;;;;;;AA2GA;KIlHY,OAAA;;;;;;;EAOR,OAAA;EHOa;;;EGFb,IAAA;EH8BC;;;;;AAEO;EGxBR,EAAA;;;;AHsDc;;;;EG7Cd,IAAA,GAAO,MAAA;AAAA;;;;;AHsGK;;;;;;;;KGvFJ,IAAA;EHyGa;;;;;EGnGrB,EAAA;EHmGqB;;AAiBzB;;;;EG5GI,OAAA;AAAA;;;;;AHmH4D;;;;;;KGtGpD,SAAA;;;AHgJH;;;;;;;;;;;;KGhIG,mBAAA;EHqPS;;;;EGhPjB,GAAA;EHgPiE;;;;EG1OjE,GAAA;AAAA;;;;;AFtFJ;;;;;;KEmGY,8BAAA,GAAiC,mBAAA;EF1FrC;;AAuBR;;;;;AAyBA;;;;;AAsBA;;;EEoCI,OAAA,GAAU,SAAA;AAAA;;;KC1HF,wBAAA;EACR,IAAA;EACA,WAAA;EACA,aAAA;EACA,QAAA;EACA,WAAA;EACA,wBAAA;EACA,yBAAA;EACA,MAAA;EACA,UAAA,IAAc,IAAA,UAAc,MAAA;EAC5B,cAAA,GAAiB,MAAA;EACjB,UAAA;AAAA;AAAA,KAGQ,uBAAA;EAA4B,IAAA;EAAc,MAAA;AAAA;AAAA,KAE1C,sBAAA;EACR,OAAA;EACA,KAAA;EACA,QAAA,EAAU,uBAAA;AAAA;;;;cAuQD,uBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,GAAS,wBAAA,KACV,sBAAA;;;KCvRS,wBAAA;EACR,WAAA;EACA,WAAA;EACA,QAAA;EACA,IAAA;EACA,yBAAA;EACA,YAAA;EACA,UAAA;EACA,WAAA;EACA,YAAA;EACA,iBAAA;AAAA;AAAA,KAGQ,wBAAA;EACR,IAAA;EACA,OAAA;EACA,MAAA;EACA,YAAA;AAAA;AAAA,KAGQ,wBAAA;EACR,OAAA;EACA,KAAA;EACA,QAAA,EAAU,wBAAA;AAAA;;ALwCI;;;;;cK6LL,yBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,GAAU,wBAAA,KACX,wBAAA;;;;ANnQH;;KOhBY,eAAA;EPgB+C,6DOdvD,KAAA,UP0BA;EOxBA,KAAA,UPsEA;EOpEA,KAAA,UPoGA;EOlGA,QAAA;AAAA;APmHJ;;;;;;;;AChIiE;;;;;AA8BxD;;ADkGT,cO5Ca,mBAAA,GAAuB,IAAA,aAAY,eAAA;;;AN1BpC;;;;;AA8BM;;;;;AAiCC;cM2BN,wBAAA,GAA4B,IAAA,UAAc,QAAA,EAAU,eAAA;;;;ANHjD;;;cM0BH,oBAAA,GACT,QAAA,EAAU,eAAA;EACT,WAAA;EAAmD,KAAA;EAAgB,QAAA;AAAA;;;;;;;cA+B3D,kBAAA,GACT,IAAA;EAEA,QAAA;EACA,WAAA;EACA,KAAA;EACA,QAAA;EACA,QAAA,EAAU,eAAA;AAAA;;;AP9Ld;;;AAAA,KQbY,cAAA;ERaiB,yDQXzB,KAAA,EAAO,SAAA,IRuCP;EQrCA,WAAA;AAAA;AAAA,cAkCS,aAAA,GAAiB,KAAA,EAAO,SAAA;;SAAA,SAAA;AAAA;;;;;ARkFrC;;;;;cShGa,eAAA,GAAmB,IAAA,UAAc,IAAA;;;ARhCmB;;;;;AA8BxD;;cQkCI,gBAAA,GAAoB,IAAA;;;ARNrB;;;;;AA8BM;cQdL,cAAA,GAAkB,IAAA;;;;AR+CZ;;;;;AAwBH;;;cQ5BH,qBAAA,GAAyB,OAAA,UAAiB,MAAA,UAAgB,UAAA,EAAY,mBAAA;;;UCjHlE,gCAAA;EViBS;;;;;;;EUTtB,SAAA;EVmGA;;;AAiBJ;EU9GI,kBAAA;;;;;EAMA,0BAAA;;ATxB6D;;;ES8B7D,mBAAA;ETAK;AAAA;;;ESML,yBAAA;ETsBQ;AAAA;;;EShBR,wBAAA;ET8Cc;AAAA;;;ESxCd,WAAA;ETyEe;AAAA;;;ESnEf,UAAA;ET2FY;AAAA;;;ESrFZ,UAAA;EToGE;;;ES/FF,IAAA,GAAO,MAAA;AAAA;;;;;;;;;ATmHX;;;;;AAOA;;;;;AAAgE;;;;;;;cS1BnD,+BAAA;EAAmC,mBAAA;EAAA,kBAAA;EAAA,0BAAA;EAAA,WAAA;EAAA,UAAA;EAAA,IAAA;EAAA,UAAA;EAAA,yBAAA;EAAA,wBAAA;EAAA;AAAA,GAW7C,gCAAA,KAAmC,SAAA;;;AV5CtC;;;;;;;;AChIiE;;;;;AA8BxD;;;;;AA4BG;;;;ADsEZ,cWnFa,wBAAA,GAA4B,IAAA;;KA0K7B,gBAAA,IAAoB,OAAA;;;;;;KCnIpB,kBAAA;EX0EP;;;;EWrED,OAAA;AAAA;;;;;;cAyDS,cAAA,GAAkB,IAAA,EAAM,MAAA,2BAAiC,OAAA,GAAU,kBAAA;;;;;;cA4BnE,qBAAA,GAAyB,OAAA,EAAS,OAAA,EAAS,OAAA,GAAU,kBAAA;;;;;;KCjKtD,mBAAA;;;AbmHZ;Ka9GY,eAAA;EACR,IAAA,EAAM,mBAAA;EACN,OAAA;EACA,UAAA;EAEA,KAAA;EAEA,OAAA;AAAA;;;;AZKK;KYEG,oBAAA;EACR,cAAA,IAAkB,eAAA;EAClB,eAAA,IAAmB,eAAA;EACnB,YAAA,IAAgB,eAAA;EAChB,QAAA,GAAW,eAAA;EACX,KAAA,GAAQ,eAAA;AAAA;;;AZmDM;;;;;AAiCC;;;;;AAwBH;;;;;;;cYmDH,aAAA,GAAiB,KAAA,EAAO,SAAA,QAAW,oBAAA;;;;;;;;;;AZhBhD;;;;cYyCa,sBAAA,GAA0B,OAAA,GAAU,oBAAA;;;;;;;;AZ7NgB;;;;;AA8BxD;;;;;AA4BG;;;;;AA8BM;;;;;AAiCC;;;;;AAwBH;;;;;;;;;;;;cauLH,YAAA,GAAgB,KAAA,EAAO,IAAA,IAAQ,OAAA,EAAS,mBAAA,KAAmB,OAAA;;;;AdnTxE;;;;;;ceda,wBAAA;;;;cAKA,kBAAA;;AfoHb;;ce/Ga,yCAAA;;;;cAKA,uCAAA;;cAiHA,KAAA;Ed3GI,oDAEb;EAAA,mCA0BC;EAAA;6BAEO;EAAA,2BA4Bc;EAAA,2BAEtB;EAAA,6BA+BC;EAAA;+BAEc;EAAA,2BAsBK;EAAA,iCAEpB;EAAA,yBAaC;EAAA;6BAEC;EAAA,2BAEA;EAAA,6BACmB;EAAA,6BAJnB;EAAA;;;;;KckBM,QAAA,gBAAwB,KAAA;AdGpC;;;AAAA,KcEY,gBAAA,gBAAgC,cAAA;;cAG/B,WAAA,GAAe,KAAA,UAAe,IAAA;;cAiB9B,+BAAA,GAAmC,QAAA;;;AdfgB;;;;;;;;;AA0CvD;;;;;;;;;;;;AAqHT;;cchGa,cAAA;EdgGW,iDAA8B;EAAA,2BAAe;EAAA,0BAA/B;EAAA,yBAA+B;EAAA;;2KChUtC;EAAA,6BAAsB;EAAA,wBACjD;EAAA,uBAQI;EAAA,yBAuBI;EAAA;yBACJ;EAAA,oCAwBsB;EAAA,sCAAG;EAAA,wBAsBrB;EAAA;;;;;;;;;;;;AAgCZ;;;;;ca4Ja,cAAA,GAAkB,KAAA;;;;;;;KAWnB,YAAA;Eb/JR;;;;;EaqKA,OAAA;EbnKgC;;AAmCpC;;;EauII,YAAA;Eb9GoB;;;;;EaqHpB,WAAA;AAAA;;;;;;;;;;;;;;;;;;;;;AZpUJ;;;;;AAEA;;;;;AAMA;;;;;;cY0ba,wBAAA,GACT,KAAA,UACA,cAAA,IAAkB,OAAA,qBAClB,aAAA;;;;;;;;;;;;;;;;;;;;;AZnaJ;;;cY8ca,YAAA,GAAgB,KAAA;;;;;;;;;;;;;;;AXle7B;;;;;;;cWyfa,eAAA,GAAmB,QAAA,aAAgB,MAAA;;;;AX7chD;;;;;AA2BA;;;;cWuca,kBAAA,QAAyB,gBAAA;AXvbtC;;;;;AAwBA;;;;;;;;;AAxBA,cWuca,eAAA,GAAmB,SAAA,EAAW,gBAAA;;;AVzhB3C;;;;;;;;;;;;cUujBa,oBAAA,GAAwB,QAAA;;;;KAWzB,YAAA;EAAiB,KAAA;EAAe,IAAA;AAAA;AVpjB5C;;;;;AAEA;;;;;;;;;;AA0QA;;;AA5QA,cUwkBa,kBAAA,GAAsB,QAAA,UAAkB,QAAA,EAAU,YAAA;;;;;;;;;;;;;;;AThlB/D;cS6mBa,kBAAA,GAAsB,QAAA;;;;;;AftgBnC;;;;;;;;AChIiE;;;;;AA8BxD;;;;;AA4BG;;cenBC,sBAAA,GAA0B,OAAA;;;AfiDrB;;;;;AAiCC;;;;;AAwBH;cejDH,WAAA,GAAe,CAAA;;;;;;;;;;cAwBf,4BAAA,GAAgC,IAAA;AAAA,cAiBhC,wBAAA,GAA4B,IAAA;;;KCzC7B,iBAAA;EjB3E+C;;;;;EiBiFvD,mBAAA;AAAA;;;AjB0BJ;;;;;;;;AChIiE;;;;cgB0iBpD,gBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,EAAS,mBAAA,EACT,QAAA,EAAU,OAAA,IACV,iBAAA,GAAoB,iBAAA,KACrB,uBAAA"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/types/breakpoints.ts","../src/types/rules.ts","../src/types/options.ts","../src/types/validation.ts","../src/types/index.ts","../src/analysis/line-starts.ts","../src/analysis/repeating-sequences.ts","../src/detection.ts","../src/optimization/optimize-rules.ts","../src/preprocessing/transforms.ts","../src/segmentation/arabic-dictionary-rule.ts","../src/segmentation/breakpoint-utils.ts","../src/segmentation/debug-meta.ts","../src/segmentation/pattern-validator.ts","../src/segmentation/segmenter.ts","../src/segmentation/tokens.ts","../src/utils/textUtils.ts","../src/validation/validate-segments.ts"],"mappings":";AAqBA;;;;;;;;;;;;AA2GA;;;;;;;AA3GA,KAAY,cAAA,GAAiB,8BAAA;ECOxB;;;;;AAEI;;;;;AA4BG;EDzBR,OAAA;;;;ACuDc;;;;;AAiCC;;;;;AAiCnB;EDzGI,KAAA;;;;;;;;;;;;;AC6JH;;;;;AAeiD;;;;;;;;;;;ED9I9C,KAAA;EC8JE;;;;;;;AAqBN;;;EDvKI,KAAA;EC8KM;AAOV;;;;;AAAgE;;;;;;;;;AA0CvD;;;ED3ML,QAAA;AAAA;;;;;;;;ACgUJ;;;;;;;KD/SY,UAAA,YAAsB,cAAA;;;AA3GlC;;;;;;;;;;;;AA2GA;;;;;;;;AChIiE;;;;;AA8BxD;ADTT,KCOK,YAAA;wEAED,KAAA;AAAA;AA4BQ;;;;;AA8BM;;;;;AAiCC;;;;;AAiCnB;;;;;;;;AAhGY,KAFP,eAAA;EAyID,2GAvIA,QAAA;AAAA;;;AAoJH;;;;;AAeiD;;;;;;;;;;;;;;;;;;KAvI7C,qBAAA;EA4KQ,kHA1KT,cAAA;AAAA;;;AAwLJ;;;;;AAAgE;;;;;;;;;AA0CvD;;;;;;;;;;;;KAnMJ,sBAAA;EAwTgB,oHAtTjB,eAAA;AAAA;;;;;;;;;;;;;AC/FJ;;;;;;;KDqHK,mBAAA;EC5GG,kFD8GJ,YAAA;AAAA;;;;AC9DJ;;;UDuEiB,6BAAA;ECtET;AAqBR;;;;;;EDyDI,SAAA;ECnDoB;;;;EDyDpB,kBAAA;ECzDoB;AA0BxB;;;EDqCI,0BAAA;ECnCA;;;;EDyCA,mBAAA;ECvC6B;;;;;ED8C7B,iBAAA;EC1C6B;;;;EDgD7B,WAAA;EC9CgC;AAmCpC;;;EDiBI,UAAA;ECQoB;;;;EDFpB,UAAA;AAAA;;;;;;;;;;;;;KAeC,sBAAA;EACD,eAAA,EAAiB,6BAAA;AAAA;;;;;;;AE7NrB;;;;;KF2OK,WAAA,GACC,YAAA,GACA,eAAA,GACA,qBAAA,GACA,sBAAA,GACA,mBAAA,GACA,sBAAA;;;;;AEzON;;;;;;;;;;;cF0Pa,iBAAA;;;;;;KAcD,cAAA,WAAyB,iBAAA;;;;;;;KAUhC,aAAA;EE3PG;;AAGR;;;;EF+PI,KAAA;EE7PA;;;;;;;;EFuQA,UAAA;EEhQ8B;;;;AC7BlC;;;;;;;;;EH4SI,KAAA;AAAA;AGhQJ;;;;;AA2BA;;;AA3BA,KH2QK,eAAA,GAAkB,8BAAA;EGhPF;AAgBrB;;;;;AAwBA;;;;;EHoNI,IAAA,GAAO,MAAA;EGpMG;;;;;;AC1Hd;;;;;;;;;;;;;;EJoVI,cAAA;EI1UA;;;;;AAIJ;;;;;AAEA;;;;;;;;;EJyVI,yBAAA;EI/ES;;;;;;;;;;;;;;;;;;EJmGT,wBAAA;AAAA;;;;;;;;;;;;;;;AK1WJ;;;;;;;;;;AAOA;;KLkYY,SAAA,GAAY,WAAA,GAAc,aAAA,GAAgB,eAAA;;;;;;;;;;;;;AD/StD;;;;;;;;AChIiE;;;;KC0BrD,mBAAA,GAAsB,mBAAA;EAC9B,IAAA;ED6BgB;;;;AAER;;;ECvBR,IAAA;AAAA;ADqDc;;;;;AAiCC;;;;;AAiCnB;;;;;;;;;;AAlEkB,KC9BN,oBAAA,GAAuB,mBAAA;EAC/B,IAAA;AAAA;;ADmJH;;;;;AAeiD;;;;;;;;;;;;;;;KC1ItC,kBAAA,GAAqB,mBAAA;EAC7B,IAAA;AAAA;;AD8KJ;;;;;AAcA;;;;;AAAgE;;;;;;;KCvKpD,mBAAA,+DAIN,mBAAA,GACA,oBAAA,GACA,kBAAA;;AD2MG;;;;;;;;;;;;AAqHT;;;;;;;;;;;UCtSiB,MAAA;EDsSoD;ECpSjE,KAAA,IAAS,OAAA,aAAoB,IAAA;;EAE7B,KAAA,IAAS,OAAA,aAAoB,IAAA;EAnHrB;EAqHR,IAAA,IAAQ,OAAA,aAAoB,IAAA;;EAE5B,KAAA,IAAS,OAAA,aAAoB,IAAA;EAvHC;EAyH9B,IAAA,IAAQ,OAAA,aAAoB,IAAA;AAAA;;;AAzFhC;;;;;AAyBA;;;;;AAsBA;;;;;;;;;;;;;AAgCA;;;;;;;KA6CY,mBAAA;EAzCC;;;;;;;EAiDT,KAAA,GAAQ,SAAA;EA3CR;;;;;AAmCJ;;;;EAmBI,KAAA;IAkEc,4DA9DJ,OAAA,WAgJG;IA9IH,OAAA,GAAU,KAAA;EAAA;EAjBpB;;;;;;;;;;;;;;EAkCA,QAAA;EA6Ha;;;;;;AC7VjB;;;;;ED6OI,gBAAA;EC3OkC;;;;AAMtC;;;;;;;;;;;;;;;;;;;;;;;;EDmQI,WAAA,GAAc,UAAA;EC5OV;;AAGR;;;;;EDkPI,MAAA;EC/OI;;;;;;;;;;;;ED6PJ,UAAA;EEpRe;;;;;;;;;;AA4CnB;;;;;AA2BA;;;;;AAgBA;;;;;AAwBA;;;;;;EFsMI,MAAA,GAAS,MAAA;EEtLU;;;;;AC1HvB;;;;;;;;;;;;;;;;;;;EH0UI,UAAA,GAAa,mBAAA;AAAA;;;KC7VL,8BAAA;AAAA,KAEA,0BAAA;AAAA,KAMA,sBAAA;EACR,IAAA,EAAM,0BAAA;EACN,QAAA,EAAU,8BAAA;EACV,YAAA;EACA,OAAA;IACI,IAAA;IACA,EAAA;IACA,cAAA;EAAA;EAEJ,QAAA;IACI,IAAA;IACA,EAAA;EAAA;EAEJ,MAAA;IACI,IAAA;IACA,EAAA;EAAA;EAEJ,WAAA;IACI,MAAA;IACA,WAAA;IACA,UAAA;EAAA;EAEJ,QAAA;EACA,IAAA;AAAA;AAAA,KAGQ,uBAAA;EACR,EAAA;EACA,OAAA;IACI,YAAA;IACA,SAAA;IACA,MAAA;IACA,MAAA;IACA,QAAA;EAAA;EAEJ,MAAA,EAAQ,sBAAA;AAAA;;;;AHtBZ;;;;;;;;;;;;AA2GA;KIlHY,OAAA;;;;;;;EAOR,OAAA;EHOa;;;EGFb,IAAA;EH8BC;;;;;AAEO;EGxBR,EAAA;;;;AHsDc;;;;EG7Cd,IAAA,GAAO,MAAA;AAAA;;;;;AH+GX;;;;;;;;KGhGY,IAAA;EHuIR;;;;;EGjIA,EAAA;EH4JuB;;;;AACuB;;EGrJ9C,OAAA;AAAA;;;;;;;;;;;KAaQ,SAAA;;;;;AH6KZ;;;;;AAcA;;;;;KG3KY,mBAAA;EHqLM;;;;EGhLd,GAAA;EHgNA;;;AAAK;EG1ML,GAAA;AAAA;;;;;;;;;;AH+TJ;KGlTY,8BAAA,GAAiC,mBAAA;;;;;;;;;;;;;;;AFnG7C;EEmHI,OAAA,GAAU,SAAA;AAAA;;;KC1HF,wBAAA;EACR,IAAA;EACA,WAAA;EACA,aAAA;EACA,QAAA;EACA,WAAA;EACA,wBAAA;EACA,yBAAA;EACA,MAAA;EACA,UAAA,IAAc,IAAA,UAAc,MAAA;EAC5B,cAAA,GAAiB,MAAA;EACjB,UAAA;AAAA;AAAA,KAGQ,uBAAA;EAA4B,IAAA;EAAc,MAAA;AAAA;AAAA,KAE1C,sBAAA;EACR,OAAA;EACA,KAAA;EACA,QAAA,EAAU,uBAAA;AAAA;;;;cAuQD,uBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,GAAS,wBAAA,KACV,sBAAA;;;KCvRS,wBAAA;EACR,WAAA;EACA,WAAA;EACA,QAAA;EACA,IAAA;EACA,yBAAA;EACA,YAAA;EACA,UAAA;EACA,WAAA;EACA,YAAA;EACA,iBAAA;AAAA;AAAA,KAGQ,wBAAA;EACR,IAAA;EACA,OAAA;EACA,MAAA;EACA,YAAA;AAAA;AAAA,KAGQ,wBAAA;EACR,OAAA;EACA,KAAA;EACA,QAAA,EAAU,wBAAA;AAAA;;ALwCI;;;;;cK6LL,yBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,GAAU,wBAAA,KACX,wBAAA;;;;ANnQH;;KOhBY,eAAA;EPgB+C,6DOdvD,KAAA,UP0BA;EOxBA,KAAA,UPsEA;EOpEA,KAAA,UPoGA;EOlGA,QAAA;AAAA;APmHJ;;;;;;;;AChIiE;;;;;AA8BxD;;ADkGT,cO5Ca,mBAAA,GAAuB,IAAA,aAAY,eAAA;;;AN1BpC;;;;;AA8BM;;;;;AAiCC;cM2BN,wBAAA,GAA4B,IAAA,UAAc,QAAA,EAAU,eAAA;;;;ANMjE;;;cMiBa,oBAAA,GACT,QAAA,EAAU,eAAA;EACT,WAAA;EAAmD,KAAA;EAAgB,QAAA;AAAA;;;;;;;cA+B3D,kBAAA,GACT,IAAA;EAEA,QAAA;EACA,WAAA;EACA,KAAA;EACA,QAAA;EACA,QAAA,EAAU,eAAA;AAAA;;;AP9Ld;;;AAAA,KQbY,cAAA;ERaiB,yDQXzB,KAAA,EAAO,SAAA,IRuCP;EQrCA,WAAA;AAAA;AAAA,cAqES,aAAA,GAAiB,KAAA,EAAO,SAAA;;SAAA,SAAA;AAAA;;;;;AR+CrC;;;;;cShGa,eAAA,GAAmB,IAAA,UAAc,IAAA;;;ARhCmB;;;;;AA8BxD;;cQkCI,gBAAA,GAAoB,IAAA;;;ARNrB;;;;;AA8BM;cQdL,cAAA,GAAkB,IAAA;;;;AR+CZ;;;;;AAiCnB;;;cQrCa,qBAAA,GAAyB,OAAA,UAAiB,MAAA,UAAgB,UAAA,EAAY,mBAAA;;;UCjHlE,gCAAA,SAAyC,6BAAA;EViBhC;;;;EUZtB,yBAAA;EVwCA;;;;EUlCA,wBAAA;EVgGQ;AAiBZ;;EU5GI,IAAA,GAAO,MAAA;AAAA;;;;;ATUF;;;;;AA4BG;;;;;AA8BM;;;;;AAiCC;;cSgEN,+BAAA;EAAmC,mBAAA;EAAA,kBAAA;EAAA,0BAAA;EAAA,WAAA;EAAA,UAAA;EAAA,IAAA;EAAA,iBAAA;EAAA,UAAA;EAAA,yBAAA;EAAA,wBAAA;EAAA;AAAA,GAY7C,gCAAA,KAAmC,SAAA;;;AVrEtC;;;;;;;;AChIiE;;;;;AA8BxD;;;;;AA4BG;;;;ADsEZ,cWnFa,wBAAA,GAA4B,IAAA;ATazC;AAAA,KS6JY,gBAAA,IAAoB,OAAA;;;;;;KCnIpB,kBAAA;EXsEK;;;;EWjEb,OAAA;AAAA;;;;;;cAyDS,cAAA,GAAkB,IAAA,EAAM,MAAA,2BAAiC,OAAA,GAAU,kBAAA;;;AX4D/E;;;cWhCY,qBAAA,GAAyB,OAAA,EAAS,OAAA,EAAS,OAAA,GAAU,kBAAA;;;;;;KCjKtD,mBAAA;;;AbmHZ;KaxGY,eAAA;EACR,IAAA,EAAM,mBAAA;EACN,OAAA;EACA,UAAA;EAEA,KAAA;EAEA,OAAA;AAAA;;;;AZDK;KYQG,oBAAA;EACR,cAAA,IAAkB,eAAA;EAClB,eAAA,IAAmB,eAAA;EACnB,YAAA,IAAgB,eAAA;EAChB,QAAA,GAAW,eAAA;EACX,KAAA,GAAQ,eAAA;EACR,eAAA,GAAkB,OAAA,CAAQ,MAAA,OAAa,6BAAA,EAA+B,eAAA;AAAA;;AZ4CxD;;;;;AAiCC;;;;;AAiCnB;;;;;;;;cY0Ga,aAAA,GAAiB,KAAA,EAAO,SAAA,QAAW,oBAAA;;;;;;AZtD/C;;;;;AAeiD;;;cYuErC,sBAAA,GAA0B,OAAA,GAAU,oBAAA;;;;;;;;AZpSgB;;;;;AA8BxD;;;;;AA4BG;;;;;AA8BM;;;;;AAiCC;;;;;AAiCnB;;;;;;;;;;;;ca8Ka,YAAA,GAAgB,KAAA,EAAO,IAAA,IAAQ,OAAA,EAAS,mBAAA,KAAmB,OAAA;;;;AdnTxE;;;;;;ceda,wBAAA;;;;cAKA,kBAAA;;AfoHb;;ce/Ga,yCAAA;;;;cAKA,uCAAA;;cAiHA,KAAA;Ed3GI,oDAEb;EAAA,mCA0BC;EAAA;6BAEO;EAAA,2BA4Bc;EAAA,2BAEtB;EAAA,6BA+BC;EAAA;+BAEc;EAAA,2BAsBK;EAAA,iCAEpB;EAAA,yBASa;EAAA;6BAQb;EAAA,2BAYA;EAAA,6BAaA;EAAA,6BAYA;EAAA;;;AAOH;;Kc7BW,QAAA,gBAAwB,KAAA;;;Ad4Cc;KcvCtC,gBAAA,gBAAgC,cAAA;;cAG/B,WAAA,GAAe,KAAA,UAAe,IAAA;;cAiB9B,+BAAA,GAAmC,QAAA;;;;;;;;;;;;;;AdwDhD;;;;;AAcA;;;;;AAAgE;;cctBnD,cAAA;EdgCK,iDAiBd;EAAA,2BAeK;EAAA,0BAWJ;EAAA;yBAAkB;EAAA,wBAYZ;EAAA,yKA2CP;EAAA,6BAoBwB;EAAA,wBA+BhB;EAAA;2BAA0B;EAAA,qBAA+B;EAAA,uBAA7C;EAAA,oCAA8B;EAAA,sCAAe;EAAA;;;;;;;;;;;ACrXrE;;;;;AAyBA;;cakNa,cAAA,GAAkB,KAAA;;;Ab5L/B;;;;KauMY,YAAA;EbjMN;;;;;EauMF,OAAA;EbvMoB;;AA0BxB;;;EaoLI,YAAA;EblLA;;;;;EayLA,WAAA;AAAA;;;;;;;;;;;Ab9IJ;;;;;;;;;;;;;;;;;;;;;;;;;;ca4Qa,wBAAA,GACT,KAAA,UACA,cAAA,IAAkB,OAAA,qBAClB,aAAA;;;;;AZrcJ;;;;;AAEA;;;;;AAMA;;;;;;;;;AARA,cYgfa,YAAA,GAAgB,KAAA;;;;;;;;;;;;;;;;;;AZ9c7B;;;;cYqea,eAAA,GAAmB,QAAA,aAAgB,MAAA;;;;;;;;;;;;;cAqBnC,kBAAA,QAAyB,gBAAA;AX9gBtC;;;;;;;;;;;AA4CA;;;AA5CA,cW8hBa,eAAA,GAAmB,SAAA,EAAW,gBAAA;;AXvd3C;;;;;AAgBA;;;;;AAwBA;;;cW6ca,oBAAA,GAAwB,QAAA;;;;KAWzB,YAAA;EAAiB,KAAA;EAAe,IAAA;AAAA;;AVlkB5C;;;;;;;;;;;;;;;;;cUslBa,kBAAA,GAAsB,QAAA,UAAkB,QAAA,EAAU,YAAA;;;AVxkB/D;;;;;AAEA;;;;;;;;cUmmBa,kBAAA,GAAsB,QAAA;;;;;;AftgBnC;;;;;;;;AChIiE;;;;;AA8BxD;;;;;AA4BG;;cenBC,sBAAA,GAA0B,OAAA;;;AfiDrB;;;;;AAiCC;;;;;AAiCnB;ce1Da,WAAA,GAAe,CAAA;;;;;;;;;;cAwBf,4BAAA,GAAgC,IAAA;AAAA,cAiBhC,wBAAA,GAA4B,IAAA;;;KCzC7B,iBAAA;EjB3E+C;;;;;EiBiFvD,mBAAA;AAAA;;;AjB0BJ;;;;;;;;AChIiE;;;;cgB0iBpD,gBAAA,GACT,KAAA,EAAO,IAAA,IACP,OAAA,EAAS,mBAAA,EACT,QAAA,EAAU,OAAA,IACV,iBAAA,GAAoB,iBAAA,KACrB,uBAAA"}
package/dist/index.mjs CHANGED
@@ -1300,7 +1300,8 @@ const PATTERN_TYPE_KEYS = [
1300
1300
  "lineStartsAfter",
1301
1301
  "lineEndsWith",
1302
1302
  "template",
1303
- "regex"
1303
+ "regex",
1304
+ "dictionaryEntry"
1304
1305
  ];
1305
1306
  //#endregion
1306
1307
  //#region src/optimization/optimize-rules.ts
@@ -1319,11 +1320,17 @@ const getPatternArray = (rule, key) => {
1319
1320
  };
1320
1321
  const getPatternString = (rule, key) => {
1321
1322
  const value = rule[key];
1322
- return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : "";
1323
+ return typeof value === "string" ? value : Array.isArray(value) ? value.join("\n") : value ? JSON.stringify(value) : "";
1323
1324
  };
1324
1325
  const normalizePatterns = (patterns) => [...new Set(patterns)].sort((a, b) => b.length - a.length || a.localeCompare(b));
1326
+ const getDictionaryEntrySpecificityScore = (rule) => {
1327
+ if (!("dictionaryEntry" in rule)) return 0;
1328
+ const { allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords } = rule.dictionaryEntry;
1329
+ return minLetters * 20 + maxLetters + (allowCommaSeparated ? 0 : 120) + (allowParenthesized ? 0 : 60) + (allowWhitespaceBeforeColon ? 0 : 20) + (midLineSubentries ? 0 : 160) + Math.min(stopWords.length, 25);
1330
+ };
1325
1331
  const getSpecificityScore = (rule) => {
1326
1332
  const key = getPatternKey(rule);
1333
+ if (key === "dictionaryEntry") return getDictionaryEntrySpecificityScore(rule);
1327
1334
  return MERGEABLE_KEYS.has(key) ? getPatternArray(rule, key).reduce((max, p) => Math.max(max, p.length), 0) : getPatternString(rule, key).length;
1328
1335
  };
1329
1336
  const createMergeKey = (rule) => {
@@ -1470,21 +1477,21 @@ const applyPreprocessToPage = (content, pageId, transforms) => {
1470
1477
  };
1471
1478
  //#endregion
1472
1479
  //#region src/segmentation/arabic-dictionary-rule.ts
1473
- const uniqueNormalizedWords = (words) => {
1480
+ const uniqueCanonicalWords = (words) => {
1474
1481
  const seen = /* @__PURE__ */ new Set();
1475
1482
  const result = [];
1476
1483
  for (const word of words) {
1477
1484
  const normalized = normalizeArabicForComparison(word);
1478
1485
  if (!normalized || seen.has(normalized)) continue;
1479
1486
  seen.add(normalized);
1480
- result.push(normalized);
1487
+ result.push(word);
1481
1488
  }
1482
1489
  return result;
1483
1490
  };
1484
1491
  const buildStopAlternation = (stopWords) => {
1485
- const unique = uniqueNormalizedWords(stopWords);
1492
+ const unique = uniqueCanonicalWords(stopWords);
1486
1493
  if (unique.length === 0) return "";
1487
- return unique.map((word) => makeDiacriticInsensitive(word)).join("|");
1494
+ return unique.map((word) => makeDiacriticInsensitive(normalizeArabicForComparison(word))).join("|");
1488
1495
  };
1489
1496
  const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation, stopwordBody, unit }) => {
1490
1497
  if (!stopAlternation) return allowCommaSeparated ? `${unit}(?:\\s*[،,]\\s*${unit})*` : unit;
@@ -1493,20 +1500,53 @@ const buildHeadwordBody = ({ allowCommaSeparated, colonPattern, stopAlternation,
1493
1500
  };
1494
1501
  const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, captureName, headwordBody }) => {
1495
1502
  const colon = allowWhitespaceBeforeColon ? "\\s*:" : ":";
1496
- const withCapture = captureName ? `(?<${captureName}>${headwordBody})` : `(?:${headwordBody})`;
1503
+ const withCapture = `(?<${captureName}>${headwordBody})`;
1497
1504
  if (!allowParenthesized) return `${withCapture}${colon}`;
1498
1505
  return `(?:\\(\\s*${withCapture}\\s*\\)|${withCapture})${colon}`;
1499
1506
  };
1507
+ const validateDictionaryEntryOptions = ({ captureName = "lemma", maxLetters = 10, minLetters = 2 }) => {
1508
+ if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
1509
+ if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
1510
+ if (!captureName.match(/^[A-Za-z_]\w*$/)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
1511
+ };
1512
+ const buildArabicDictionaryEntryRegexSource = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, midLineSubentries = true, minLetters = 2, stopWords }, capturePrefix) => {
1513
+ validateDictionaryEntryOptions({
1514
+ captureName,
1515
+ maxLetters,
1516
+ minLetters
1517
+ });
1518
+ const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
1519
+ const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
1520
+ const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
1521
+ const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
1522
+ const stopAlternation = buildStopAlternation(stopWords);
1523
+ const lemmaBody = buildHeadwordBody({
1524
+ allowCommaSeparated,
1525
+ colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
1526
+ stopAlternation,
1527
+ stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
1528
+ unit: lemmaUnit
1529
+ });
1530
+ const lineStartBoundary = `(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`;
1531
+ const midLineTrigger = allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`;
1532
+ const prefixedCaptureName = capturePrefix ? `${capturePrefix}${captureName}` : captureName;
1533
+ const regex = `(?:${lineStartBoundary}${midLineSubentries ? `|${midLineTrigger}` : ""})` + buildBalancedMarker({
1534
+ allowParenthesized,
1535
+ allowWhitespaceBeforeColon,
1536
+ captureName: prefixedCaptureName,
1537
+ headwordBody: lemmaBody
1538
+ });
1539
+ return {
1540
+ captureNames: [prefixedCaptureName],
1541
+ regex
1542
+ };
1543
+ };
1500
1544
  /**
1501
1545
  * Creates a reusable split rule for Arabic dictionary entries.
1502
1546
  *
1503
- * The generated rule:
1504
- * - keeps the lemma marker in `segment.content`
1505
- * - stores the lemma in `segment.meta[captureName]`
1506
- * - matches root entries at true line/page starts
1507
- * - matches mid-line subentries conservatively when they begin with `و`
1508
- * - can optionally support parenthesized headwords like `(عنبر) :`
1509
- * - can optionally support comma-separated headword lists like `سبد، دبس:`
1547
+ * The returned rule preserves authoring intent as a serializable
1548
+ * `{ dictionaryEntry: ... }` pattern rather than eagerly compiling to a raw
1549
+ * regex string.
1510
1550
  *
1511
1551
  * @example
1512
1552
  * createArabicDictionaryEntryRule({
@@ -1522,33 +1562,26 @@ const buildBalancedMarker = ({ allowParenthesized, allowWhitespaceBeforeColon, c
1522
1562
  * stopWords: ['الليث', 'العجاج'],
1523
1563
  * })
1524
1564
  */
1525
- const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
1526
- if (!Number.isInteger(minLetters) || minLetters < 1) throw new Error(`createArabicDictionaryEntryRule: minLetters must be an integer >= 1, got ${minLetters}`);
1527
- if (!Number.isInteger(maxLetters) || maxLetters < minLetters) throw new Error(`createArabicDictionaryEntryRule: maxLetters must be an integer >= minLetters, got ${maxLetters}`);
1528
- if (!captureName.match(/^[A-Za-z_]\w*$/)) throw new Error(`createArabicDictionaryEntryRule: invalid captureName "${captureName}"`);
1529
- const zeroWidthPrefix = "[\\u200E\\u200F\\u061C\\u200B\\u200C\\u200D\\uFEFF]*";
1530
- const wawWithMarks = `و${ARABIC_MARKS_CLASS}*`;
1531
- const alWithMarks = `ا${ARABIC_MARKS_CLASS}*ل${ARABIC_MARKS_CLASS}*`;
1532
- const lemmaUnit = `(?:${wawWithMarks})?(?:${alWithMarks})?${`${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}(?:${ARABIC_LETTER_WITH_OPTIONAL_MARKS_PATTERN}){${minLetters - 1},${maxLetters - 1}}`}`;
1533
- const stopAlternation = buildStopAlternation(stopWords);
1534
- const lemmaBody = buildHeadwordBody({
1535
- allowCommaSeparated,
1536
- colonPattern: allowWhitespaceBeforeColon ? "\\s*:" : ":",
1537
- stopAlternation,
1538
- stopwordBody: stopAlternation ? `(?:${wawWithMarks})?(?:${stopAlternation})` : "",
1539
- unit: lemmaUnit
1565
+ const createArabicDictionaryEntryRule = ({ allowCommaSeparated = false, allowParenthesized = false, allowWhitespaceBeforeColon = false, captureName = "lemma", maxLetters = 10, meta, midLineSubentries = true, minLetters = 2, pageStartPrevWordStoplist, samePagePrevWordStoplist, stopWords }) => {
1566
+ validateDictionaryEntryOptions({
1567
+ captureName,
1568
+ maxLetters,
1569
+ minLetters
1540
1570
  });
1541
1571
  return {
1542
- meta,
1543
- pageStartPrevWordStoplist,
1544
- regex: `(?:${`(?:(?<=^)|(?<=\\n))${zeroWidthPrefix}`}|${allowParenthesized ? `(?<=\\s)(?=(?:\\(\\s*)?${wawWithMarks}(?:${alWithMarks})?)` : `(?<=\\s)(?=${wawWithMarks}(?:${alWithMarks})?)`})` + buildBalancedMarker({
1572
+ dictionaryEntry: {
1573
+ allowCommaSeparated,
1545
1574
  allowParenthesized,
1546
1575
  allowWhitespaceBeforeColon,
1547
1576
  captureName,
1548
- headwordBody: lemmaBody
1549
- }),
1550
- samePagePrevWordStoplist,
1551
- split: "at"
1577
+ maxLetters,
1578
+ midLineSubentries,
1579
+ minLetters,
1580
+ stopWords: uniqueCanonicalWords(stopWords)
1581
+ },
1582
+ meta,
1583
+ pageStartPrevWordStoplist,
1584
+ samePagePrevWordStoplist
1552
1585
  };
1553
1586
  };
1554
1587
  const WINDOW_PREFIX_LENGTHS = [
@@ -2636,6 +2669,26 @@ const validateRegexRule = (rule, result) => {
2636
2669
  return true;
2637
2670
  }
2638
2671
  };
2672
+ const invalidDictionaryEntryIssue = (message) => ({
2673
+ message,
2674
+ type: "invalid_option"
2675
+ });
2676
+ const validateDictionaryEntryRule = (rule, result) => {
2677
+ if (!("dictionaryEntry" in rule) || !rule.dictionaryEntry) return false;
2678
+ const issues = {};
2679
+ const { allowCommaSeparated, allowParenthesized, allowWhitespaceBeforeColon, captureName, maxLetters, midLineSubentries, minLetters, stopWords } = rule.dictionaryEntry;
2680
+ if (!Array.isArray(stopWords) || stopWords.some((word) => typeof word !== "string" || !word.trim())) issues.stopWords = invalidDictionaryEntryIssue("stopWords must be a string[] with non-empty entries");
2681
+ if (allowCommaSeparated !== void 0 && typeof allowCommaSeparated !== "boolean") issues.allowCommaSeparated = invalidDictionaryEntryIssue("allowCommaSeparated must be a boolean");
2682
+ if (allowParenthesized !== void 0 && typeof allowParenthesized !== "boolean") issues.allowParenthesized = invalidDictionaryEntryIssue("allowParenthesized must be a boolean");
2683
+ if (allowWhitespaceBeforeColon !== void 0 && typeof allowWhitespaceBeforeColon !== "boolean") issues.allowWhitespaceBeforeColon = invalidDictionaryEntryIssue("allowWhitespaceBeforeColon must be a boolean");
2684
+ if (midLineSubentries !== void 0 && typeof midLineSubentries !== "boolean") issues.midLineSubentries = invalidDictionaryEntryIssue("midLineSubentries must be a boolean");
2685
+ if (captureName !== void 0 && !captureName.match(/^[A-Za-z_]\w*$/)) issues.captureName = invalidDictionaryEntryIssue(`captureName must match /^[A-Za-z_]\\w*$/, got "${captureName}"`);
2686
+ if (minLetters !== void 0 && (!Number.isInteger(minLetters) || minLetters < 1)) issues.minLetters = invalidDictionaryEntryIssue("minLetters must be an integer >= 1");
2687
+ if (maxLetters !== void 0 && (!Number.isInteger(maxLetters) || maxLetters < (minLetters ?? 2))) issues.maxLetters = invalidDictionaryEntryIssue(`maxLetters must be an integer >= ${minLetters ?? 2}`);
2688
+ if (Object.keys(issues).length === 0) return false;
2689
+ result.dictionaryEntry = issues;
2690
+ return true;
2691
+ };
2639
2692
  const formatValidationIssue = (_type, issue, loc) => {
2640
2693
  if (!issue) return null;
2641
2694
  if (issue.type === "missing_braces") return `${loc}: Missing {{}} around token "${issue.token}"`;
@@ -2670,7 +2723,8 @@ const validateRules = (rules) => rules.map((rule) => {
2670
2723
  const endsWithIssues = applyRulePatternValidation(result, "lineEndsWith", rule.lineEndsWith);
2671
2724
  const templateIssues = validateTemplateRule(rule, result);
2672
2725
  const regexIssues = validateRegexRule(rule, result);
2673
- return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues ? result : void 0;
2726
+ const dictionaryEntryIssues = validateDictionaryEntryRule(rule, result);
2727
+ return startsWithIssues || startsAfterIssues || endsWithIssues || templateIssues || regexIssues || dictionaryEntryIssues ? result : void 0;
2674
2728
  });
2675
2729
  /**
2676
2730
  * Formats a validation result array into a list of human-readable error messages.
@@ -2687,8 +2741,12 @@ const validateRules = (rules) => rules.map((rule) => {
2687
2741
  */
2688
2742
  const formatValidationReport = (results) => results.flatMap((result, i) => {
2689
2743
  if (!result) return [];
2690
- return Object.entries(result).flatMap(([type, issues]) => (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${i + 1}, ${type}`)).filter((msg) => msg !== null));
2744
+ return Object.entries(result).flatMap(([type, issues]) => formatValidationIssues(type, issues, i + 1));
2691
2745
  });
2746
+ const formatValidationIssues = (type, issues, ruleNumber) => {
2747
+ if (type === "dictionaryEntry" && issues && typeof issues === "object" && !Array.isArray(issues)) return Object.entries(issues).map(([field, issue]) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}.${field}`)).filter((msg) => msg !== null);
2748
+ return (Array.isArray(issues) ? issues : [issues]).map((issue) => formatValidationIssue(type, issue, `Rule ${ruleNumber}, ${type}`)).filter((msg) => msg !== null);
2749
+ };
2692
2750
  //#endregion
2693
2751
  //#region src/segmentation/breakpoint-processor.ts
2694
2752
  const buildPageIdToIndexMap = (pageIds) => new Map(pageIds.map((id, i) => [id, i]));
@@ -3336,6 +3394,7 @@ const buildLineBasedRuleRegex = (rule, fuzzy, capturePrefix) => {
3336
3394
  if ("lineStartsWith" in rule && Array.isArray(rule.lineStartsWith) && rule.lineStartsWith.length > 0) return buildLineStartsWithRegexSource(rule.lineStartsWith, fuzzy, capturePrefix);
3337
3395
  if ("lineEndsWith" in rule && Array.isArray(rule.lineEndsWith) && rule.lineEndsWith.length > 0) return buildLineEndsWithRegexSource(rule.lineEndsWith, fuzzy, capturePrefix);
3338
3396
  if ("template" in rule && typeof rule.template === "string") return buildTemplateRegexSource(rule.template, capturePrefix);
3397
+ if ("dictionaryEntry" in rule && rule.dictionaryEntry) return buildArabicDictionaryEntryRegexSource(rule.dictionaryEntry, capturePrefix);
3339
3398
  return null;
3340
3399
  };
3341
3400
  /**
@@ -3358,7 +3417,7 @@ const buildRuleRegex = (rule, capturePrefix) => {
3358
3417
  let finalRegex = ruleRegexSource?.regex;
3359
3418
  let allCaptureNames = ruleRegexSource?.captureNames ?? [];
3360
3419
  if (!finalRegex && "regex" in rule && typeof rule.regex === "string") finalRegex = rule.regex;
3361
- if (!finalRegex) throw new Error("Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, or lineEndsWith");
3420
+ if (!finalRegex) throw new Error("Rule must specify exactly one pattern type: regex, template, lineStartsWith, lineStartsAfter, lineEndsWith, or dictionaryEntry");
3362
3421
  if (allCaptureNames.length === 0) allCaptureNames = extractNamedCaptureNames(finalRegex);
3363
3422
  return {
3364
3423
  captureNames: allCaptureNames,