flappa-doormal 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -133,6 +133,36 @@ For patterns like `^٦٦٩٦ - (content)`, the content capture is the *last* pos
133
133
  // Solution: Iterate backward from m.length-1 to find last defined capture
134
134
  ```
135
135
 
136
+ ### Auto-Escaping Brackets in Templates
137
+
138
+ Template patterns (`lineStartsWith`, `lineStartsAfter`, `lineEndsWith`, `template`) automatically escape `()[]` characters that appear **outside** of `{{token}}` delimiters. This allows intuitive patterns without manual escaping.
139
+
140
+ **Processing order:**
141
+ 1. `escapeTemplateBrackets()` escapes `()[]` outside `{{...}}`
142
+ 2. `expandTokensWithCaptures()` expands tokens to regex patterns
143
+ 3. Fuzzy transform applied (if enabled)
144
+
145
+ ```
146
+ Input: "({{harf}}): "
147
+ Step 1: "\({{harf}}\): " (brackets escaped)
148
+ Step 2: "\([أ-ي]\): " (token expanded - its [] preserved)
149
+ ```
150
+
151
+ **Implementation in `tokens.ts`:**
152
+ ```typescript
153
+ export const escapeTemplateBrackets = (pattern: string): string => {
154
+ return pattern.replace(/(\{\{[^}]*\}\})|([()[\]])/g, (match, token, bracket) => {
155
+ if (token) return token; // Preserve {{tokens}}
156
+ return `\\${bracket}`; // Escape brackets
157
+ });
158
+ };
159
+ ```
160
+
161
+ **Where escaping is applied:**
162
+ - `processPattern()` - handles `lineStartsWith`, `lineStartsAfter`, `lineEndsWith`
163
+ - Direct `template` processing in `buildRuleRegex()`
164
+ - **NOT** applied to `regex` patterns (user has full control)
165
+
136
166
  ### Breakpoints Post-Processing Algorithm
137
167
 
138
168
  The `breakpoints` option provides a post-processing mechanism for limiting segment size. Unlike the deprecated `maxSpan` (which was per-rule), breakpoints runs AFTER all structural rules.
@@ -291,6 +321,8 @@ bunx biome lint .
291
321
 
292
322
  8. **Escaping in tests requires care**: TypeScript string `'\\.'` creates regex `\.`, but regex literal `/\./` is already escaped. Double-backslash in strings, single in literals.
293
323
 
324
+ 9. **Auto-escaping improves DX significantly**: Users expect `(أ):` to match literal parentheses. Auto-escaping `()[]` in template patterns (but not in `regex`) gives intuitive behavior while preserving `regex` as the power-user escape hatch.
325
+
294
326
  ### Architecture Insights
295
327
 
296
328
  - **Declarative > Imperative**: Users describe patterns, library handles regex
package/README.md CHANGED
@@ -143,7 +143,31 @@ const rules = [{
143
143
  | `template` | Depends | Custom pattern with full control |
144
144
  | `regex` | Depends | Raw regex for complex cases |
145
145
 
146
- ### 5. Page Constraints
146
+ ### 5. Auto-Escaping Brackets
147
+
148
+ In `lineStartsWith`, `lineStartsAfter`, `lineEndsWith`, and `template` patterns, parentheses `()` and square brackets `[]` outside of `{{token}}` delimiters are **automatically escaped**. This means you can write intuitive patterns without manual escaping:
149
+
150
+ ```typescript
151
+ // Write this (clean and readable):
152
+ { lineStartsAfter: ['({{harf}}): '], split: 'at' }
153
+
154
+ // Instead of this (verbose escaping):
155
+ { lineStartsAfter: ['\\({{harf}}\\): '], split: 'at' }
156
+ ```
157
+
158
+ **Important**: Brackets inside `{{tokens}}` are NOT escaped, so token patterns like `{{harf}}`, which expands to `[أ-ي]`, still work correctly.
159
+
160
+ For full regex control (character classes, capturing groups), use the `regex` pattern type, which does NOT auto-escape:
161
+
162
+ ```typescript
163
+ // Character class [أب] matches أ or ب
164
+ { regex: '^[أب] ', split: 'at' }
165
+
166
+ // Capturing group (test|text) matches either
167
+ { regex: '^(test|text) ', split: 'at' }
168
+ ```
169
+
170
+ ### 6. Page Constraints
147
171
 
148
172
  Limit rules to specific page ranges:
149
173
 
@@ -156,7 +180,7 @@ Limit rules to specific page ranges:
156
180
  }
157
181
  ```
158
182
 
159
- ### 6. Occurrence Filtering
183
+ ### 7. Occurrence Filtering
160
184
 
161
185
  Control which matches to use:
162
186
 
@@ -395,30 +419,81 @@ console.log(TOKEN_PATTERNS.narrated);
395
419
  // 'حدثنا|أخبرنا|حدثني|وحدثنا|أنبأنا|سمعت'
396
420
  ```
397
421
 
398
- ### Pattern Detection
422
+ ### Pattern Detection Utilities
423
+
424
+ These functions auto-detect tokens in text, which is useful for building UI tools that suggest rule configurations from user-highlighted text.
399
425
 
400
- Auto-detect tokens in Arabic text for building rules:
426
+ #### `detectTokenPatterns(text)`
427
+
428
+ Analyzes text and returns all detected token patterns with their positions.
401
429
 
402
430
  ```typescript
403
- import { detectTokenPatterns, analyzeTextForRule } from 'flappa-doormal';
431
+ import { detectTokenPatterns } from 'flappa-doormal';
404
432
 
405
- // Detect individual tokens
406
- const tokens = detectTokenPatterns('٣٤ - حدثنا');
433
+ const detected = detectTokenPatterns("٣٤ - حدثنا");
434
+ // Returns:
407
435
  // [
408
436
  // { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
409
437
  // { token: 'dash', match: '-', index: 3, endIndex: 4 },
410
438
  // { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
411
439
  // ]
440
+ ```
441
+
442
+ #### `generateTemplateFromText(text, detected)`
443
+
444
+ Converts text to a template string using detected patterns.
445
+
446
+ ```typescript
447
+ import { detectTokenPatterns, generateTemplateFromText } from 'flappa-doormal';
448
+
449
+ const text = "٣٤ - ";
450
+ const detected = detectTokenPatterns(text);
451
+ const template = generateTemplateFromText(text, detected);
452
+ // Returns: "{{raqms}} {{dash}} "
453
+ ```
454
+
455
+ #### `suggestPatternConfig(detected)`
456
+
457
+ Suggests the best pattern type and options based on detected patterns.
458
+
459
+ ```typescript
460
+ import { detectTokenPatterns, suggestPatternConfig } from 'flappa-doormal';
461
+
462
+ // For numbered patterns (hadith-style)
463
+ const hadithDetected = detectTokenPatterns("٣٤ - ");
464
+ suggestPatternConfig(hadithDetected);
465
+ // Returns: { patternType: 'lineStartsAfter', fuzzy: false, metaType: 'hadith' }
412
466
 
413
- // Get complete rule suggestion
414
- const rule = analyzeTextForRule('٣٤ - ');
467
+ // For structural patterns (chapter markers)
468
+ const chapterDetected = detectTokenPatterns("باب الصلاة");
469
+ suggestPatternConfig(chapterDetected);
470
+ // Returns: { patternType: 'lineStartsWith', fuzzy: true, metaType: 'bab' }
471
+ ```
472
+
473
+ #### `analyzeTextForRule(text)`
474
+
475
+ Complete analysis that combines detection, template generation, and config suggestion.
476
+
477
+ ```typescript
478
+ import { analyzeTextForRule } from 'flappa-doormal';
479
+
480
+ const result = analyzeTextForRule("٣٤ - حدثنا");
481
+ // Returns:
415
482
  // {
416
- // template: '{{raqms}} {{dash}} ',
483
+ // template: "{{raqms}} {{dash}} {{naql}}",
417
484
  // patternType: 'lineStartsAfter',
418
485
  // fuzzy: false,
419
486
  // metaType: 'hadith',
420
487
  // detected: [...]
421
488
  // }
489
+
490
+ // Use the result to build a rule:
491
+ const rule = {
492
+ [result.patternType]: [result.template],
493
+ split: 'at',
494
+ fuzzy: result.fuzzy,
495
+ meta: { type: result.metaType }
496
+ };
422
497
  ```
423
498
 
424
499
  ## Types
@@ -459,56 +534,19 @@ type Segment = {
459
534
  };
460
535
  ```
461
536
 
462
- ### `Logger`
537
+ ### `DetectedPattern`
463
538
 
464
- Optional logging interface for debugging segmentation:
539
+ Result from pattern detection utilities.
465
540
 
466
541
  ```typescript
467
- interface Logger {
468
- trace?: (message: string, ...args: unknown[]) => void; // Per-iteration details
469
- debug?: (message: string, ...args: unknown[]) => void; // Detailed operations
470
- info?: (message: string, ...args: unknown[]) => void; // Key progress points
471
- warn?: (message: string, ...args: unknown[]) => void; // Potential issues
472
- error?: (message: string, ...args: unknown[]) => void; // Critical failures
473
- }
474
- ```
475
-
476
- ## Debugging
477
-
478
- ### Using the Logger
479
-
480
- Pass a `logger` option to receive detailed information about the segmentation process:
481
-
482
- ```typescript
483
- // Console logger for development
484
- const segments = segmentPages(pages, {
485
- rules: [...],
486
- logger: {
487
- debug: console.debug,
488
- info: console.info,
489
- warn: console.warn,
490
- }
491
- });
492
-
493
- // Production logger (only errors)
494
- const segments = segmentPages(pages, {
495
- rules: [...],
496
- logger: {
497
- error: (msg, ...args) => myLoggingService.error(msg, args),
498
- }
499
- });
542
+ type DetectedPattern = {
543
+ token: string; // Token name (e.g., 'raqms', 'dash')
544
+ match: string; // The matched text
545
+ index: number; // Start index in original text
546
+ endIndex: number; // End index (exclusive)
547
+ };
500
548
  ```
501
549
 
502
- **Verbosity levels:**
503
- - `trace` - Per-iteration loop details (very verbose)
504
- - `debug` - Segment processing, pattern matching
505
- - `info` - Start/completion of breakpoint processing
506
- - `warn` - Safety checks triggered
507
- - `error` - Infinite loop detection
508
-
509
- When no logger is provided, no logging overhead is incurred.
510
-
511
-
512
550
  ## Usage with Next.js / Node.js
513
551
 
514
552
  ```typescript
@@ -550,7 +588,7 @@ console.log(`Found ${segments.length} segments`);
550
588
  # Install dependencies
551
589
  bun install
552
590
 
553
- # Run tests (251 tests)
591
+ # Run tests (222 tests)
554
592
  bun test
555
593
 
556
594
  # Build
package/dist/index.d.mts CHANGED
@@ -67,22 +67,33 @@ declare const makeDiacriticInsensitive: (text: string) => string;
67
67
  //#endregion
68
68
  //#region src/segmentation/types.d.ts
69
69
  /**
70
- * Literal regex pattern rule - no token expansion is applied.
70
+ * Literal regex pattern rule - no token expansion or auto-escaping is applied.
71
+ *
72
+ * Use this when you need full control over the regex pattern, including:
73
+ * - Character classes like `[أب]` to match أ or ب
74
+ * - Capturing groups like `(test|text)` for alternation
75
+ * - Any other regex syntax that would be escaped in template patterns
71
76
  *
72
- * Use this when you need full control over the regex pattern.
73
77
  * If the regex contains capturing groups, the captured content
74
78
  * will be used as the segment content.
75
79
  *
80
+ * **Note**: Unlike `template`, `lineStartsWith`, etc., this pattern type
81
+ * does NOT auto-escape `()[]`. You have full regex control.
82
+ *
76
83
  * @example
77
84
  * // Match Arabic-Indic numbers followed by a dash
78
85
  * { regex: '^[٠-٩]+ - ', split: 'at' }
79
86
  *
80
87
  * @example
88
+ * // Character class - matches أ or ب
89
+ * { regex: '^[أب] ', split: 'at' }
90
+ *
91
+ * @example
81
92
  * // Capture group - content after the marker becomes segment content
82
93
  * { regex: '^[٠-٩]+ - (.*)', split: 'at' }
83
94
  */
84
95
  type RegexPattern = {
85
- /** Raw regex pattern string (no token expansion) */
96
+ /** Raw regex pattern string (no token expansion, no auto-escaping) */
86
97
  regex: string;
87
98
  };
88
99
  /**
@@ -90,6 +101,10 @@ type RegexPattern = {
90
101
  *
91
102
  * Supports all tokens defined in `TOKEN_PATTERNS` and named capture syntax.
92
103
  *
104
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
105
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
106
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
107
+ *
93
108
  * @example
94
109
  * // Using tokens for Arabic-Indic digits
95
110
  * { template: '^{{raqms}} {{dash}}', split: 'at' }
@@ -98,10 +113,14 @@ type RegexPattern = {
98
113
  * // Named capture to extract hadith number into metadata
99
114
  * { template: '^{{raqms:hadithNum}} {{dash}}', split: 'at' }
100
115
  *
116
+ * @example
117
+ * // Auto-escaped brackets - matches literal (أ):
118
+ * { template: '^({{harf}}): ', split: 'at' }
119
+ *
101
120
  * @see TOKEN_PATTERNS for available tokens
102
121
  */
103
122
  type TemplatePattern = {
104
- /** Template string with `{{token}}` or `{{token:name}}` placeholders */
123
+ /** Template string with `{{token}}` or `{{token:name}}` placeholders. Brackets `()[]` are auto-escaped. */
105
124
  template: string;
106
125
  };
107
126
  /**
@@ -113,6 +132,10 @@ type TemplatePattern = {
113
132
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
114
133
  * diacritic-insensitive Arabic matching.
115
134
  *
135
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
136
+ * `{{tokens}}` are automatically escaped. Write `({{harf}})` instead of
137
+ * `\\({{harf}}\\)`. For raw regex control, use `regex` pattern type.
138
+ *
116
139
  * @example
117
140
  * // Split at chapter headings (marker included in content)
118
141
  * { lineStartsWith: ['## ', '### '], split: 'at' }
@@ -120,9 +143,13 @@ type TemplatePattern = {
120
143
  * @example
121
144
  * // Split at Arabic book/chapter markers with fuzzy matching
122
145
  * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
146
+ *
147
+ * @example
148
+ * // Auto-escaped brackets - matches literal (أ)
149
+ * { lineStartsWith: ['({{harf}}) '], split: 'at' }
123
150
  */
124
151
  type LineStartsWithPattern = {
125
- /** Array of patterns that mark line beginnings (marker included in content) */
152
+ /** Array of patterns that mark line beginnings (marker included in content). Brackets `()[]` are auto-escaped. */
126
153
  lineStartsWith: string[];
127
154
  };
128
155
  /**
@@ -136,6 +163,10 @@ type LineStartsWithPattern = {
136
163
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
137
164
  * diacritic-insensitive Arabic matching.
138
165
  *
166
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
167
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
168
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
169
+ *
139
170
  * @example
140
171
  * // Split at numbered hadiths, capturing content without the number prefix
141
172
  * // Content extends to next split, not just end of that line
@@ -144,9 +175,13 @@ type LineStartsWithPattern = {
144
175
  * @example
145
176
  * // Extract hadith number to metadata while stripping the prefix
146
177
  * { lineStartsAfter: ['{{raqms:num}} {{dash}} '], split: 'at' }
178
+ *
179
+ * @example
180
+ * // Auto-escaped brackets - matches literal (أ): prefix
181
+ * { lineStartsAfter: ['({{harf}}): '], split: 'at' }
147
182
  */
148
183
  type LineStartsAfterPattern = {
149
- /** Array of patterns that mark line beginnings (marker excluded from content) */
184
+ /** Array of patterns that mark line beginnings (marker excluded from content). Brackets `()[]` are auto-escaped. */
150
185
  lineStartsAfter: string[];
151
186
  };
152
187
  /**
@@ -157,12 +192,19 @@ type LineStartsAfterPattern = {
157
192
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
158
193
  * diacritic-insensitive Arabic matching.
159
194
  *
195
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
196
+ * `{{tokens}}` are automatically escaped. For raw regex control, use `regex` pattern type.
197
+ *
160
198
  * @example
161
199
  * // Split at lines ending with Arabic sentence-ending punctuation
162
200
  * { lineEndsWith: ['۔', '؟', '!'], split: 'after' }
201
+ *
202
+ * @example
203
+ * // Auto-escaped brackets - matches literal (انتهى) suffix
204
+ * { lineEndsWith: ['(انتهى)'], split: 'after' }
163
205
  */
164
206
  type LineEndsWithPattern = {
165
- /** Array of patterns that mark line endings */
207
+ /** Array of patterns that mark line endings. Brackets `()[]` are auto-escaped. */
166
208
  lineEndsWith: string[];
167
209
  };
168
210
  /**
@@ -749,6 +791,51 @@ declare const normalizeLineEndings: (content: string) => string;
749
791
  * expandTokensWithCaptures('{{raqms:num}} {{dash}}')
750
792
  * // → { pattern: '(?<num>[\\u0660-\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }
751
793
  */
794
+ /**
795
+ * Token definitions mapping human-readable token names to regex patterns.
796
+ *
797
+ * Tokens are used in template strings with double-brace syntax:
798
+ * - `{{token}}` - Expands to the pattern (non-capturing in context)
799
+ * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`
800
+ * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`
801
+ *
802
+ * @remarks
803
+ * These patterns are designed for Arabic text matching. For diacritic-insensitive
804
+ * matching of Arabic patterns, use the `fuzzy: true` option in split rules,
805
+ * which applies `makeDiacriticInsensitive()` to the expanded patterns.
806
+ *
807
+ * @example
808
+ * // Using tokens in a split rule
809
+ * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
810
+ *
811
+ * @example
812
+ * // Using tokens with named captures
813
+ * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
814
+ */
815
+ /**
816
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
817
+ * but preserves content inside `{{...}}` token delimiters.
818
+ *
819
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
820
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
821
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
822
+ *
823
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
824
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
825
+ *
826
+ * @example
827
+ * escapeTemplateBrackets('({{harf}}): ')
828
+ * // → '\\({{harf}}\\): '
829
+ *
830
+ * @example
831
+ * escapeTemplateBrackets('[{{raqm}}] ')
832
+ * // → '\\[{{raqm}}\\] '
833
+ *
834
+ * @example
835
+ * escapeTemplateBrackets('{{harf}}')
836
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
837
+ */
838
+ declare const escapeTemplateBrackets: (pattern: string) => string;
752
839
  /**
753
840
  * Token definitions mapping human-readable token names to regex patterns.
754
841
  *
@@ -1000,5 +1087,5 @@ declare const analyzeTextForRule: (text: string) => {
1000
1087
  detected: DetectedPattern[];
1001
1088
  } | null;
1002
1089
  //#endregion
1003
- export { type Breakpoint, type BreakpointRule, type DetectedPattern, type ExpandResult, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
1090
+ export { type Breakpoint, type BreakpointRule, type DetectedPattern, type ExpandResult, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
1004
1091
  //# sourceMappingURL=index.d.mts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;AC9IiB;AAoBG;AAsBM;AAyBC;AAiBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,cDhYC,WCgYqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;AC2KA;;;;;;;;ACvyBA;AAaA;;;;AC6NA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;ACtfA;AA0Da,cL2FA,wBK3FsC,EAAA,CAAA,IAAA,EAAe,MAAA,EAAA,GAAA,MAAA;;;;ALJlE;AA+FA;;;;;AC9IiB;AAoBG;AAsBM;AAyBC;AAiBH;;;;KApFnB,YAAA,GAuGC;EACA;EAAmB,KAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;KA1eK,eAAA,GA0kBc;EAiBP;;;;AC2KZ;;;;;;;;ACvyBA;AAaA;;;;AC6NA;AA2CA;AAWA;AA2DA,KHpSK,qBAAA,GGoSQ;EAqGA;EAuBA,cAAA,EAAA,MAOZ,EAAA;AAcD,CAAA;AAgBA;;;;ACtfA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;KJpGK,sBAAA;;;;;;;;;;;;;;;;KAiBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AAzkBgB;AAiBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC2KA;;;;;;;;ACvyBA;AAaA;;;;AC6Na,cF6jBA,YE7jBgB,EAAA,CAAA,KAAM,EF6jBC,IE7jBD,EAAA,EAAA,OAAA,EF6jBkB,mBE7jBlB,EAAA,GF6jBwC,OE7jBxC,EAAA;;;;AJ9KnC;AA+FA;;;;AC9IK,cEbQ,aFaI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,
MAAA;AAAA;AAoBG;AAsBM;AAyBC;AAiBH;;;;;AAoBlB,cExGO,oBFwGP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADzDN;AA+FA;;;;;AC9IiB;AAoBG;AAsBM;AAyBC;AAiBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;AC2KA;;;;;cE7jBa,gBAAgB;;;AD1O7B;AAaA;;;;AC6NA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;cAvOa;;;AC/Qb;AA0DA;AA4DA;AAuBA;AAiCa,KD4GD,YAAA,GCzFX;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cDoJY,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;AJhcb;AA+FA;;;;;AC9IiB;AAoBG;AA+Cf,KI1EO,eAAA,GJ0Ee;EAiBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cI3iBN,mBJ2iBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GI3iBgC,eJ2iBhC,EAAA;AAiBnB;;;;AC2KA;;;;;;;;ACvyBA;AAaa,cE+GA,wBF/G2E,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE+GvB,eF/GuB,EAAA,EAAA,GAAA,MAAA;;;;AC6NxF;AA2CA;AAWA;AA2Da,cCxMA,oBDwRZ,EAhFsG,CAAA,QAAA,ECvMzF,eDuRb,EAAA,EAAA,GAAA;EAqBY,WAAA,EAAA,gBAAiF,GAAA,iBAAA;EAuBjF,KAAA,EAAA,OAAA;EAqBA,QAAA,CAAA,EAAA,MAAA;AAgBb,CAAA;;;;ACtfA;AA0DA;AA4DA;AAuBa,cAiCA,kBAhCC,EAAA,CAAA,IAAA,EAAA,MAAe,EAAA,GAAA;EAgChB,QAAA,EAAA,MAAA;;;;YAOC"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,cD1aC,WC0aqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA,KA/VK,YAAA,GA+VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AAhlBgB;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDa,cF4oBA,YEnoBZ,EAAA,CAAA,KAAA,EFmoBmC,IEnoBnC,EAAA,EAAA,OAAA,EFmoBoD,mBEnoBpD,EAAA,GFmoB0E,OEnoB1E,EAAA;;;;AJXD;AA+FA;;;;ACnIK,cExBQ,aFwBI,EA
AA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cElJO,oBFkJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;cApea;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;AJteb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cIrlBN,mBJqlBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIrlBgC,eJqlBhC,EAAA;AAiBnB;;;;ACoCA;;;;;;;;AC1sBA;AAaa,cE+GA,wBF/G2E,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE+GvB,eF/GuB,EAAA,EAAA,GAAA,MAAA;;;;ACiDxF;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAgFZ,GAAA,iBAhFsG;EAqG1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC5hBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}