npm - flappa-doormal - Versions diffs - 2.1.0 → 2.2.0 - Mend

flappa-doormal 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/AGENTS.md CHANGED Viewed

@@ -133,6 +133,36 @@ For patterns like `^٦٦٩٦ - (content)`, the content capture is the *last* pos
 // Solution: Iterate backward from m.length-1 to find last defined capture
 ```
+### Auto-Escaping Brackets in Templates
+Template patterns (`lineStartsWith`, `lineStartsAfter`, `lineEndsWith`, `template`) automatically escape `()[]` characters that appear **outside** of `{{token}}` delimiters. This allows intuitive patterns without manual escaping.
+**Processing order:**
+1. `escapeTemplateBrackets()` escapes `()[]` outside `{{...}}`
+2. `expandTokensWithCaptures()` expands tokens to regex patterns
+3. Fuzzy transform applied (if enabled)
+```
+Input:  "({{harf}}): "
+Step 1: "\({{harf}}\): "           (brackets escaped)
+Step 2: "\([أ-ي]\): "              (token expanded - its [] preserved)
+```
+**Implementation in `tokens.ts`:**
+```typescript
+export const escapeTemplateBrackets = (pattern: string): string => {
+    return pattern.replace(/(\{\{[^}]*\}\})|([()[\]])/g, (match, token, bracket) => {
+        if (token) return token;      // Preserve {{tokens}}
+        return `\\${bracket}`;        // Escape brackets
+    });
+};
+```
+**Where escaping is applied:**
+- `processPattern()` - handles `lineStartsWith`, `lineStartsAfter`, `lineEndsWith`
+- Direct `template` processing in `buildRuleRegex()`
+- **NOT** applied to `regex` patterns (user has full control)
 ### Breakpoints Post-Processing Algorithm
 The `breakpoints` option provides a post-processing mechanism for limiting segment size. Unlike the deprecated `maxSpan` (which was per-rule), breakpoints runs AFTER all structural rules.
@@ -291,6 +321,8 @@ bunx biome lint .
 8. **Escaping in tests requires care**: TypeScript string `'\\.'` creates regex `\.`, but regex literal `/\./` is already escaped. Double-backslash in strings, single in literals.
+9. **Auto-escaping improves DX significantly**: Users expect `(أ):` to match literal parentheses. Auto-escaping `()[]` in template patterns (but not in `regex` patterns) gives intuitive behavior while preserving a power-user escape hatch.
 ### Architecture Insights
 - **Declarative > Imperative**: Users describe patterns, library handles regex

package/README.md CHANGED Viewed

@@ -143,7 +143,31 @@ const rules = [{
 | `template` | Depends | Custom pattern with full control |
 | `regex` | Depends | Raw regex for complex cases |
-### 5. Page Constraints
+### 5. Auto-Escaping Brackets
+In `lineStartsWith`, `lineStartsAfter`, `lineEndsWith`, and `template` patterns, parentheses `()` and square brackets `[]` are **automatically escaped**. This means you can write intuitive patterns without manual escaping:
+```typescript
+// Write this (clean and readable):
+{ lineStartsAfter: ['({{harf}}): '], split: 'at' }
+// Instead of this (verbose escaping):
+{ lineStartsAfter: ['\\({{harf}}\\): '], split: 'at' }
+```
+**Important**: Escaping is applied *before* token expansion, so brackets produced by expanding `{{tokens}}` are NOT escaped - a token like `{{harf}}`, which expands to `[أ-ي]`, still works correctly as a character class.
+For full regex control (character classes, capturing groups), use the `regex` pattern type, which does NOT auto-escape:
+```typescript
+// Character class [أب] matches أ or ب
+{ regex: '^[أب] ', split: 'at' }
+// Capturing group (test|text) matches either
+{ regex: '^(test|text) ', split: 'at' }
+```
+### 6. Page Constraints
 Limit rules to specific page ranges:
@@ -156,7 +180,7 @@ Limit rules to specific page ranges:
 }
 ```
-### 6. Occurrence Filtering
+### 7. Occurrence Filtering
 Control which matches to use:
@@ -395,30 +419,81 @@ console.log(TOKEN_PATTERNS.narrated);
 // 'حدثنا|أخبرنا|حدثني|وحدثنا|أنبأنا|سمعت'
 ```
-### Pattern Detection
+### Pattern Detection Utilities
+These functions help auto-detect tokens in text, useful for building UI tools that suggest rule configurations from user-highlighted text.
-Auto-detect tokens in Arabic text for building rules:
+#### `detectTokenPatterns(text)`
+Analyzes text and returns all detected token patterns with their positions.
 ```typescript
-import { detectTokenPatterns, analyzeTextForRule } from 'flappa-doormal';
+import { detectTokenPatterns } from 'flappa-doormal';
-// Detect individual tokens
-const tokens = detectTokenPatterns('٣٤ - حدثنا');
+const detected = detectTokenPatterns("٣٤ - حدثنا");
+// Returns:
 // [
 //   { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
 //   { token: 'dash', match: '-', index: 3, endIndex: 4 },
 //   { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
 // ]
+```
+#### `generateTemplateFromText(text, detected)`
+Converts text to a template string using detected patterns.
+```typescript
+import { detectTokenPatterns, generateTemplateFromText } from 'flappa-doormal';
+const text = "٣٤ - ";
+const detected = detectTokenPatterns(text);
+const template = generateTemplateFromText(text, detected);
+// Returns: "{{raqms}} {{dash}} "
+```
+#### `suggestPatternConfig(detected)`
+Suggests the best pattern type and options based on detected patterns.
+```typescript
+import { detectTokenPatterns, suggestPatternConfig } from 'flappa-doormal';
+// For numbered patterns (hadith-style)
+const hadithDetected = detectTokenPatterns("٣٤ - ");
+suggestPatternConfig(hadithDetected);
+// Returns: { patternType: 'lineStartsAfter', fuzzy: false, metaType: 'hadith' }
-// Get complete rule suggestion
-const rule = analyzeTextForRule('٣٤ - ');
+// For structural patterns (chapter markers)
+const chapterDetected = detectTokenPatterns("باب الصلاة");
+suggestPatternConfig(chapterDetected);
+// Returns: { patternType: 'lineStartsWith', fuzzy: true, metaType: 'bab' }
+```
+#### `analyzeTextForRule(text)`
+Complete analysis that combines detection, template generation, and config suggestion.
+```typescript
+import { analyzeTextForRule } from 'flappa-doormal';
+const result = analyzeTextForRule("٣٤ - حدثنا");
+// Returns:
 // {
-//   template: '{{raqms}} {{dash}} ',
+//   template: "{{raqms}} {{dash}} {{naql}}",
 //   patternType: 'lineStartsAfter',
 //   fuzzy: false,
 //   metaType: 'hadith',
 //   detected: [...]
 // }
+// Use the result to build a rule:
+const rule = {
+  [result.patternType]: [result.template],
+  split: 'at',
+  fuzzy: result.fuzzy,
+  meta: { type: result.metaType }
+};
 ```
 ## Types
@@ -459,56 +534,19 @@ type Segment = {
 };
 ```
-### `Logger`
+### `DetectedPattern`
-Optional logging interface for debugging segmentation:
+Result from pattern detection utilities.
 ```typescript
-interface Logger {
-  trace?: (message: string, ...args: unknown[]) => void;  // Per-iteration details
-  debug?: (message: string, ...args: unknown[]) => void;  // Detailed operations
-  info?: (message: string, ...args: unknown[]) => void;   // Key progress points
-  warn?: (message: string, ...args: unknown[]) => void;   // Potential issues
-  error?: (message: string, ...args: unknown[]) => void;  // Critical failures
-}
-```
-## Debugging
-### Using the Logger
-Pass a `logger` option to receive detailed information about the segmentation process:
-```typescript
-// Console logger for development
-const segments = segmentPages(pages, {
-  rules: [...],
-  logger: {
-    debug: console.debug,
-    info: console.info,
-    warn: console.warn,
-  }
-});
-// Production logger (only errors)
-const segments = segmentPages(pages, {
-  rules: [...],
-  logger: {
-    error: (msg, ...args) => myLoggingService.error(msg, args),
-  }
-});
+type DetectedPattern = {
+  token: string;    // Token name (e.g., 'raqms', 'dash')
+  match: string;    // The matched text
+  index: number;    // Start index in original text
+  endIndex: number; // End index (exclusive)
+};
 ```
-**Verbosity levels:**
-- `trace` - Per-iteration loop details (very verbose)
-- `debug` - Segment processing, pattern matching
-- `info` - Start/completion of breakpoint processing
-- `warn` - Safety checks triggered
-- `error` - Infinite loop detection
-When no logger is provided, no logging overhead is incurred.
 ## Usage with Next.js / Node.js
 ```typescript
@@ -550,7 +588,7 @@ console.log(`Found ${segments.length} segments`);
 # Install dependencies
 bun install
-# Run tests (251 tests)
+# Run tests (222 tests)
 bun test
 # Build

package/dist/index.d.mts CHANGED Viewed

@@ -67,22 +67,33 @@ declare const makeDiacriticInsensitive: (text: string) => string;
 //#endregion
 //#region src/segmentation/types.d.ts
 /**
- * Literal regex pattern rule - no token expansion is applied.
+ * Literal regex pattern rule - no token expansion or auto-escaping is applied.
+ *
+ * Use this when you need full control over the regex pattern, including:
+ * - Character classes like `[أب]` to match أ or ب
+ * - Capturing groups like `(test|text)` for alternation
+ * - Any other regex syntax that would be escaped in template patterns
  *
- * Use this when you need full control over the regex pattern.
  * If the regex contains capturing groups, the captured content
  * will be used as the segment content.
  *
+ * **Note**: Unlike `template`, `lineStartsWith`, etc., this pattern type
+ * does NOT auto-escape `()[]`. You have full regex control.
+ *
  * @example
  * // Match Arabic-Indic numbers followed by a dash
  * { regex: '^[٠-٩]+ - ', split: 'at' }
  *
  * @example
+ * // Character class - matches أ or ب
+ * { regex: '^[أب] ', split: 'at' }
+ *
+ * @example
  * // Capture group - content after the marker becomes segment content
  * { regex: '^[٠-٩]+ - (.*)', split: 'at' }
  */
 type RegexPattern = {
-  /** Raw regex pattern string (no token expansion) */
+  /** Raw regex pattern string (no token expansion, no auto-escaping) */
   regex: string;
 };
 /**
@@ -90,6 +101,10 @@ type RegexPattern = {
  *
  * Supports all tokens defined in `TOKEN_PATTERNS` and named capture syntax.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Using tokens for Arabic-Indic digits
  * { template: '^{{raqms}} {{dash}}', split: 'at' }
@@ -98,10 +113,14 @@ type RegexPattern = {
  * // Named capture to extract hadith number into metadata
  * { template: '^{{raqms:hadithNum}} {{dash}}', split: 'at' }
  *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ):
+ * { template: '^({{harf}}): ', split: 'at' }
+ *
  * @see TOKEN_PATTERNS for available tokens
  */
 type TemplatePattern = {
-  /** Template string with `{{token}}` or `{{token:name}}` placeholders */
+  /** Template string with `{{token}}` or `{{token:name}}` placeholders. Brackets `()[]` are auto-escaped. */
   template: string;
 };
 /**
@@ -113,6 +132,10 @@ type TemplatePattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}})` instead of
+ * `\\({{harf}}\\)`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at chapter headings (marker included in content)
  * { lineStartsWith: ['## ', '### '], split: 'at' }
@@ -120,9 +143,13 @@ type TemplatePattern = {
  * @example
  * // Split at Arabic book/chapter markers with fuzzy matching
  * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ)
+ * { lineStartsWith: ['({{harf}}) '], split: 'at' }
  */
 type LineStartsWithPattern = {
-  /** Array of patterns that mark line beginnings (marker included in content) */
+  /** Array of patterns that mark line beginnings (marker included in content). Brackets `()[]` are auto-escaped. */
   lineStartsWith: string[];
 };
 /**
@@ -136,6 +163,10 @@ type LineStartsWithPattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at numbered hadiths, capturing content without the number prefix
  * // Content extends to next split, not just end of that line
@@ -144,9 +175,13 @@ type LineStartsWithPattern = {
  * @example
  * // Extract hadith number to metadata while stripping the prefix
  * { lineStartsAfter: ['{{raqms:num}} {{dash}} '], split: 'at' }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ): prefix
+ * { lineStartsAfter: ['({{harf}}): '], split: 'at' }
  */
 type LineStartsAfterPattern = {
-  /** Array of patterns that mark line beginnings (marker excluded from content) */
+  /** Array of patterns that mark line beginnings (marker excluded from content). Brackets `()[]` are auto-escaped. */
   lineStartsAfter: string[];
 };
 /**
@@ -157,12 +192,19 @@ type LineStartsAfterPattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at lines ending with Arabic sentence-ending punctuation
  * { lineEndsWith: ['۔', '؟', '!'], split: 'after' }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (انتهى) suffix
+ * { lineEndsWith: ['(انتهى)'], split: 'after' }
  */
 type LineEndsWithPattern = {
-  /** Array of patterns that mark line endings */
+  /** Array of patterns that mark line endings. Brackets `()[]` are auto-escaped. */
   lineEndsWith: string[];
 };
 /**
@@ -749,6 +791,51 @@ declare const normalizeLineEndings: (content: string) => string;
  * expandTokensWithCaptures('{{raqms:num}} {{dash}}')
  * // → { pattern: '(?<num>[\\u0660-\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }
  */
+/**
+ * Token definitions mapping human-readable token names to regex patterns.
+ *
+ * Tokens are used in template strings with double-brace syntax:
+ * - `{{token}}` - Expands to the pattern (non-capturing in context)
+ * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`
+ * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`
+ *
+ * @remarks
+ * These patterns are designed for Arabic text matching. For diacritic-insensitive
+ * matching of Arabic patterns, use the `fuzzy: true` option in split rules,
+ * which applies `makeDiacriticInsensitive()` to the expanded patterns.
+ *
+ * @example
+ * // Using tokens in a split rule
+ * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
+ *
+ * @example
+ * // Using tokens with named captures
+ * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
+ */
+/**
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
+ * but preserves content inside `{{...}}` token delimiters.
+ *
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
+ *
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
+ *
+ * @example
+ * escapeTemplateBrackets('({{harf}}): ')
+ * // → '\\({{harf}}\\): '
+ *
+ * @example
+ * escapeTemplateBrackets('[{{raqm}}] ')
+ * // → '\\[{{raqm}}\\] '
+ *
+ * @example
+ * escapeTemplateBrackets('{{harf}}')
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
+ */
+declare const escapeTemplateBrackets: (pattern: string) => string;
 /**
  * Token definitions mapping human-readable token names to regex patterns.
  *
@@ -1000,5 +1087,5 @@ declare const analyzeTextForRule: (text: string) => {
   detected: DetectedPattern[];
 } | null;
 //#endregion
-export { type Breakpoint, type BreakpointRule, type DetectedPattern, type ExpandResult, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
+export { type Breakpoint, type BreakpointRule, type DetectedPattern, type ExpandResult, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.d.mts.map

package/dist/index.d.mts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;~~AC9IiB~~;~~AAoBG~~;~~AAsBM~~;~~AAyBC~~;~~AAiBH~~;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,~~cDhYC~~,~~WCgYqB~~,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;~~AC2KA~~;;;;;;;;~~ACvyBA~~;AAaA;;;;~~AC6NA~~;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;~~ACtfA;AA0Da~~,~~cL2FA~~,~~wBK3FsC~~,EAAA,CAAA,IAAA,~~EAAe~~,MAAA,EAAA,GAAA,MAAA;;;;~~ALJlE~~;AA+FA;;;;;~~AC9IiB~~;~~AAoBG~~;~~AAsBM~~;~~AAyBC~~;~~AAiBH;;;;KApFnB~~,YAAA,~~GAuGC~~;~~EACA~~;~~EAAmB~~,KAAA,EAAA,~~MAAA~~;~~AAAA~~,CAAA;~~AA0FzB;AAAkD;AA4GlD~~;;;;;~~AAkBA;AAqCA;AA0EA;AA8BA;AAiDA~~;;;;~~KA1eK,eAAA,GA0kBc;EAiBP;;;;AC2KZ~~;;;;;;;;~~ACvyBA~~;AAaA;;;;~~AC6NA~~;~~AA2CA;AAWA;AA2DA~~,~~KHpSK~~,~~qBAAA~~,~~GGoSQ~~;~~EAqGA~~;~~EAuBA~~,~~cAAA~~,EAAA,~~MAOZ,EAAA~~;~~AAcD~~,CAAA;AAgBA;;;;~~ACtfA~~;AA0DA;AA4DA;AAuBA;AAiCA~~;;;;;;;;;;;;KJpGK~~,sBAAA~~;;;;;;;;;;;;;;;;KAiBA~~,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;~~AAzkBgB~~;~~AAiBH~~;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;~~AC2KA~~;;;;;;;;~~ACvyBA~~;
AAaA;;;;~~AC6Na~~,~~cF6jBA~~,~~YE7jBgB~~,EAAA,CAAA,~~KAAM~~,~~EF6jBC~~,~~IE7jBD~~,EAAA,EAAA,OAAA,~~EF6jBkB~~,~~mBE7jBlB~~,EAAA,~~GF6jBwC~~,~~OE7jBxC~~,EAAA;;;;~~AJ9KnC~~;AA+FA;;;;~~AC9IK~~,~~cEbQ~~,~~aFaI~~,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;~~AAoBG~~;~~AAsBM~~;~~AAyBC~~;~~AAiBH~~;;;;;AAoBlB,~~cExGO~~,~~oBFwGP~~,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;~~ADzDN~~;AA+FA;;;;;~~AC9IiB~~;~~AAoBG~~;~~AAsBM~~;~~AAyBC~~;~~AAiBH~~;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;~~AC2KA;;;;;cE7jBa,gBAAgB;;;AD1O7B~~;AAaA;;;;~~AC6NA~~;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA~~;cAvOa~~;;;~~AC/Qb~~;AA0DA;AA4DA;AAuBA;~~AAiCa~~,~~KD4GD~~,YAAA~~,GCzFX~~;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;~~cDoJY~~,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;~~AJhcb~~;AA+FA;;;;;~~AC9IiB~~;~~AAoBG~~;AA+Cf,~~KI1EO~~,eAAA,~~GJ0Ee~~;~~EAiBtB~~;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,~~cI3iBN~~,~~mBJ2iBM~~,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,~~GI3iBgC~~,~~eJ2iBhC~~,EAAA;AAiBnB;;;;~~AC2KA~~;;;;;;;;~~ACvyBA~~;AAaa,cE+GA,wBF/G2E,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE+GvB,eF/GuB,EAAA,EAAA,GAAA,MAAA;;;;~~AC6NxF~~;AA2CA;~~AAWA;AA2Da~~,~~cCxMA~~,~~oBDwRZ~~,~~EAhFsG~~,CAAA,QAAA,~~ECvMzF~~,~~eDuRb~~,EAAA,EAAA,GAAA;~~EAqBY~~,WAAA,EAAA,~~gBAAiF~~,GAAA,~~iBAAA~~;~~EAuBjF~~,KAAA,EAAA,OAAA;~~EAqBA~~,QAAA,CAAA,EAAA,MAAA;~~AAgBb~~,CAAA;;;;~~ACtfA~~;AA0DA;~~AA4DA;AAuBa~~,~~cAiCA~~,~~kBAhCC~~,EAAA,~~CAAA~~,IAAA,EAAA,~~MAAe~~,EAAA,~~GAAA~~;~~EAgChB~~,QAAA,EAAA,MAAA~~;;;;YAOC~~"}
1	+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,cD1aC,WC0aqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA,KA/VK,YAAA,GA+VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AAhlBgB;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDa,cF4oBA,YEnoBZ,EAAA,CAAA,KAAA,EFmoBmC,IEnoBnC,EAAA,EAAA,OAAA,EFmoBoD,mBEnoBpD,EAAA,GFmoB0E,OEnoB1E,EAAA;;;;AJXD;AA+FA;;;;ACnIK,cExBQ,aFwBI,
EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cElJO,oBFkJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;cApea;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;AJteb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cIrlBN,mBJqlBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIrlBgC,eJqlBhC,EAAA;AAiBnB;;;;ACoCA;;;;;;;;AC1sBA;AAaa,cE+GA,wBF/G2E,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE+GvB,eF/GuB,EAAA,EAAA,GAAA,MAAA;;;;ACiDxF;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAgFZ,GAAA,iBAhFsG;EAqG1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC5hBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}