flappa-doormal 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +357 -0
- package/README.md +116 -2
- package/dist/index.d.mts +249 -8
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +189 -5
- package/dist/index.mjs.map +1 -1
- package/package.json +4 -3

package/dist/index.d.mts
CHANGED

@@ -67,22 +67,33 @@ declare const makeDiacriticInsensitive: (text: string) => string;
 //#endregion
 //#region src/segmentation/types.d.ts
 /**
- * Literal regex pattern rule - no token expansion is applied.
+ * Literal regex pattern rule - no token expansion or auto-escaping is applied.
+ *
+ * Use this when you need full control over the regex pattern, including:
+ * - Character classes like `[أب]` to match أ or ب
+ * - Capturing groups like `(test|text)` for alternation
+ * - Any other regex syntax that would be escaped in template patterns
  *
- * Use this when you need full control over the regex pattern.
  * If the regex contains capturing groups, the captured content
  * will be used as the segment content.
  *
+ * **Note**: Unlike `template`, `lineStartsWith`, etc., this pattern type
+ * does NOT auto-escape `()[]`. You have full regex control.
+ *
  * @example
  * // Match Arabic-Indic numbers followed by a dash
  * { regex: '^[٠-٩]+ - ', split: 'at' }
  *
  * @example
+ * // Character class - matches أ or ب
+ * { regex: '^[أب] ', split: 'at' }
+ *
+ * @example
  * // Capture group - content after the marker becomes segment content
 * { regex: '^[٠-٩]+ - (.*)', split: 'at' }
  */
 type RegexPattern = {
-  /** Raw regex pattern string (no token expansion) */
+  /** Raw regex pattern string (no token expansion, no auto-escaping) */
   regex: string;
 };
 /**
@@ -90,6 +101,10 @@ type RegexPattern = {
  *
  * Supports all tokens defined in `TOKEN_PATTERNS` and named capture syntax.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Using tokens for Arabic-Indic digits
  * { template: '^{{raqms}} {{dash}}', split: 'at' }
@@ -98,10 +113,14 @@ type RegexPattern = {
  * // Named capture to extract hadith number into metadata
  * { template: '^{{raqms:hadithNum}} {{dash}}', split: 'at' }
  *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ):
+ * { template: '^({{harf}}): ', split: 'at' }
+ *
  * @see TOKEN_PATTERNS for available tokens
  */
 type TemplatePattern = {
-  /** Template string with `{{token}}` or `{{token:name}}` placeholders */
+  /** Template string with `{{token}}` or `{{token:name}}` placeholders. Brackets `()[]` are auto-escaped. */
   template: string;
 };
 /**
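
The hunks above draw the line between the two pattern styles: `template` (and the line-based patterns further down) auto-escapes literal `()[]`, while `regex` is passed through untouched. A minimal sketch of the same marker written both ways, using the exported `SplitRule` union; treating `{{harf}}` as the `[أ-ي]` class comes from the JSDoc, the rest is illustrative and not library code:

import type { SplitRule } from 'flappa-doormal';

// Template form: literal parentheses are auto-escaped, {{harf}} expands for you.
const viaTemplate: SplitRule = { template: '^({{harf}}): ', split: 'at' };

// Raw regex form: nothing is escaped or expanded, so the escapes are written by hand.
const viaRegex: SplitRule = { regex: '^\\([أ-ي]\\): ', split: 'at' };
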
@@ -113,6 +132,10 @@ type TemplatePattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}})` instead of
+ * `\\({{harf}}\\)`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at chapter headings (marker included in content)
  * { lineStartsWith: ['## ', '### '], split: 'at' }
@@ -120,9 +143,13 @@ type TemplatePattern = {
  * @example
  * // Split at Arabic book/chapter markers with fuzzy matching
  * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ)
+ * { lineStartsWith: ['({{harf}}) '], split: 'at' }
  */
 type LineStartsWithPattern = {
-  /** Array of patterns that mark line beginnings (marker included in content) */
+  /** Array of patterns that mark line beginnings (marker included in content). Brackets `()[]` are auto-escaped. */
   lineStartsWith: string[];
 };
 /**
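
`lineStartsWith` gets the same auto-escaping note. A short sketch combining the two documented examples into one rule list, together with the token helpers that ship alongside them; the console output shown is indicative only:

import { getAvailableTokens, getTokenPattern } from 'flappa-doormal';
import type { SplitRule } from 'flappa-doormal';

// Inspect the {{tokens}} this build knows about before writing patterns.
console.log(getAvailableTokens());    // e.g. ['bab', 'basmalah', 'bullet', 'dash', ...]
console.log(getTokenPattern('harf')); // the raw regex behind {{harf}}

const rules: SplitRule[] = [
  // Fuzzy book/chapter markers, marker kept in the segment content.
  { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true },
  // Literal "(أ) " marker, relying on the new bracket auto-escaping.
  { lineStartsWith: ['({{harf}}) '], split: 'at' },
];
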
@@ -136,6 +163,10 @@ type LineStartsWithPattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. Write `({{harf}}):` instead of
+ * `\\({{harf}}\\):`. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at numbered hadiths, capturing content without the number prefix
  * // Content extends to next split, not just end of that line
@@ -144,9 +175,13 @@ type LineStartsWithPattern = {
  * @example
  * // Extract hadith number to metadata while stripping the prefix
  * { lineStartsAfter: ['{{raqms:num}} {{dash}} '], split: 'at' }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (أ): prefix
+ * { lineStartsAfter: ['({{harf}}): '], split: 'at' }
  */
 type LineStartsAfterPattern = {
-  /** Array of patterns that mark line beginnings (marker excluded from content) */
+  /** Array of patterns that mark line beginnings (marker excluded from content). Brackets `()[]` are auto-escaped. */
   lineStartsAfter: string[];
 };
 /**
@@ -157,12 +192,19 @@ type LineStartsAfterPattern = {
  * Token expansion is applied to each pattern. Use `fuzzy: true` for
  * diacritic-insensitive Arabic matching.
  *
+ * **Auto-escaping**: Parentheses `()` and square brackets `[]` outside of
+ * `{{tokens}}` are automatically escaped. For raw regex control, use `regex` pattern type.
+ *
  * @example
  * // Split at lines ending with Arabic sentence-ending punctuation
  * { lineEndsWith: ['۔', '؟', '!'], split: 'after' }
+ *
+ * @example
+ * // Auto-escaped brackets - matches literal (انتهى) suffix
+ * { lineEndsWith: ['(انتهى)'], split: 'after' }
  */
 type LineEndsWithPattern = {
-  /** Array of patterns that mark line endings */
+  /** Array of patterns that mark line endings. Brackets `()[]` are auto-escaped. */
   lineEndsWith: string[];
 };
 /**
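
`lineStartsAfter` and `lineEndsWith` behave the same way; the only difference is whether the matched marker is stripped from, or kept at the end of, the segment. A sketch pairing the documented examples (illustrative only):

import type { SplitRule } from 'flappa-doormal';

const rules: SplitRule[] = [
  // Strip a numbered "١٢ - " prefix and keep the number via the named capture.
  { lineStartsAfter: ['{{raqms:num}} {{dash}} '], split: 'at' },
  // Strip a literal "(أ): " prefix; the parentheses are auto-escaped.
  { lineStartsAfter: ['({{harf}}): '], split: 'at' },
  // Close a segment after a line that ends with a literal "(انتهى)".
  { lineEndsWith: ['(انتهى)'], split: 'after' },
];
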
@@ -460,6 +502,42 @@ type BreakpointRule = {
  * { pattern: '{{tarqim}}\\s*', min: 10 }
  */
 type Breakpoint = string | BreakpointRule;
+/**
+ * Logger interface for custom logging implementations.
+ *
+ * All methods are optional - only implement the verbosity levels you need.
+ * When no logger is provided, no logging overhead is incurred.
+ *
+ * Compatible with the Logger interface from ffmpeg-simplified and similar libraries.
+ *
+ * @example
+ * // Simple console logger
+ * const logger: Logger = {
+ *   debug: console.debug,
+ *   info: console.info,
+ *   warn: console.warn,
+ *   error: console.error,
+ * };
+ *
+ * @example
+ * // Production logger (only warnings and errors)
+ * const prodLogger: Logger = {
+ *   warn: (msg, ...args) => myLoggingService.warn(msg, args),
+ *   error: (msg, ...args) => myLoggingService.error(msg, args),
+ * };
+ */
+interface Logger {
+  /** Log a debug message (verbose debugging output) */
+  debug?: (message: string, ...args: unknown[]) => void;
+  /** Log an error message (critical failures) */
+  error?: (message: string, ...args: unknown[]) => void;
+  /** Log an informational message (key progress points) */
+  info?: (message: string, ...args: unknown[]) => void;
+  /** Log a trace message (extremely verbose, per-iteration details) */
+  trace?: (message: string, ...args: unknown[]) => void;
+  /** Log a warning message (potential issues) */
+  warn?: (message: string, ...args: unknown[]) => void;
+}
 /**
  * Segmentation options controlling how pages are split.
  *
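
Because every `Logger` method is optional, a sink only has to implement the levels it wants to see. A small sketch of a tagging console logger that satisfies the new interface; the prefix string is purely illustrative:

import type { Logger } from 'flappa-doormal';

// Forward selected levels and tag them so segmentation output is easy to
// grep in mixed application logs; `trace` is omitted as too noisy here.
const taggedLogger: Logger = {
  debug: (message, ...args) => console.debug('[flappa-doormal]', message, ...args),
  info: (message, ...args) => console.info('[flappa-doormal]', message, ...args),
  warn: (message, ...args) => console.warn('[flappa-doormal]', message, ...args),
  error: (message, ...args) => console.error('[flappa-doormal]', message, ...args),
};
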
@@ -480,6 +558,17 @@ type Breakpoint = string | BreakpointRule;
  *   breakpoints: ['{{tarqim}}\\s*', '\\n', ''],
  *   prefer: 'longer'
  * };
+ *
+ * @example
+ * // With custom logger for debugging
+ * const options: SegmentationOptions = {
+ *   rules: [...],
+ *   logger: {
+ *     debug: console.debug,
+ *     info: console.info,
+ *     warn: console.warn,
+ *   }
+ * };
  */
 type SegmentationOptions = {
   /**
@@ -542,6 +631,38 @@ type SegmentationOptions = {
    * @default 'longer'
    */
   prefer?: 'longer' | 'shorter';
+  /**
+   * Optional logger for debugging segmentation.
+   *
+   * Provide a logger to receive detailed information about the segmentation
+   * process. Useful for debugging pattern matching, page tracking, and
+   * breakpoint processing issues.
+   *
+   * When not provided, no logging overhead is incurred (methods are not called).
+   *
+   * Verbosity levels:
+   * - `trace`: Per-iteration details (very verbose)
+   * - `debug`: Detailed operation information
+   * - `info`: Key progress points
+   * - `warn`: Potential issues
+   * - `error`: Critical failures
+   *
+   * @example
+   * // Console logger for development
+   * logger: {
+   *   debug: console.debug,
+   *   info: console.info,
+   *   warn: console.warn,
+   * }
+   *
+   * @example
+   * // Custom logger integration
+   * logger: {
+   *   debug: (msg, ...args) => winston.debug(msg, { meta: args }),
+   *   error: (msg, ...args) => winston.error(msg, { meta: args }),
+   * }
+   */
+  logger?: Logger;
 };
 /**
  * Output segment produced by `segmentPages()`.
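
Wiring the new `logger` option into a call looks like the sketch below. The option fields come straight from the JSDoc above; the page objects (an `id` plus `content`) are an assumption for illustration, since the `Page` shape itself is not part of this diff:

import { segmentPages } from 'flappa-doormal';
import type { SegmentationOptions } from 'flappa-doormal';

const options: SegmentationOptions = {
  rules: [{ lineStartsAfter: ['{{raqms:num}} {{dash}} '], split: 'at' }],
  breakpoints: ['{{tarqim}}\\s*', '\\n', ''],
  prefer: 'longer',
  // Omit `logger` entirely for zero logging overhead.
  logger: { warn: console.warn, error: console.error },
};

// Assumed Page shape ({ id, content }), for illustration only.
const segments = segmentPages([{ id: 1, content: '١ - حدثنا يحيى بن يحيى...' }], options);
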
@@ -670,6 +791,51 @@ declare const normalizeLineEndings: (content: string) => string;
  * expandTokensWithCaptures('{{raqms:num}} {{dash}}')
  * // → { pattern: '(?<num>[\\u0660-\\u0669]+) [-–—ـ]', captureNames: ['num'], hasCaptures: true }
  */
+/**
+ * Token definitions mapping human-readable token names to regex patterns.
+ *
+ * Tokens are used in template strings with double-brace syntax:
+ * - `{{token}}` - Expands to the pattern (non-capturing in context)
+ * - `{{token:name}}` - Expands to a named capture group `(?<name>pattern)`
+ * - `{{:name}}` - Captures any content with the given name `(?<name>.+)`
+ *
+ * @remarks
+ * These patterns are designed for Arabic text matching. For diacritic-insensitive
+ * matching of Arabic patterns, use the `fuzzy: true` option in split rules,
+ * which applies `makeDiacriticInsensitive()` to the expanded patterns.
+ *
+ * @example
+ * // Using tokens in a split rule
+ * { lineStartsWith: ['{{kitab}}', '{{bab}}'], split: 'at', fuzzy: true }
+ *
+ * @example
+ * // Using tokens with named captures
+ * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
+ */
+/**
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
+ * but preserves content inside `{{...}}` token delimiters.
+ *
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
+ *
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
+ *
+ * @example
+ * escapeTemplateBrackets('({{harf}}): ')
+ * // → '\\({{harf}}\\): '
+ *
+ * @example
+ * escapeTemplateBrackets('[{{raqm}}] ')
+ * // → '\\[{{raqm}}\\] '
+ *
+ * @example
+ * escapeTemplateBrackets('{{harf}}')
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
+ */
+declare const escapeTemplateBrackets: (pattern: string) => string;
 /**
  * Token definitions mapping human-readable token names to regex patterns.
  *
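
With `escapeTemplateBrackets` exported on its own, the escape-then-expand order that `processPattern` uses internally (see the dist/index.mjs hunks below) can be reproduced by hand:

import { escapeTemplateBrackets, expandTokensWithCaptures } from 'flappa-doormal';

const escaped = escapeTemplateBrackets('({{harf}}): ');
// Result: '\\({{harf}}\\): ' (only the brackets outside {{...}} are escaped).

const { pattern, captureNames, hasCaptures } = expandTokensWithCaptures(escaped);
// `pattern` now has the expanded {{harf}} class between the escaped parentheses;
// no named captures were requested, so captureNames should be empty.
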
@@ -846,5 +1012,80 @@ declare const getAvailableTokens: () => string[];
  */
 declare const getTokenPattern: (tokenName: string) => string | undefined;
 //#endregion
-
+//#region src/pattern-detection.d.ts
+/**
+ * Pattern detection utilities for recognizing template tokens in Arabic text.
+ * Used to auto-detect patterns from user-highlighted text in the segmentation dialog.
+ *
+ * @module pattern-detection
+ */
+/**
+ * Result of detecting a token pattern in text
+ */
+type DetectedPattern = {
+  /** Token name from TOKEN_PATTERNS (e.g., 'raqms', 'dash') */
+  token: string;
+  /** The matched text */
+  match: string;
+  /** Start index in the original text */
+  index: number;
+  /** End index (exclusive) */
+  endIndex: number;
+};
+/**
+ * Analyzes text and returns all detected token patterns with their positions.
+ * Patterns are detected in priority order to avoid partial matches.
+ *
+ * @param text - The text to analyze for token patterns
+ * @returns Array of detected patterns sorted by position
+ *
+ * @example
+ * detectTokenPatterns("٣٤ - حدثنا")
+ * // Returns: [
+ * //   { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
+ * //   { token: 'dash', match: '-', index: 3, endIndex: 4 },
+ * //   { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
+ * // ]
+ */
+declare const detectTokenPatterns: (text: string) => DetectedPattern[];
+/**
+ * Generates a template pattern from text using detected tokens.
+ * Replaces matched portions with {{token}} syntax.
+ *
+ * @param text - Original text
+ * @param detected - Array of detected patterns from detectTokenPatterns
+ * @returns Template string with tokens, e.g., "{{raqms}} {{dash}} "
+ *
+ * @example
+ * const detected = detectTokenPatterns("٣٤ - ");
+ * generateTemplateFromText("٣٤ - ", detected);
+ * // Returns: "{{raqms}} {{dash}} "
+ */
+declare const generateTemplateFromText: (text: string, detected: DetectedPattern[]) => string;
+/**
+ * Determines the best pattern type for auto-generated rules based on detected patterns.
+ *
+ * @param detected - Array of detected patterns
+ * @returns Suggested pattern type and whether to use fuzzy matching
+ */
+declare const suggestPatternConfig: (detected: DetectedPattern[]) => {
+  patternType: "lineStartsWith" | "lineStartsAfter";
+  fuzzy: boolean;
+  metaType?: string;
+};
+/**
+ * Analyzes text and generates a complete suggested rule configuration.
+ *
+ * @param text - Highlighted text from the page
+ * @returns Suggested rule configuration or null if no patterns detected
+ */
+declare const analyzeTextForRule: (text: string) => {
+  template: string;
+  patternType: "lineStartsWith" | "lineStartsAfter";
+  fuzzy: boolean;
+  metaType?: string;
+  detected: DetectedPattern[];
+} | null;
+//#endregion
+export { type Breakpoint, type BreakpointRule, type DetectedPattern, type ExpandResult, type Logger, type Page, type PageRange, type Segment, type SegmentationOptions, type SplitRule, TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.d.mts.map
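
The new pattern-detection declarations back a "suggest a rule from highlighted text" flow. A sketch of the round trip using only the signatures declared above; the expected values follow the JSDoc examples rather than an executed run:

import { analyzeTextForRule } from 'flappa-doormal';

const suggestion = analyzeTextForRule('٣٤ - حدثنا');
if (suggestion) {
  // Per the JSDoc, a numbered narration marker like this should yield roughly:
  //   template: '{{raqms}} {{dash}} {{naql}}'
  //   patternType: 'lineStartsAfter', fuzzy: false, metaType: 'hadith'
  console.log(suggestion.template, suggestion.patternType, suggestion.fuzzy, suggestion.metaType);
}
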

package/dist/index.d.mts.map
CHANGED

@@ -1 +1 @@
-
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EY,cD1aC,WC0aqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA,KA/VK,YAAA,GA+VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAqGA;AAuBA;AAqBA;AAgBA;;;;AC5hBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA8EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA0CC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WA0CL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;AAhlBgB;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDa,cF4oBA,YEnoBZ,EAAA,CAAA,KAAA,EFmoBmC,IEnoBnC,EAAA,EAAA,OAAA,EFmoBoD,mBEnoBpD,EAAA,GFmoB0E,OEnoB1E,EAAA;;;;AJXD;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM;AAiCC;AAwBH;;;;;AAoBlB,cElJO,oBFkJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AAiHA;;;;ACoCA;;;;;;;;AC1sBA;AAaA;;;;ACiDA;AAkNA;AA2CA;AAWA;AA2DA;AAqGA;AAuBA;AAqBA;AAgBA;;;cApea;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAqG1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAsC;;;;;;;;;;;;;cAqBtC;;;;;;;;;;;;;;;cAgBA;;;;AJteb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA0FzB;AAAkD;AA4GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AAgGmB,cIrlBN,mBJqlBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIrlBgC,eJqlBhC,EAAA;AAiBnB;;;;ACoCA;;;;;;;;AC1sBA;AAaa,cE+GA,wBF/G2E,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE+GvB,eF/GuB,EAAA,EAAA,GAAA,MAAA;;;;ACiDxF;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAgFZ,GAAA,iBAhFsG;EAqG1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC5hBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}

package/dist/index.mjs
CHANGED

@@ -673,6 +673,35 @@ const normalizeLineEndings = (content) => content.replace(/\r\n?/g, "\n");
  * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
  */
 /**
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
+ * but preserves content inside `{{...}}` token delimiters.
+ *
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
+ *
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
+ *
+ * @example
+ * escapeTemplateBrackets('({{harf}}): ')
+ * // → '\\({{harf}}\\): '
+ *
+ * @example
+ * escapeTemplateBrackets('[{{raqm}}] ')
+ * // → '\\[{{raqm}}\\] '
+ *
+ * @example
+ * escapeTemplateBrackets('{{harf}}')
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
+ */
+const escapeTemplateBrackets = (pattern) => {
+  return pattern.replace(/(\{\{[^}]*\}\})|([()[\]])/g, (match, token, bracket) => {
+    if (token) return token;
+    return `\\${bracket}`;
+  });
+};
+/**
  * Base token definitions mapping human-readable token names to regex patterns.
  *
  * These tokens contain raw regex patterns and do not reference other tokens.
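
The implementation is a single alternation: anything matching `\{\{[^}]*\}\}` is returned untouched, and any bare `(`, `)`, `[`, `]` gets a backslash. Escaping before token expansion matters because the expanded tokens legitimately contain brackets (for example `[أ-ي]`) that must not be escaped. A quick check of the documented behaviour on a mixed input:

import { escapeTemplateBrackets } from 'flappa-doormal';

// Brackets outside {{...}} are escaped; token bodies are left for later expansion.
escapeTemplateBrackets('[{{raqm}}] ({{harf}}): ');
// → '\\[{{raqm}}\\] \\({{harf}}\\): '
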
@@ -1000,7 +1029,7 @@ const hasCapturingGroup = (pattern) => {
  * // → { pattern: 'حَ?دَّ?ثَ?نَ?ا|...', captureNames: [] }
  */
 const processPattern = (pattern, fuzzy) => {
-  const { pattern: expanded, captureNames } = expandTokensWithCaptures(pattern, fuzzy ? makeDiacriticInsensitive : void 0);
+  const { pattern: expanded, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(pattern), fuzzy ? makeDiacriticInsensitive : void 0);
   return {
     captureNames,
     pattern: expanded
@@ -1055,16 +1084,16 @@ const buildRuleRegex = (rule) => {
     const processed = s.lineStartsWith.map((p) => processPattern(p, fuzzy));
     const patterns = processed.map((p) => p.pattern).join("|");
     allCaptureNames = processed.flatMap((p) => p.captureNames);
-    s.
+    s.regex = `^(?:${patterns})`;
   }
   if (s.lineEndsWith?.length) {
     const processed = s.lineEndsWith.map((p) => processPattern(p, fuzzy));
     const patterns = processed.map((p) => p.pattern).join("|");
     allCaptureNames = processed.flatMap((p) => p.captureNames);
-    s.
+    s.regex = `(?:${patterns})$`;
   }
   if (s.template) {
-    const { pattern, captureNames } = expandTokensWithCaptures(s.template);
+    const { pattern, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(s.template));
     s.regex = pattern;
     allCaptureNames = [...allCaptureNames, ...captureNames];
   }
@@ -1480,5 +1509,160 @@ const buildSegments = (splitPoints, content, pageMap, rules) => {
 };

 //#endregion
-
+//#region src/pattern-detection.ts
+/**
+ * Pattern detection utilities for recognizing template tokens in Arabic text.
+ * Used to auto-detect patterns from user-highlighted text in the segmentation dialog.
+ *
+ * @module pattern-detection
+ */
+/**
+ * Token detection order - more specific patterns first to avoid partial matches.
+ * Example: 'raqms' before 'raqm' so "٣٤" matches 'raqms' not just the first digit.
+ *
+ * Tokens not in this list are appended in alphabetical order from TOKEN_PATTERNS.
+ */
+const TOKEN_PRIORITY_ORDER = [
+  "basmalah",
+  "kitab",
+  "bab",
+  "fasl",
+  "naql",
+  "numbered",
+  "raqms",
+  "raqm",
+  "tarqim",
+  "bullet",
+  "dash",
+  "harf"
+];
+/**
+ * Gets the token detection priority order.
+ * Returns tokens in priority order, with any TOKEN_PATTERNS not in the priority list appended.
+ */
+const getTokenPriority = () => {
+  const allTokens = getAvailableTokens();
+  const prioritized = TOKEN_PRIORITY_ORDER.filter((t) => allTokens.includes(t));
+  const remaining = allTokens.filter((t) => !TOKEN_PRIORITY_ORDER.includes(t)).sort();
+  return [...prioritized, ...remaining];
+};
+/**
+ * Analyzes text and returns all detected token patterns with their positions.
+ * Patterns are detected in priority order to avoid partial matches.
+ *
+ * @param text - The text to analyze for token patterns
+ * @returns Array of detected patterns sorted by position
+ *
+ * @example
+ * detectTokenPatterns("٣٤ - حدثنا")
+ * // Returns: [
+ * //   { token: 'raqms', match: '٣٤', index: 0, endIndex: 2 },
+ * //   { token: 'dash', match: '-', index: 3, endIndex: 4 },
+ * //   { token: 'naql', match: 'حدثنا', index: 5, endIndex: 10 }
+ * // ]
+ */
+const detectTokenPatterns = (text) => {
+  if (!text) return [];
+  const results = [];
+  const coveredRanges = [];
+  const isPositionCovered = (start, end) => {
+    return coveredRanges.some(([s, e]) => start >= s && start < e || end > s && end <= e || start <= s && end >= e);
+  };
+  for (const tokenName of getTokenPriority()) {
+    const pattern = TOKEN_PATTERNS[tokenName];
+    if (!pattern) continue;
+    try {
+      const regex = new RegExp(`(${pattern})`, "gu");
+      let match;
+      while ((match = regex.exec(text)) !== null) {
+        const startIndex = match.index;
+        const endIndex = startIndex + match[0].length;
+        if (isPositionCovered(startIndex, endIndex)) continue;
+        results.push({
+          endIndex,
+          index: startIndex,
+          match: match[0],
+          token: tokenName
+        });
+        coveredRanges.push([startIndex, endIndex]);
+      }
+    } catch {}
+  }
+  return results.sort((a, b) => a.index - b.index);
+};
+/**
+ * Generates a template pattern from text using detected tokens.
+ * Replaces matched portions with {{token}} syntax.
+ *
+ * @param text - Original text
+ * @param detected - Array of detected patterns from detectTokenPatterns
+ * @returns Template string with tokens, e.g., "{{raqms}} {{dash}} "
+ *
+ * @example
+ * const detected = detectTokenPatterns("٣٤ - ");
+ * generateTemplateFromText("٣٤ - ", detected);
+ * // Returns: "{{raqms}} {{dash}} "
+ */
+const generateTemplateFromText = (text, detected) => {
+  if (!text || detected.length === 0) return text;
+  let template = text;
+  const sortedByIndexDesc = [...detected].sort((a, b) => b.index - a.index);
+  for (const d of sortedByIndexDesc) template = `${template.slice(0, d.index)}{{${d.token}}}${template.slice(d.endIndex)}`;
+  return template;
+};
+/**
+ * Determines the best pattern type for auto-generated rules based on detected patterns.
+ *
+ * @param detected - Array of detected patterns
+ * @returns Suggested pattern type and whether to use fuzzy matching
+ */
+const suggestPatternConfig = (detected) => {
+  const hasStructuralToken = detected.some((d) => [
+    "basmalah",
+    "kitab",
+    "bab",
+    "fasl"
+  ].includes(d.token));
+  const hasNumberedPattern = detected.some((d) => [
+    "raqms",
+    "raqm",
+    "numbered"
+  ].includes(d.token));
+  if (hasStructuralToken) return {
+    fuzzy: true,
+    metaType: detected.find((d) => [
+      "kitab",
+      "bab",
+      "fasl"
+    ].includes(d.token))?.token || "chapter",
+    patternType: "lineStartsWith"
+  };
+  if (hasNumberedPattern) return {
+    fuzzy: false,
+    metaType: "hadith",
+    patternType: "lineStartsAfter"
+  };
+  return {
+    fuzzy: false,
+    patternType: "lineStartsAfter"
+  };
+};
+/**
+ * Analyzes text and generates a complete suggested rule configuration.
+ *
+ * @param text - Highlighted text from the page
+ * @returns Suggested rule configuration or null if no patterns detected
+ */
+const analyzeTextForRule = (text) => {
+  const detected = detectTokenPatterns(text);
+  if (detected.length === 0) return null;
+  return {
+    detected,
+    template: generateTemplateFromText(text, detected),
+    ...suggestPatternConfig(detected)
+  };
+};
+
+//#endregion
+export { TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.mjs.map
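
The priority list plus the covered-range check is what keeps overlapping tokens from double-matching: `raqms` is tried before `raqm`, so a multi-digit number is claimed once and lower-priority tokens cannot re-match inside that span. A short sketch of the observable effect; the expected values follow the JSDoc examples above rather than an executed run:

import { detectTokenPatterns, generateTemplateFromText, suggestPatternConfig } from 'flappa-doormal';

const text = '٣٤ - حدثنا';
const detected = detectTokenPatterns(text);
// Expected: raqms ('٣٤'), dash ('-'), naql ('حدثنا'), and no separate 'raqm'
// hit inside '٣٤', because that span is already covered.

const template = generateTemplateFromText(text, detected); // '{{raqms}} {{dash}} {{naql}}'
const config = suggestPatternConfig(detected);             // { patternType: 'lineStartsAfter', fuzzy: false, metaType: 'hadith' }
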