flappa-doormal 2.2.3 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -79,6 +79,7 @@ docs/
79
79
  4. **`rule-regex.ts`** - SplitRule → compiled regex builder
80
80
  - `buildRuleRegex()` - Compiles rule patterns (`lineStartsWith`, `lineStartsAfter`, `lineEndsWith`, `template`, `regex`)
81
81
  - `processPattern()` - Token expansion + auto-escaping + optional fuzzy application
82
+ - `extractNamedCaptureNames()` - Extract `(?<name>...)` groups from raw regex patterns (NEW)
82
83
 
83
84
  5. **`breakpoint-processor.ts`** - Breakpoint post-processing engine
84
85
  - `applyBreakpoints()` - Splits oversized structural segments using breakpoint patterns + windowing
@@ -178,6 +179,22 @@ export const escapeTemplateBrackets = (pattern: string): string => {
178
179
  - Direct `template` processing in `buildRuleRegex()`
179
180
  - **NOT** applied to `regex` patterns (user has full control)
180
181
 
182
+ ### Named Captures in Raw Regex Patterns (NEW)
183
+
184
+ Raw `regex` patterns now support named capture groups for metadata extraction:
185
+
186
+ ```typescript
187
+ // Named groups like (?<num>...) are automatically detected and extracted
188
+ { regex: '^(?<num>[٠-٩]+)\\s+[أ-ي\\s]+:\\s*(.+)' }
189
+ // meta.num = matched number
190
+ // content = the (.+) anonymous capture group
191
+ ```
192
+
193
+ **How it works:**
194
+ 1. `extractNamedCaptureNames()` parses `(?<name>...)` from the regex string
195
+ 2. Named captures go to `segment.meta`
196
+ 3. Anonymous `(...)` captures can still be used for content extraction
197
+
181
198
  ### Breakpoints Post-Processing Algorithm
182
199
 
183
200
  The `breakpoints` option provides a post-processing mechanism for limiting segment size. Unlike the deprecated `maxSpan` (which was per-rule), breakpoints run AFTER all structural rules have been applied.
package/README.md CHANGED
@@ -90,6 +90,7 @@ Replace regex with readable tokens:
90
90
  | `{{raqm}}` | Single Arabic digit | `[\\u0660-\\u0669]` |
91
91
  | `{{dash}}` | Dash variants | `[-–—ـ]` |
92
92
  | `{{harf}}` | Arabic letter | `[أ-ي]` |
93
+ | `{{harfs}}` | Arabic letters with spaces | `[أ-ي](?:[أ-ي\s]*[أ-ي])?` |
93
94
  | `{{numbered}}` | Hadith numbering `٢٢ - ` | `{{raqms}} {{dash}} ` |
94
95
  | `{{fasl}}` | Section markers | `فصل\|مسألة` |
95
96
  | `{{tarqim}}` | Punctuation marks | `[.!?؟؛]` |
@@ -165,6 +166,10 @@ For full regex control (character classes, capturing groups), use the `regex` pa
165
166
 
166
167
  // Capturing group (test|text) matches either
167
168
  { regex: '^(test|text) ', split: 'at' }
169
+
170
+ // Named capture groups extract metadata from raw regex too!
171
+ { regex: '^(?<num>[٠-٩]+)\\s+[أ-ي\\s]+:\\s*(.+)' }
172
+ // meta.num = matched number, content = captured (.+) group
168
173
  ```
169
174
 
170
175
  ### 6. Page Constraints
@@ -289,6 +294,23 @@ const segments = segmentPages(pages, {
289
294
  // meta: { num: '٥' } // harf not captured (no :name suffix)
290
295
  ```
291
296
 
297
+ ### Narrator Abbreviation Codes
298
+
299
+ Use `{{harfs}}` for matching Arabic letter abbreviations with spaces (common in narrator biography books):
300
+
301
+ ```typescript
302
+ const segments = segmentPages(pages, {
303
+ rules: [{
304
+ lineStartsAfter: ['{{raqms:num}} {{harfs}}:'],
305
+ split: 'at'
306
+ }]
307
+ });
308
+
309
+ // Matches: ١١١٨ د ت سي ق: حجاج بن دينار
310
+ // meta: { num: '١١١٨' }
311
+ // content: 'حجاج بن دينار' (abbreviations stripped)
312
+ ```
313
+
292
314
  ### Sentence-Based Splitting (Last Period Per Page)
293
315
 
294
316
  ```typescript
package/dist/index.d.mts CHANGED
@@ -967,7 +967,7 @@ type ExpandResult = {
967
967
  * expandTokensWithCaptures('{{bab}}', makeDiacriticInsensitive)
968
968
  * // → { pattern: 'بَ?ا?بٌ?', captureNames: [], hasCaptures: false }
969
969
  */
970
- declare const expandTokensWithCaptures: (query: string, fuzzyTransform?: (pattern: string) => string) => ExpandResult;
970
+ declare const expandTokensWithCaptures: (query: string, fuzzyTransform?: (pattern: string) => string, capturePrefix?: string) => ExpandResult;
971
971
  /**
972
972
  * Expands template tokens in a query string to their regex equivalents.
973
973
  *
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EY,cDzaC,WCyaqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;;AC1iBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA,KA9VK,YAAA,GA8VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AAkNA,KH5NK,eAAA,GGgOJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAmHA;AAuBA;AAqBA;AAgBA;;;;AC1iBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwCC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;;;;AEhtBX;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;;AC1iBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cHgNa,sBAAuB,iBAAiB,wBAAsB;;;;AFxU3E;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA8BM
;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;AC/SA;;;;;;;;ACpYA;AAcA;;;;ACgDA;AAkNA;AA2CA;AAWA;AA2DA;AAmHA;AAuBA;AAqBA;AAgBA;;;cAlfa;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cD4Fa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,2FAA0F;;;;;;;;;;;;;;;;;;;;cAmH1F;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;;AJpfb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cIlmBN,mBJkmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIlmBgC,eJkmBhC,EAAA;AAiBnB;;;;AC/SA;;;;;;;;ACpYA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AAkNA;AA2CA;AAWY,cCnLC,oBDmLW,EAAA,CAAA,QAAA,EClLV,eDkLU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBA8FZ,GAAA,iBA9FsG;EAmH1F,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC1iBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}
1
+ {"version":3,"file":"index.d.mts","names":[],"sources":["../src/segmentation/fuzzy.ts","../src/segmentation/types.ts","../src/segmentation/segmenter.ts","../src/segmentation/textUtils.ts","../src/segmentation/tokens.ts","../src/pattern-detection.ts"],"sourcesContent":[],"mappings":";;AAkEA;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EY,cDzaC,WCyaqB,EAAA,CAAA,CAAA,EAAA,MAAc,EAAA,GAAA,MAAA;AA8BhD;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBY,cLqJC,wBKrJc,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ALsD3B;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA,KA9VK,YAAA,GA8VW;EAqCJ;EA0EA,KAAA,EAAA,MAAU;AA8BtB,CAAA;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA,KHvOK,eAAA,GG2OJ;EAuCY;EAWD,QAAA,EAAA,MAAY;AA2DxB,CAAA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;KJlGK,qBAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiCA,sBAAA;;;;;;;;;;;;;;;;;;;;;;;KAwBA,mBAAA;;;;;;;;;;;;;;KAeA,WAAA,GACC,eACA,kBACA,wBACA,yBACA;;;;;;;KAYD,aAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KA+EO,SAAA;;;;;;;KAYP,eAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAyCS;;;;;;;;;;;;SAaH;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAwCC,SAAA,GAAY,cAAc,gBAAgB;;;;;;;;;;;;;KAkB1C,IAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCA,cAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;YAqCE;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAqCF,UAAA,YAAsB;;;;;;;;;;;;;;;;;;;;;;;;;UA8BjB,MAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;KAiDL,mBAAA;;;;;;;;UAQA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gBA8CM;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;WAwDL;;;;;;;;;;;;;;;;KAiBD,OAAA;;;;;;;;;;;;;;;;;;;;;;;;;;SA6BD;;;;;;;;;AEhtBX;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;;AC3jBA;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cHgaa,sBAAuB,iBAAiB,wBAAsB;;;;AFxhB3E;AA+FA;;;;ACnIK,cExBQ,aFwBI,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GAAA,MAAA;AAAA;AA4BG;AA
8BM;AAiCC;AAwBH;;;;;AAoBlB,cEjJO,oBFiJP,EAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,MAAA;;;;ADnGN;AA+FA;;;;;ACnIiB;AA4BG;AA8BM;AAiCC;AAwBH;;;;;;;AAoBC;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;;AA+HA;;;;AC/FA;;;;;;;;ACplBA;AAcA;;;;ACgDA;AA6NA;AA2CA;AAWA;AA2DA;AAyHA;AAuBA;AAqBA;AAgBA;;;cAngBa;ACxDb;AA0DA;AA4DA;AAuBA;AAiCA;;;;;;;;;;;;;;;;;;;;;cDuGa,gBAAgB;;;;;;;;;;;;;;;;cA2ChB;;;;;;;KAWD,YAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;cA2DC,mHAIV;;;;;;;;;;;;;;;;;;;;cAqHU;;;;;;;;;;;;;;;;;;;;;;cAuBA,uCAAmC;;;;;;;;;;;;;cAqBnC;;;;;;;;;;;;;;;cAgBA;;;;AJrgBb;AA+FA;;;;;ACnIiB;AA4BG;AA+Df,KI7GO,eAAA,GJ6Ge;EAwBtB;EAeA,KAAA,EAAA,MAAA;EACC;EACA,KAAA,EAAA,MAAA;EACA;EACA,KAAA,EAAA,MAAA;EACA;EAAmB,QAAA,EAAA,MAAA;AAAA,CAAA;AA2FzB;AAAkD;AA0GlD;;;;;AAkBA;AAqCA;AA0EA;AA8BA;AAiDA;;;;AA8GmB,cIlmBN,mBJkmBM,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,GIlmBgC,eJkmBhC,EAAA;AAiBnB;;;;AC/FA;;;;;;;;ACplBA;AAca,cE8GA,wBF7GyD,EAAA,CAAA,IAAA,EAAA,MAAA,EAAA,QAAA,EE6GL,eF7GK,EAAA,EAAA,GAAA,MAAA;;;;AC+CtE;AA6NA;AA2CA;AAWY,cC9LC,oBD8LW,EAAA,CAAA,QAAA,EC7LV,eD6LU,EAAA,EAAA,GAAA;EA2DX,WAAA,EAAA,gBAoGZ,GAAA,iBAhGE;EAqHU,KAAA,EAAA,OAAA;EAuBA,QAAA,CAAA,EAAA,MAAA;AAqBb,CAAA;AAgBA;;;;AC3jBA;AA0DA;AA4Da,cAwDA,kBAzCZ,EAAA,CAfgE,IAAA,EAAA,MAAA,EAAA,GAAe;EAuBnE,QAAA,EAAA,MAAA;EAiCA,WAAA,EAAA,gBAmBZ,GAZa,iBAAe;;;YAAf"}