flappa-doormal 2.1.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -673,6 +673,35 @@ const normalizeLineEndings = (content) => content.replace(/\r\n?/g, "\n");
673
673
  * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
674
674
  */
675
675
  /**
676
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
677
+ * but preserves content inside `{{...}}` token delimiters.
678
+ *
679
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
680
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
681
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
682
+ *
683
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
684
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
685
+ *
686
+ * @example
687
+ * escapeTemplateBrackets('({{harf}}): ')
688
+ * // → '\\({{harf}}\\): '
689
+ *
690
+ * @example
691
+ * escapeTemplateBrackets('[{{raqm}}] ')
692
+ * // → '\\[{{raqm}}\\] '
693
+ *
694
+ * @example
695
+ * escapeTemplateBrackets('{{harf}}')
696
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
697
+ */
698
+ const escapeTemplateBrackets = (pattern) => {
699
+ return pattern.replace(/(\{\{[^}]*\}\})|([()[\]])/g, (match, token, bracket) => {
700
+ if (token) return token;
701
+ return `\\${bracket}`;
702
+ });
703
+ };
704
+ /**
676
705
  * Base token definitions mapping human-readable token names to regex patterns.
677
706
  *
678
707
  * These tokens contain raw regex patterns and do not reference other tokens.
@@ -1000,7 +1029,7 @@ const hasCapturingGroup = (pattern) => {
1000
1029
  * // → { pattern: 'حَ?دَّ?ثَ?نَ?ا|...', captureNames: [] }
1001
1030
  */
1002
1031
  const processPattern = (pattern, fuzzy) => {
1003
- const { pattern: expanded, captureNames } = expandTokensWithCaptures(pattern, fuzzy ? makeDiacriticInsensitive : void 0);
1032
+ const { pattern: expanded, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(pattern), fuzzy ? makeDiacriticInsensitive : void 0);
1004
1033
  return {
1005
1034
  captureNames,
1006
1035
  pattern: expanded
@@ -1055,16 +1084,16 @@ const buildRuleRegex = (rule) => {
1055
1084
  const processed = s.lineStartsWith.map((p) => processPattern(p, fuzzy));
1056
1085
  const patterns = processed.map((p) => p.pattern).join("|");
1057
1086
  allCaptureNames = processed.flatMap((p) => p.captureNames);
1058
- s.template = `^(?:${patterns})`;
1087
+ s.regex = `^(?:${patterns})`;
1059
1088
  }
1060
1089
  if (s.lineEndsWith?.length) {
1061
1090
  const processed = s.lineEndsWith.map((p) => processPattern(p, fuzzy));
1062
1091
  const patterns = processed.map((p) => p.pattern).join("|");
1063
1092
  allCaptureNames = processed.flatMap((p) => p.captureNames);
1064
- s.template = `(?:${patterns})$`;
1093
+ s.regex = `(?:${patterns})$`;
1065
1094
  }
1066
1095
  if (s.template) {
1067
- const { pattern, captureNames } = expandTokensWithCaptures(s.template);
1096
+ const { pattern, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(s.template));
1068
1097
  s.regex = pattern;
1069
1098
  allCaptureNames = [...allCaptureNames, ...captureNames];
1070
1099
  }
@@ -1227,7 +1256,7 @@ const convertPageBreaks = (content, startOffset, pageBreaks) => {
1227
1256
  * @param prefer - 'longer' for last match, 'shorter' for first match
1228
1257
  * @returns Processed segments with oversized ones broken up
1229
1258
  */
1230
- const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoints, prefer, logger) => {
1259
+ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoints, prefer) => {
1231
1260
  const findExclusionBreakPosition = (currentFromIdx, windowEndIdx, toIdx, pageIds$1, expandedBreakpoints$1, cumulativeOffsets$1) => {
1232
1261
  const startingPageId = pageIds$1[currentFromIdx];
1233
1262
  if (expandedBreakpoints$1.some((bp) => bp.excludeSet.has(startingPageId)) && currentFromIdx < toIdx) return cumulativeOffsets$1[currentFromIdx + 1] - cumulativeOffsets$1[currentFromIdx];
@@ -1259,168 +1288,72 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
1259
1288
  const patternProcessor = (p) => processPattern(p, false).pattern;
1260
1289
  const expandedBreakpoints = expandBreakpoints(breakpoints, patternProcessor);
1261
1290
  const result = [];
1262
- logger?.info?.("Starting breakpoint processing", {
1263
- maxPages,
1264
- segmentCount: segments.length
1265
- });
1266
1291
  for (const segment of segments) {
1267
1292
  const fromIdx = pageIdToIndex.get(segment.from) ?? -1;
1268
1293
  const toIdx = segment.to !== void 0 ? pageIdToIndex.get(segment.to) ?? fromIdx : fromIdx;
1269
- logger?.debug?.("Processing segment", {
1270
- contentLength: segment.content.length,
1271
- contentPreview: segment.content.slice(0, 100),
1272
- from: segment.from,
1273
- fromIdx,
1274
- to: segment.to,
1275
- toIdx
1276
- });
1277
1294
  const segmentSpan = (segment.to ?? segment.from) - segment.from;
1278
1295
  const hasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, fromIdx, toIdx));
1279
1296
  if (segmentSpan <= maxPages && !hasExclusions) {
1280
- logger?.trace?.("Segment within limit, keeping as-is");
1281
1297
  result.push(segment);
1282
1298
  continue;
1283
1299
  }
1284
- logger?.debug?.("Segment exceeds limit or has exclusions, breaking it up");
1285
1300
  let remainingContent = segment.content;
1286
1301
  let currentFromIdx = fromIdx;
1287
1302
  let isFirstPiece = true;
1288
- let iterationCount = 0;
1289
- const maxIterations = 1e4;
1290
1303
  while (currentFromIdx <= toIdx) {
1291
- iterationCount++;
1292
- if (iterationCount > maxIterations) {
1293
- logger?.error?.("INFINITE LOOP DETECTED! Breaking out", { iterationCount: maxIterations });
1294
- logger?.error?.("Loop state", {
1295
- currentFromIdx,
1296
- remainingContentLength: remainingContent.length,
1297
- toIdx
1298
- });
1299
- break;
1300
- }
1301
1304
  const remainingSpan = pageIds[toIdx] - pageIds[currentFromIdx];
1302
- logger?.trace?.("Loop iteration", {
1303
- currentFromIdx,
1304
- currentPageId: pageIds[currentFromIdx],
1305
- iterationCount,
1306
- remainingContentLength: remainingContent.length,
1307
- remainingContentPreview: remainingContent.slice(0, 80),
1308
- remainingSpan,
1309
- toIdx,
1310
- toPageId: pageIds[toIdx]
1311
- });
1312
1305
  const remainingHasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, toIdx));
1313
1306
  if (remainingSpan <= maxPages && !remainingHasExclusions) {
1314
- logger?.debug?.("Remaining span within limit, outputting final segment");
1315
1307
  const finalSeg = createSegment(remainingContent, pageIds[currentFromIdx], currentFromIdx !== toIdx ? pageIds[toIdx] : void 0, isFirstPiece ? segment.meta : void 0);
1316
1308
  if (finalSeg) result.push(finalSeg);
1317
1309
  break;
1318
1310
  }
1319
- const currentPageId = pageIds[currentFromIdx];
1320
- const maxWindowPageId = currentPageId + maxPages;
1311
+ const maxWindowPageId = pageIds[currentFromIdx] + maxPages;
1321
1312
  let windowEndIdx = currentFromIdx;
1322
1313
  for (let i = currentFromIdx; i <= toIdx; i++) if (pageIds[i] <= maxWindowPageId) windowEndIdx = i;
1323
1314
  else break;
1324
- logger?.trace?.("Window calculation", {
1325
- currentPageId,
1326
- maxWindowPageId,
1327
- windowEndIdx,
1328
- windowEndPageId: pageIds[windowEndIdx]
1329
- });
1330
1315
  const windowHasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, windowEndIdx));
1331
1316
  let breakPosition = -1;
1332
- if (windowHasExclusions) {
1333
- logger?.trace?.("Window has exclusions, finding exclusion break position");
1334
- breakPosition = findExclusionBreakPosition(currentFromIdx, windowEndIdx, toIdx, pageIds, expandedBreakpoints, cumulativeOffsets);
1335
- logger?.trace?.("Exclusion break position", { breakPosition });
1336
- }
1337
- if (breakPosition <= 0) {
1338
- const breakpointCtx = {
1339
- cumulativeOffsets,
1340
- expandedBreakpoints,
1341
- normalizedPages,
1342
- pageIds,
1343
- prefer
1344
- };
1345
- logger?.trace?.("Finding break position using patterns...");
1346
- breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, breakpointCtx);
1347
- logger?.trace?.("Pattern break position", { breakPosition });
1348
- }
1317
+ if (windowHasExclusions) breakPosition = findExclusionBreakPosition(currentFromIdx, windowEndIdx, toIdx, pageIds, expandedBreakpoints, cumulativeOffsets);
1318
+ if (breakPosition <= 0) breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, {
1319
+ cumulativeOffsets,
1320
+ expandedBreakpoints,
1321
+ normalizedPages,
1322
+ pageIds,
1323
+ prefer
1324
+ });
1349
1325
  if (breakPosition <= 0) {
1350
- logger?.debug?.("No pattern matched, falling back to page boundary");
1351
1326
  if (windowEndIdx === currentFromIdx) {
1352
- logger?.trace?.("Single page window, outputting page and advancing");
1353
1327
  const pageContent = cumulativeOffsets[currentFromIdx + 1] !== void 0 ? remainingContent.slice(0, cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx]) : remainingContent;
1354
1328
  const pageSeg = createSegment(pageContent.trim(), pageIds[currentFromIdx], void 0, isFirstPiece ? segment.meta : void 0);
1355
1329
  if (pageSeg) result.push(pageSeg);
1356
1330
  remainingContent = remainingContent.slice(pageContent.length).trim();
1357
1331
  currentFromIdx++;
1358
1332
  isFirstPiece = false;
1359
- logger?.trace?.("After single page", {
1360
- currentFromIdx,
1361
- remainingContentLength: remainingContent.length
1362
- });
1363
1333
  continue;
1364
1334
  }
1365
1335
  breakPosition = cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx];
1366
- logger?.trace?.("Multi-page window, using full window break position", { breakPosition });
1367
1336
  }
1368
1337
  const pieceContent = remainingContent.slice(0, breakPosition).trim();
1369
- logger?.trace?.("Piece extracted", {
1370
- breakPosition,
1371
- pieceContentLength: pieceContent.length,
1372
- pieceContentPreview: pieceContent.slice(0, 80)
1373
- });
1374
1338
  const actualStartIdx = pieceContent ? findActualStartPage(pieceContent, currentFromIdx, toIdx, pageIds, normalizedPages) : currentFromIdx;
1375
1339
  const actualEndIdx = pieceContent ? findActualEndPage(pieceContent, actualStartIdx, windowEndIdx, pageIds, normalizedPages) : currentFromIdx;
1376
- logger?.trace?.("Actual page indices", {
1377
- actualEndIdx,
1378
- actualStartIdx,
1379
- pieceHasContent: !!pieceContent
1380
- });
1381
1340
  if (pieceContent) {
1382
1341
  const pieceSeg = createSegment(pieceContent, pageIds[actualStartIdx], actualEndIdx > actualStartIdx ? pageIds[actualEndIdx] : void 0, isFirstPiece ? segment.meta : void 0);
1383
- if (pieceSeg) {
1384
- result.push(pieceSeg);
1385
- logger?.debug?.("Created segment", {
1386
- contentLength: pieceSeg.content.length,
1387
- from: pieceSeg.from,
1388
- to: pieceSeg.to
1389
- });
1390
- }
1342
+ if (pieceSeg) result.push(pieceSeg);
1391
1343
  }
1392
- const prevRemainingLength = remainingContent.length;
1393
1344
  remainingContent = remainingContent.slice(breakPosition).trim();
1394
- logger?.trace?.("After slicing remainingContent", {
1395
- newLength: remainingContent.length,
1396
- prevLength: prevRemainingLength,
1397
- slicedAmount: breakPosition
1398
- });
1399
- if (!remainingContent) {
1400
- logger?.debug?.("No remaining content, breaking out of loop");
1401
- break;
1402
- }
1403
1345
  let nextFromIdx = actualEndIdx;
1404
- if (actualEndIdx + 1 <= toIdx) {
1346
+ if (remainingContent && actualEndIdx + 1 <= toIdx) {
1405
1347
  const nextPageData = normalizedPages.get(pageIds[actualEndIdx + 1]);
1406
1348
  if (nextPageData) {
1407
1349
  const nextPrefix = nextPageData.content.slice(0, Math.min(30, nextPageData.length));
1408
- if (nextPrefix && remainingContent.startsWith(nextPrefix)) {
1409
- nextFromIdx = actualEndIdx + 1;
1410
- logger?.trace?.("Content starts with next page prefix", { advancingTo: nextFromIdx });
1411
- }
1350
+ if (nextPrefix && remainingContent.startsWith(nextPrefix)) nextFromIdx = actualEndIdx + 1;
1412
1351
  }
1413
1352
  }
1414
- logger?.trace?.("End of iteration", {
1415
- nextFromIdx,
1416
- prevCurrentFromIdx: currentFromIdx,
1417
- willAdvance: nextFromIdx !== currentFromIdx
1418
- });
1419
1353
  currentFromIdx = nextFromIdx;
1420
1354
  isFirstPiece = false;
1421
1355
  }
1422
1356
  }
1423
- logger?.info?.("Breakpoint processing completed", { resultCount: result.length });
1424
1357
  return result;
1425
1358
  };
1426
1359
  /**
@@ -1466,7 +1399,7 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
1466
1399
  * });
1467
1400
  */
1468
1401
  const segmentPages = (pages, options) => {
1469
- const { rules = [], maxPages, breakpoints, prefer = "longer", logger } = options;
1402
+ const { rules = [], maxPages, breakpoints, prefer = "longer" } = options;
1470
1403
  if (!pages.length) return [];
1471
1404
  const { content: matchContent, normalizedPages: normalizedContent, pageMap } = buildPageMap(pages);
1472
1405
  const splitPoints = [];
@@ -1504,7 +1437,7 @@ const segmentPages = (pages, options) => {
1504
1437
  if (lastPage.id !== firstPage.id) initialSeg.to = lastPage.id;
1505
1438
  if (initialSeg.content) segments = [initialSeg];
1506
1439
  }
1507
- if (maxPages !== void 0 && maxPages >= 0 && breakpoints?.length) return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer, logger);
1440
+ if (maxPages !== void 0 && maxPages >= 0 && breakpoints?.length) return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer);
1508
1441
  return segments;
1509
1442
  };
1510
1443
  /**
@@ -1731,5 +1664,5 @@ const analyzeTextForRule = (text) => {
1731
1664
  };
1732
1665
 
1733
1666
  //#endregion
1734
- export { TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
1667
+ export { TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
1735
1668
  //# sourceMappingURL=index.mjs.map