flappa-doormal 2.1.0 → 2.2.0
This diff shows the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
- package/AGENTS.md +32 -0
- package/README.md +94 -56
- package/dist/index.d.mts +95 -8
- package/dist/index.d.mts.map +1 -1
- package/dist/index.mjs +49 -116
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
```diff
@@ -673,6 +673,35 @@ const normalizeLineEndings = (content) => content.replace(/\r\n?/g, "\n");
  * { lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }
  */
 /**
+ * Escapes regex metacharacters (parentheses and brackets) in template patterns,
+ * but preserves content inside `{{...}}` token delimiters.
+ *
+ * This allows users to write intuitive patterns like `({{harf}}):` instead of
+ * the verbose `\\({{harf}}\\):`. The escaping is applied BEFORE token expansion,
+ * so tokens like `{{harf}}` which expand to `[أ-ي]` work correctly.
+ *
+ * @param pattern - Template pattern that may contain `()[]` and `{{tokens}}`
+ * @returns Pattern with `()[]` escaped outside of `{{...}}` delimiters
+ *
+ * @example
+ * escapeTemplateBrackets('({{harf}}): ')
+ * // → '\\({{harf}}\\): '
+ *
+ * @example
+ * escapeTemplateBrackets('[{{raqm}}] ')
+ * // → '\\[{{raqm}}\\] '
+ *
+ * @example
+ * escapeTemplateBrackets('{{harf}}')
+ * // → '{{harf}}' (unchanged - no brackets outside tokens)
+ */
+const escapeTemplateBrackets = (pattern) => {
+  return pattern.replace(/(\{\{[^}]*\}\})|([()[\]])/g, (match, token, bracket) => {
+    if (token) return token;
+    return `\\${bracket}`;
+  });
+};
+/**
  * Base token definitions mapping human-readable token names to regex patterns.
  *
  * These tokens contain raw regex patterns and do not reference other tokens.
```
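The new helper also joins the public export list (see the final hunk), so its behavior can be exercised directly. A minimal usage sketch mirroring the `@example` blocks above, assuming the package name doubles as the import specifier:

```js
import { escapeTemplateBrackets } from 'flappa-doormal';

// Brackets outside {{...}} delimiters become literal regex characters.
escapeTemplateBrackets('({{harf}}): '); // → '\\({{harf}}\\): '
escapeTemplateBrackets('[{{raqm}}] ');  // → '\\[{{raqm}}\\] '

// Content inside token delimiters is preserved, so a token that expands
// to a character class such as [أ-ي] still works after substitution.
escapeTemplateBrackets('{{harf}}');     // → '{{harf}}' (unchanged)
```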
```diff
@@ -1000,7 +1029,7 @@ const hasCapturingGroup = (pattern) => {
  * // → { pattern: 'حَ?دَّ?ثَ?نَ?ا|...', captureNames: [] }
  */
 const processPattern = (pattern, fuzzy) => {
-  const { pattern: expanded, captureNames } = expandTokensWithCaptures(pattern, fuzzy ? makeDiacriticInsensitive : void 0);
+  const { pattern: expanded, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(pattern), fuzzy ? makeDiacriticInsensitive : void 0);
   return {
     captureNames,
     pattern: expanded
```
```diff
@@ -1055,16 +1084,16 @@ const buildRuleRegex = (rule) => {
     const processed = s.lineStartsWith.map((p) => processPattern(p, fuzzy));
     const patterns = processed.map((p) => p.pattern).join("|");
     allCaptureNames = processed.flatMap((p) => p.captureNames);
-    s.
+    s.regex = `^(?:${patterns})`;
   }
   if (s.lineEndsWith?.length) {
     const processed = s.lineEndsWith.map((p) => processPattern(p, fuzzy));
     const patterns = processed.map((p) => p.pattern).join("|");
     allCaptureNames = processed.flatMap((p) => p.captureNames);
-    s.
+    s.regex = `(?:${patterns})$`;
   }
   if (s.template) {
-    const { pattern, captureNames } = expandTokensWithCaptures(s.template);
+    const { pattern, captureNames } = expandTokensWithCaptures(escapeTemplateBrackets(s.template));
     s.regex = pattern;
     allCaptureNames = [...allCaptureNames, ...captureNames];
   }
```
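These call sites are the behavioral core of the release: every `lineStartsWith`, `lineEndsWith`, and `template` pattern is now routed through `escapeTemplateBrackets` before token expansion. The same escape-then-expand order can be reproduced with the exported helpers; a sketch, assuming `{{harf}}` expands to `[أ-ي]` as the JSDoc above states:

```js
import { escapeTemplateBrackets, expandTokensWithCaptures } from 'flappa-doormal';

// Escape first, expand second: the user-written parentheses are escaped,
// while the character class produced by the token stays functional.
const { pattern } = expandTokensWithCaptures(escapeTemplateBrackets('({{harf}}): '));
// pattern ≈ '\\([أ-ي]\\): '
```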
```diff
@@ -1227,7 +1256,7 @@ const convertPageBreaks = (content, startOffset, pageBreaks) => {
  * @param prefer - 'longer' for last match, 'shorter' for first match
  * @returns Processed segments with oversized ones broken up
  */
-const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoints, prefer
+const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoints, prefer) => {
   const findExclusionBreakPosition = (currentFromIdx, windowEndIdx, toIdx, pageIds$1, expandedBreakpoints$1, cumulativeOffsets$1) => {
     const startingPageId = pageIds$1[currentFromIdx];
     if (expandedBreakpoints$1.some((bp) => bp.excludeSet.has(startingPageId)) && currentFromIdx < toIdx) return cumulativeOffsets$1[currentFromIdx + 1] - cumulativeOffsets$1[currentFromIdx];
```
```diff
@@ -1259,168 +1288,72 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
   const patternProcessor = (p) => processPattern(p, false).pattern;
   const expandedBreakpoints = expandBreakpoints(breakpoints, patternProcessor);
   const result = [];
-  logger?.info?.("Starting breakpoint processing", {
-    maxPages,
-    segmentCount: segments.length
-  });
   for (const segment of segments) {
     const fromIdx = pageIdToIndex.get(segment.from) ?? -1;
     const toIdx = segment.to !== void 0 ? pageIdToIndex.get(segment.to) ?? fromIdx : fromIdx;
-    logger?.debug?.("Processing segment", {
-      contentLength: segment.content.length,
-      contentPreview: segment.content.slice(0, 100),
-      from: segment.from,
-      fromIdx,
-      to: segment.to,
-      toIdx
-    });
     const segmentSpan = (segment.to ?? segment.from) - segment.from;
     const hasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, fromIdx, toIdx));
     if (segmentSpan <= maxPages && !hasExclusions) {
-      logger?.trace?.("Segment within limit, keeping as-is");
       result.push(segment);
       continue;
     }
-    logger?.debug?.("Segment exceeds limit or has exclusions, breaking it up");
     let remainingContent = segment.content;
     let currentFromIdx = fromIdx;
     let isFirstPiece = true;
-    let iterationCount = 0;
-    const maxIterations = 1e4;
     while (currentFromIdx <= toIdx) {
-      iterationCount++;
-      if (iterationCount > maxIterations) {
-        logger?.error?.("INFINITE LOOP DETECTED! Breaking out", { iterationCount: maxIterations });
-        logger?.error?.("Loop state", {
-          currentFromIdx,
-          remainingContentLength: remainingContent.length,
-          toIdx
-        });
-        break;
-      }
       const remainingSpan = pageIds[toIdx] - pageIds[currentFromIdx];
-      logger?.trace?.("Loop iteration", {
-        currentFromIdx,
-        currentPageId: pageIds[currentFromIdx],
-        iterationCount,
-        remainingContentLength: remainingContent.length,
-        remainingContentPreview: remainingContent.slice(0, 80),
-        remainingSpan,
-        toIdx,
-        toPageId: pageIds[toIdx]
-      });
       const remainingHasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, toIdx));
       if (remainingSpan <= maxPages && !remainingHasExclusions) {
-        logger?.debug?.("Remaining span within limit, outputting final segment");
         const finalSeg = createSegment(remainingContent, pageIds[currentFromIdx], currentFromIdx !== toIdx ? pageIds[toIdx] : void 0, isFirstPiece ? segment.meta : void 0);
         if (finalSeg) result.push(finalSeg);
         break;
       }
-      const
-      const maxWindowPageId = currentPageId + maxPages;
+      const maxWindowPageId = pageIds[currentFromIdx] + maxPages;
       let windowEndIdx = currentFromIdx;
       for (let i = currentFromIdx; i <= toIdx; i++) if (pageIds[i] <= maxWindowPageId) windowEndIdx = i;
       else break;
-      logger?.trace?.("Window calculation", {
-        currentPageId,
-        maxWindowPageId,
-        windowEndIdx,
-        windowEndPageId: pageIds[windowEndIdx]
-      });
       const windowHasExclusions = expandedBreakpoints.some((bp) => hasExcludedPageInRange(bp.excludeSet, pageIds, currentFromIdx, windowEndIdx));
       let breakPosition = -1;
-      if (windowHasExclusions)
-
-
-
-
-
-
-
-        expandedBreakpoints,
-        normalizedPages,
-        pageIds,
-        prefer
-      };
-      logger?.trace?.("Finding break position using patterns...");
-      breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, breakpointCtx);
-      logger?.trace?.("Pattern break position", { breakPosition });
-      }
+      if (windowHasExclusions) breakPosition = findExclusionBreakPosition(currentFromIdx, windowEndIdx, toIdx, pageIds, expandedBreakpoints, cumulativeOffsets);
+      if (breakPosition <= 0) breakPosition = findBreakPosition(remainingContent, currentFromIdx, toIdx, windowEndIdx, {
+        cumulativeOffsets,
+        expandedBreakpoints,
+        normalizedPages,
+        pageIds,
+        prefer
+      });
       if (breakPosition <= 0) {
-        logger?.debug?.("No pattern matched, falling back to page boundary");
         if (windowEndIdx === currentFromIdx) {
-          logger?.trace?.("Single page window, outputting page and advancing");
           const pageContent = cumulativeOffsets[currentFromIdx + 1] !== void 0 ? remainingContent.slice(0, cumulativeOffsets[currentFromIdx + 1] - cumulativeOffsets[currentFromIdx]) : remainingContent;
           const pageSeg = createSegment(pageContent.trim(), pageIds[currentFromIdx], void 0, isFirstPiece ? segment.meta : void 0);
           if (pageSeg) result.push(pageSeg);
           remainingContent = remainingContent.slice(pageContent.length).trim();
           currentFromIdx++;
           isFirstPiece = false;
-          logger?.trace?.("After single page", {
-            currentFromIdx,
-            remainingContentLength: remainingContent.length
-          });
           continue;
         }
         breakPosition = cumulativeOffsets[windowEndIdx + 1] - cumulativeOffsets[currentFromIdx];
-        logger?.trace?.("Multi-page window, using full window break position", { breakPosition });
       }
       const pieceContent = remainingContent.slice(0, breakPosition).trim();
-      logger?.trace?.("Piece extracted", {
-        breakPosition,
-        pieceContentLength: pieceContent.length,
-        pieceContentPreview: pieceContent.slice(0, 80)
-      });
       const actualStartIdx = pieceContent ? findActualStartPage(pieceContent, currentFromIdx, toIdx, pageIds, normalizedPages) : currentFromIdx;
       const actualEndIdx = pieceContent ? findActualEndPage(pieceContent, actualStartIdx, windowEndIdx, pageIds, normalizedPages) : currentFromIdx;
-      logger?.trace?.("Actual page indices", {
-        actualEndIdx,
-        actualStartIdx,
-        pieceHasContent: !!pieceContent
-      });
       if (pieceContent) {
         const pieceSeg = createSegment(pieceContent, pageIds[actualStartIdx], actualEndIdx > actualStartIdx ? pageIds[actualEndIdx] : void 0, isFirstPiece ? segment.meta : void 0);
-        if (pieceSeg)
-          result.push(pieceSeg);
-        logger?.debug?.("Created segment", {
-          contentLength: pieceSeg.content.length,
-          from: pieceSeg.from,
-          to: pieceSeg.to
-        });
-        }
+        if (pieceSeg) result.push(pieceSeg);
       }
-      const prevRemainingLength = remainingContent.length;
       remainingContent = remainingContent.slice(breakPosition).trim();
-      logger?.trace?.("After slicing remainingContent", {
-        newLength: remainingContent.length,
-        prevLength: prevRemainingLength,
-        slicedAmount: breakPosition
-      });
-      if (!remainingContent) {
-        logger?.debug?.("No remaining content, breaking out of loop");
-        break;
-      }
       let nextFromIdx = actualEndIdx;
-      if (actualEndIdx + 1 <= toIdx) {
+      if (remainingContent && actualEndIdx + 1 <= toIdx) {
        const nextPageData = normalizedPages.get(pageIds[actualEndIdx + 1]);
        if (nextPageData) {
          const nextPrefix = nextPageData.content.slice(0, Math.min(30, nextPageData.length));
-          if (nextPrefix && remainingContent.startsWith(nextPrefix))
-            nextFromIdx = actualEndIdx + 1;
-          logger?.trace?.("Content starts with next page prefix", { advancingTo: nextFromIdx });
-          }
+          if (nextPrefix && remainingContent.startsWith(nextPrefix)) nextFromIdx = actualEndIdx + 1;
        }
       }
-      logger?.trace?.("End of iteration", {
-        nextFromIdx,
-        prevCurrentFromIdx: currentFromIdx,
-        willAdvance: nextFromIdx !== currentFromIdx
-      });
       currentFromIdx = nextFromIdx;
       isFirstPiece = false;
     }
   }
-  logger?.info?.("Breakpoint processing completed", { resultCount: result.length });
   return result;
 };
 /**
```
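Stripped of its logger instrumentation, the loop's break-position search reduces to a three-step cascade. A schematic restatement (both helpers named here are module-internal, not part of the public API):

```js
// Inside the while loop of applyBreakpoints (schematic only):
let breakPosition = -1;
// 1. If the window touches an excluded page, break just before it.
if (windowHasExclusions) breakPosition = findExclusionBreakPosition(/* ... */);
// 2. Otherwise (or if that yielded nothing), search for a breakpoint pattern.
if (breakPosition <= 0) breakPosition = findBreakPosition(/* ... */);
// 3. Still nothing: fall back to a page boundary, one page at a time for
//    single-page windows, else the full window's extent.
```

Note also that the explicit `if (!remainingContent) break;` guard and the 10,000-iteration loop guard are both gone; the emptiness test is folded into the next-page-prefix advance (`if (remainingContent && ...)`).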
```diff
@@ -1466,7 +1399,7 @@ const applyBreakpoints = (segments, pages, normalizedContent, maxPages, breakpoi
  * });
  */
 const segmentPages = (pages, options) => {
-  const { rules = [], maxPages, breakpoints, prefer = "longer"
+  const { rules = [], maxPages, breakpoints, prefer = "longer" } = options;
   if (!pages.length) return [];
   const { content: matchContent, normalizedPages: normalizedContent, pageMap } = buildPageMap(pages);
   const splitPoints = [];
```
```diff
@@ -1504,7 +1437,7 @@ const segmentPages = (pages, options) => {
     if (lastPage.id !== firstPage.id) initialSeg.to = lastPage.id;
     if (initialSeg.content) segments = [initialSeg];
   }
-  if (maxPages !== void 0 && maxPages >= 0 && breakpoints?.length) return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer
+  if (maxPages !== void 0 && maxPages >= 0 && breakpoints?.length) return applyBreakpoints(segments, pages, normalizedContent, maxPages, breakpoints, prefer);
   return segments;
 };
 /**
```
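As the completed destructuring shows, `segmentPages` reads its four options (`rules`, `maxPages`, `breakpoints`, `prefer`) directly. A combined usage sketch; the rule shape comes from the JSDoc example in the first hunk, while the page objects and the string form of `breakpoints` are assumptions drawn from the surrounding code rather than a documented contract:

```js
import { segmentPages } from 'flappa-doormal';

// Hypothetical pages: each entry carries an id and its text content.
const pages = [
  { id: 1, content: '...' },
  { id: 2, content: '...' },
  { id: 3, content: '...' },
];

const segments = segmentPages(pages, {
  rules: [{ lineStartsAfter: ['{{raqms:hadithNum}} {{dash}} '], split: 'at' }],
  maxPages: 1,               // re-break any segment spanning more than one page
  breakpoints: ['{{dash}}'], // candidate break patterns for oversized segments
  prefer: 'longer',          // 'longer' takes the last match in the window
});
```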
```diff
@@ -1731,5 +1664,5 @@ const analyzeTextForRule = (text) => {
 };
 
 //#endregion
-export { TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
+export { TOKEN_PATTERNS, analyzeTextForRule, containsTokens, detectTokenPatterns, escapeRegex, escapeTemplateBrackets, expandTokens, expandTokensWithCaptures, generateTemplateFromText, getAvailableTokens, getTokenPattern, makeDiacriticInsensitive, normalizeLineEndings, segmentPages, stripHtmlTags, suggestPatternConfig, templateToRegex };
 //# sourceMappingURL=index.mjs.map
```