flappa-doormal 2.16.2 → 2.16.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1521,7 +1521,7 @@ const JOINER_PREFIX_LENGTHS = [
1521
1521
  8,
1522
1522
  6
1523
1523
  ];
1524
- const STOP_CHARACTERS = /[\s\n.,;!?؛،۔]/;
1524
/**
 * Single-character class matched by `STOP_CHARACTERS`:
 *  - any whitespace (`\s`, which already includes line feed, so no separate `\n`),
 *  - ASCII punctuation `.`, `,`, `;`, `!`, `?`,
 *  - U+061B ARABIC SEMICOLON, U+060C ARABIC COMMA, U+06D4 ARABIC FULL STOP,
 *  - U+06DD ARABIC END OF AYAH, U+06DE ARABIC START OF RUB EL HIZB.
 *
 * The Arabic marks are written as `\u` escapes because U+06DD is a format
 * control character that can render invisibly or wrap neighbouring characters,
 * which makes the raw literal easy to misread or damage in an editor.
 * The regex has no `g`/`y` flag, so `.test()` calls are stateless.
 */
const STOP_CHARACTERS = /[\s.,;!?\u061B\u060C\u06D4\u06DD\u06DE]/;
1525
1525
  /**
1526
1526
  * Maximum allowed deviation between expected and actual boundary positions (characters).
1527
1527
  * Matches outside this range are rejected unless `ignoreDeviation` is active.
@@ -1833,6 +1833,46 @@ const estimateStartOffsetInCurrentPage = (remainingContent, currentFromIdx, page
1833
1833
  }
1834
1834
  return 0;
1835
1835
  };
1836
/**
 * Estimates where `remainingContent` starts inside the current page by looking
 * for the LAST occurrence of its leading text in that page's content.
 *
 * The leading text is the first 500 characters of `remainingContent` with
 * leading whitespace removed. Needles are tried longest first: up to 30
 * characters, shrinking by 5 while at least 5 long, then a 3-character fallback.
 *
 * @param {string} remainingContent - Text whose start is being located.
 * @param {number} currentFromIdx - Index into `pageIds` of the current page.
 * @param {Array} pageIds - Page identifiers; `pageIds[currentFromIdx]` is the lookup key.
 * @param {Map} normalizedPages - Map from page id to an object with a string `content`.
 * @returns {number} Index of the last match in the page content, or 0 when the
 *   page is unknown, the leading text is empty, or nothing matches.
 */
const estimateStartOffsetInCurrentPageFromEnd = (remainingContent, currentFromIdx, pageIds, normalizedPages) => {
  const page = normalizedPages.get(pageIds[currentFromIdx]);
  if (!page) {
    return 0;
  }

  const head = remainingContent.slice(0, 500).trimStart();
  if (head.length === 0) {
    return 0;
  }

  // Build the ordered list of needle lengths up front, longest first.
  const needleLengths = [];
  for (let size = Math.min(30, head.length); size >= 5; size -= 5) {
    needleLengths.push(size);
  }
  if (head.length >= 3) {
    needleLengths.push(3);
  }

  for (const size of needleLengths) {
    const hit = page.content.lastIndexOf(head.slice(0, size));
    if (hit >= 0) {
      return hit;
    }
  }
  return 0;
};
1854
/**
 * Chooses the start offset of `segmentContent` within the first page
 * (`pageIds[fromIdx]`) from two estimates: the one produced by
 * `estimateStartOffsetInCurrentPage` and the last-occurrence estimate from
 * `estimateStartOffsetInCurrentPageFromEnd`.
 *
 * When the two estimates differ, each is scored by how close the next page's
 * detected start (`findPageStartNearExpectedBoundary`) lands to the boundary
 * that estimate implies; the estimate with the smallest distance wins. The
 * first estimate is returned whenever no comparison is possible.
 *
 * @param {string} segmentContent - Text of the segment being analysed.
 * @param {number} fromIdx - Index of the segment's first page in `pageIds`.
 * @param {number} toIdx - Index of the segment's last page in `pageIds`.
 * @param {Array} pageIds - Page identifiers.
 * @param {Map} normalizedPages - Page data keyed by page id.
 * @param {Array<number>} cumulativeOffsets - Per-page-index offsets; only the
 *   difference between entries `fromIdx + 1` and `fromIdx` is used here.
 *   (Presumably cumulative character offsets — confirm against the producer.)
 * @param {object} [logger] - Forwarded to `findPageStartNearExpectedBoundary`.
 * @returns {number} The selected start offset.
 */
const selectStartOffsetInCurrentPage = (segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger) => {
  const first = estimateStartOffsetInCurrentPage(segmentContent, fromIdx, pageIds, normalizedPages);

  // Single-page segment: there is no following page to score against, so the
  // first estimate is the answer and the reverse scan would be wasted work.
  if (fromIdx + 1 > toIdx) return first;

  const last = estimateStartOffsetInCurrentPageFromEnd(segmentContent, fromIdx, pageIds, normalizedPages);
  const candidates = [...new Set([first, last])];
  if (candidates.length === 1) return first;

  const fromOffset = cumulativeOffsets[fromIdx];
  const nextOffset = cumulativeOffsets[fromIdx + 1];
  if (fromOffset === undefined || nextOffset === undefined) return first;
  const rawBoundary = Math.max(0, nextOffset - fromOffset);

  let best = first;
  let bestScore = Number.POSITIVE_INFINITY;
  for (const candidate of candidates) {
    const expectedBoundary = Math.max(0, rawBoundary - candidate);
    const pos = findPageStartNearExpectedBoundary(segmentContent, fromIdx + 1, expectedBoundary, pageIds, normalizedPages, logger);
    // Only positive positions count as a usable match for scoring.
    if (pos > 0) {
      const score = Math.abs(pos - expectedBoundary);
      if (score < bestScore) {
        bestScore = score;
        best = candidate;
      }
    }
  }
  return best;
};
1836
1876
  /**
1837
1877
  * Attempts to find the start position of a target page within remainingContent,
1838
1878
  * anchored near an expected boundary position to reduce collisions.
@@ -1898,6 +1938,22 @@ const selectBestAnchor = (candidates, expectedBoundary) => {
1898
1938
  return Math.abs(curr.pos - expectedBoundary) + (curr.isNewline ? 0 : NON_NEWLINE_PENALTY) < bestScore ? curr : best;
1899
1939
  });
1900
1940
  };
1941
/**
 * Locates the start of a target page inside `remainingContent`, accepting only
 * anchor matches positioned strictly after `minPos`.
 * Used to avoid duplicate earlier matches when content repeats.
 *
 * For each length in `WINDOW_PREFIX_LENGTHS`, the trimmed leading text of the
 * target page is searched for from `max(0, minPos)` to the end of
 * `remainingContent`. The first length that yields at least one candidate past
 * `minPos` decides the result, chosen by `selectBestAnchor` relative to `minPos`.
 *
 * @param {string} remainingContent - Text to search in.
 * @param {number} targetPageIdx - Index of the target page in `pageIds`.
 * @param {number} minPos - Matches at or before this position are discarded.
 * @param {Array} pageIds - Page identifiers.
 * @param {Map} normalizedPages - Page data keyed by page id, each with a string `content`.
 * @returns {number} Position of the selected anchor, or -1 when the page is
 *   unknown or no candidate lies after `minPos`.
 */
const findPageStartAfterPosition = (remainingContent, targetPageIdx, minPos, pageIds, normalizedPages) => {
  const targetPageData = normalizedPages.get(pageIds[targetPageIdx]);
  if (!targetPageData) {
    return -1;
  }

  const pageText = targetPageData.content.trimStart();
  const searchStart = Math.max(0, minPos);
  const searchEnd = remainingContent.length;

  for (const windowLen of WINDOW_PREFIX_LENGTHS) {
    const prefix = pageText.slice(0, Math.min(windowLen, pageText.length)).trim();
    if (prefix) {
      const laterMatches = findAnchorCandidates(remainingContent, prefix, searchStart, searchEnd).filter(({ pos }) => pos > minPos);
      if (laterMatches.length > 0) {
        return selectBestAnchor(laterMatches, minPos).pos;
      }
    }
  }
  return -1;
};
1901
1957
  const buildBoundaryPositionsFastPath = (segmentContent, fromIdx, toIdx, pageCount, cumulativeOffsets, logger) => {
1902
1958
  const boundaryPositions = [0];
1903
1959
  logger?.debug?.("[breakpoints] Using fast-path for large segment in buildBoundaryPositions", {
@@ -1956,7 +2012,7 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
1956
2012
  pageCount,
1957
2013
  toIdx
1958
2014
  });
1959
- let startOffsetInFromPage = estimateStartOffsetInCurrentPage(segmentContent, fromIdx, pageIds, normalizedPages);
2015
+ let startOffsetInFromPage = selectStartOffsetInCurrentPage(segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger);
1960
2016
  let didInferStartOffset = false;
1961
2017
  for (let i = fromIdx + 1; i <= toIdx; i++) {
1962
2018
  const rawBoundary = cumulativeOffsets[i] !== void 0 && cumulativeOffsets[fromIdx] !== void 0 ? Math.max(0, cumulativeOffsets[i] - cumulativeOffsets[fromIdx]) : void 0;
@@ -1964,7 +2020,12 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
1964
2020
  startOffsetInFromPage = resolved.startOffsetInFromPage;
1965
2021
  didInferStartOffset = didInferStartOffset || resolved.didInferStartOffset;
1966
2022
  const prevBoundary = boundaryPositions[boundaryPositions.length - 1];
1967
- if (isBoundaryPositionValid(resolved.pos, prevBoundary, resolved.expectedBoundary, segmentContent.length)) boundaryPositions.push(resolved.pos);
2023
+ let resolvedPos = resolved.pos;
2024
+ if (resolvedPos <= prevBoundary) {
2025
+ const afterPos = findPageStartAfterPosition(segmentContent, i, prevBoundary + 1, pageIds, normalizedPages);
2026
+ if (afterPos > prevBoundary) resolvedPos = afterPos;
2027
+ }
2028
+ if (isBoundaryPositionValid(resolvedPos, prevBoundary, resolved.expectedBoundary, segmentContent.length)) boundaryPositions.push(resolvedPos);
1968
2029
  else {
1969
2030
  const estimate = Math.max(prevBoundary + 1, resolved.expectedBoundary);
1970
2031
  boundaryPositions.push(Math.min(estimate, segmentContent.length));
@@ -2001,7 +2062,8 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
2001
2062
  */
2002
2063
  const buildBoundaryPositions = (segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger) => {
2003
2064
  const pageCount = toIdx - fromIdx + 1;
2004
- if (pageCount >= FAST_PATH_THRESHOLD) return buildBoundaryPositionsFastPath(segmentContent, fromIdx, toIdx, pageCount, cumulativeOffsets, logger);
2065
+ const expectedLength = (cumulativeOffsets[toIdx + 1] ?? 0) - (cumulativeOffsets[fromIdx] ?? 0);
2066
+ if (pageCount >= FAST_PATH_THRESHOLD && segmentContent.length === expectedLength) return buildBoundaryPositionsFastPath(segmentContent, fromIdx, toIdx, pageCount, cumulativeOffsets, logger);
2005
2067
  return buildBoundaryPositionsAccurate(segmentContent, fromIdx, toIdx, pageCount, pageIds, normalizedPages, cumulativeOffsets, logger);
2006
2068
  };
2007
2069
  /**