flappa-doormal 2.16.2 → 2.16.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.mjs +66 -4
- package/dist/index.mjs.map +1 -1
- package/package.json +2 -2
package/dist/index.mjs
CHANGED
|
@@ -1521,7 +1521,7 @@ const JOINER_PREFIX_LENGTHS = [
|
|
|
1521
1521
|
8,
|
|
1522
1522
|
6
|
|
1523
1523
|
];
|
|
1524
|
-
const STOP_CHARACTERS = /[\s\n
|
|
1524
|
+
// Single-character class matching any whitespace (`\s`, which already covers `\n`),
// the Latin punctuation marks . , ; ! ? and the Arabic-script marks ؛ ، ۔ ۞.
// NOTE(review): presumably marks positions where text may be safely cut; the
// consumers of this constant are not visible here, so confirm against its call sites.
const STOP_CHARACTERS = /[\s\n.,;!?؛،۔۞]/;
|
|
1525
1525
|
/**
|
|
1526
1526
|
* Maximum allowed deviation between expected and actual boundary positions (characters).
|
|
1527
1527
|
* Matches outside this range are rejected unless `ignoreDeviation` is active.
|
|
@@ -1833,6 +1833,46 @@ const estimateStartOffsetInCurrentPage = (remainingContent, currentFromIdx, page
|
|
|
1833
1833
|
}
|
|
1834
1834
|
return 0;
|
|
1835
1835
|
};
|
|
1836
|
+
/**
 * Estimates the offset inside the current page at which `remainingContent`
 * begins, anchoring on the LAST occurrence of its leading text in that page.
 *
 * The leading text is taken from the first 500 characters of `remainingContent`
 * with leading whitespace removed. Needles are tried longest-first: starting at
 * min(30, available length), shrinking by 5 while the length is at least 5, and
 * finally a 3-character needle when at least 3 characters are available.
 *
 * @param {string} remainingContent - Text whose start should be located.
 * @param {number} currentFromIdx - Index into `pageIds` of the current page.
 * @param {Array} pageIds - Page identifiers, indexed by page position.
 * @param {Map} normalizedPages - Maps a page id to an object with a `content` string.
 * @returns {number} Index of the last match in the page content, or 0 when the
 *   page is unknown, the leading text is empty, or no needle matches.
 */
const estimateStartOffsetInCurrentPageFromEnd = (remainingContent, currentFromIdx, pageIds, normalizedPages) => {
    const page = normalizedPages.get(pageIds[currentFromIdx]);
    if (!page) {
        return 0;
    }

    const head = remainingContent.slice(0, 500).trimStart();
    if (head.length === 0) {
        return 0;
    }

    // Build the ladder of needle lengths up front, longest first.
    const needleLengths = [];
    for (let size = Math.min(30, head.length); size >= 5; size -= 5) {
        needleLengths.push(size);
    }
    if (head.length >= 3) {
        // Last resort: a very short needle.
        needleLengths.push(3);
    }

    for (const size of needleLengths) {
        const hit = page.content.lastIndexOf(head.slice(0, size));
        if (hit >= 0) {
            return hit;
        }
    }
    return 0;
};
|
|
1854
|
+
/**
 * Picks the offset within the first page (`fromIdx`) at which `segmentContent`
 * is assumed to start.
 *
 * Two estimates are considered: one from `estimateStartOffsetInCurrentPage` and
 * one from `estimateStartOffsetInCurrentPageFromEnd`. When they differ, each is
 * scored by how far the position reported by `findPageStartNearExpectedBoundary`
 * for the next page lies from the boundary that estimate implies; the estimate
 * with the smallest distance wins. Ties and the absence of any positive
 * position keep the first estimate.
 *
 * The end-anchored estimate is only computed when it can influence the result,
 * i.e. when the segment spans more than one page and both cumulative offsets
 * needed for scoring are defined.
 *
 * @param {string} segmentContent - Content of the segment being split.
 * @param {number} fromIdx - Index of the segment's first page in `pageIds`.
 * @param {number} toIdx - Index of the segment's last page in `pageIds`.
 * @param {Array} pageIds - Page identifiers, indexed by page position.
 * @param {Map} normalizedPages - Page data keyed by page id.
 * @param {Array<number>} cumulativeOffsets - Cumulative offsets indexed by page position.
 * @param {object} [logger] - Optional logger forwarded to the boundary search.
 * @returns {number} The selected start offset.
 */
const selectStartOffsetInCurrentPage = (segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger) => {
    const first = estimateStartOffsetInCurrentPage(segmentContent, fromIdx, pageIds, normalizedPages);

    // Single-page segment: there is no following page to score against.
    if (fromIdx + 1 > toIdx) return first;

    // Without both offsets no expected boundary can be derived, so scoring is impossible.
    const fromOffset = cumulativeOffsets[fromIdx];
    const nextOffset = cumulativeOffsets[fromIdx + 1];
    if (fromOffset === undefined || nextOffset === undefined) return first;

    const last = estimateStartOffsetInCurrentPageFromEnd(segmentContent, fromIdx, pageIds, normalizedPages);
    if (last === first) return first;

    const rawBoundary = Math.max(0, nextOffset - fromOffset);
    let best = first;
    let bestScore = Number.POSITIVE_INFINITY;
    for (const candidate of [first, last]) {
        const expectedBoundary = Math.max(0, rawBoundary - candidate);
        const pos = findPageStartNearExpectedBoundary(segmentContent, fromIdx + 1, expectedBoundary, pageIds, normalizedPages, logger);
        // A non-positive position is treated as "no usable match" for this candidate.
        if (pos <= 0) continue;
        const score = Math.abs(pos - expectedBoundary);
        // Strict comparison keeps the earlier candidate on ties.
        if (score < bestScore) {
            bestScore = score;
            best = candidate;
        }
    }
    return best;
};
|
|
1836
1876
|
/**
|
|
1837
1877
|
* Attempts to find the start position of a target page within remainingContent,
|
|
1838
1878
|
* anchored near an expected boundary position to reduce collisions.
|
|
@@ -1898,6 +1938,22 @@ const selectBestAnchor = (candidates, expectedBoundary) => {
|
|
|
1898
1938
|
return Math.abs(curr.pos - expectedBoundary) + (curr.isNewline ? 0 : NON_NEWLINE_PENALTY) < bestScore ? curr : best;
|
|
1899
1939
|
});
|
|
1900
1940
|
};
|
|
1941
|
+
/**
 * Locates where a target page starts inside `remainingContent`, considering
 * only matches positioned strictly after `minPos`.
 * Used to avoid duplicate earlier matches when content repeats.
 *
 * For each length in `WINDOW_PREFIX_LENGTHS` (in order), the trimmed leading
 * text of the target page is searched for from `max(0, minPos)` to the end of
 * `remainingContent`; the first length that yields any qualifying match decides
 * the result, with `selectBestAnchor` choosing among matches relative to `minPos`.
 *
 * @returns {number} The chosen match position, or -1 when the page is unknown
 *   or no prefix length produces a match after `minPos`.
 */
const findPageStartAfterPosition = (remainingContent, targetPageIdx, minPos, pageIds, normalizedPages) => {
    const page = normalizedPages.get(pageIds[targetPageIdx]);
    if (!page) {
        return -1;
    }

    const pageText = page.content.trimStart();
    for (const windowLen of WINDOW_PREFIX_LENGTHS) {
        const probe = pageText.slice(0, Math.min(windowLen, pageText.length)).trim();
        if (probe.length === 0) {
            continue;
        }

        const hits = findAnchorCandidates(remainingContent, probe, Math.max(0, minPos), remainingContent.length);
        // Keep only matches strictly beyond the minimum position.
        const laterHits = hits.filter((hit) => hit.pos > minPos);
        if (laterHits.length === 0) {
            continue;
        }
        return selectBestAnchor(laterHits, minPos).pos;
    }
    return -1;
};
|
|
1901
1957
|
const buildBoundaryPositionsFastPath = (segmentContent, fromIdx, toIdx, pageCount, cumulativeOffsets, logger) => {
|
|
1902
1958
|
const boundaryPositions = [0];
|
|
1903
1959
|
logger?.debug?.("[breakpoints] Using fast-path for large segment in buildBoundaryPositions", {
|
|
@@ -1956,7 +2012,7 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
|
|
|
1956
2012
|
pageCount,
|
|
1957
2013
|
toIdx
|
|
1958
2014
|
});
|
|
1959
|
-
let startOffsetInFromPage =
|
|
2015
|
+
let startOffsetInFromPage = selectStartOffsetInCurrentPage(segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger);
|
|
1960
2016
|
let didInferStartOffset = false;
|
|
1961
2017
|
for (let i = fromIdx + 1; i <= toIdx; i++) {
|
|
1962
2018
|
const rawBoundary = cumulativeOffsets[i] !== void 0 && cumulativeOffsets[fromIdx] !== void 0 ? Math.max(0, cumulativeOffsets[i] - cumulativeOffsets[fromIdx]) : void 0;
|
|
@@ -1964,7 +2020,12 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
|
|
|
1964
2020
|
startOffsetInFromPage = resolved.startOffsetInFromPage;
|
|
1965
2021
|
didInferStartOffset = didInferStartOffset || resolved.didInferStartOffset;
|
|
1966
2022
|
const prevBoundary = boundaryPositions[boundaryPositions.length - 1];
|
|
1967
|
-
|
|
2023
|
+
let resolvedPos = resolved.pos;
|
|
2024
|
+
if (resolvedPos <= prevBoundary) {
|
|
2025
|
+
const afterPos = findPageStartAfterPosition(segmentContent, i, prevBoundary + 1, pageIds, normalizedPages);
|
|
2026
|
+
if (afterPos > prevBoundary) resolvedPos = afterPos;
|
|
2027
|
+
}
|
|
2028
|
+
if (isBoundaryPositionValid(resolvedPos, prevBoundary, resolved.expectedBoundary, segmentContent.length)) boundaryPositions.push(resolvedPos);
|
|
1968
2029
|
else {
|
|
1969
2030
|
const estimate = Math.max(prevBoundary + 1, resolved.expectedBoundary);
|
|
1970
2031
|
boundaryPositions.push(Math.min(estimate, segmentContent.length));
|
|
@@ -2001,7 +2062,8 @@ const buildBoundaryPositionsAccurate = (segmentContent, fromIdx, toIdx, pageCoun
|
|
|
2001
2062
|
*/
|
|
2002
2063
|
const buildBoundaryPositions = (segmentContent, fromIdx, toIdx, pageIds, normalizedPages, cumulativeOffsets, logger) => {
    const pageCount = toIdx - fromIdx + 1;

    // Length implied by the cumulative offsets; a missing offset counts as 0.
    const segmentEndOffset = cumulativeOffsets[toIdx + 1] ?? 0;
    const segmentStartOffset = cumulativeOffsets[fromIdx] ?? 0;
    const expectedLength = segmentEndOffset - segmentStartOffset;

    // The fast path is taken only for segments with at least FAST_PATH_THRESHOLD
    // pages whose actual length equals the length implied by the offsets.
    const canUseFastPath = pageCount >= FAST_PATH_THRESHOLD && segmentContent.length === expectedLength;
    if (!canUseFastPath) {
        return buildBoundaryPositionsAccurate(segmentContent, fromIdx, toIdx, pageCount, pageIds, normalizedPages, cumulativeOffsets, logger);
    }
    return buildBoundaryPositionsFastPath(segmentContent, fromIdx, toIdx, pageCount, cumulativeOffsets, logger);
};
|
|
2007
2069
|
/**
|