aurochs 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_shared/{pdf-parser-Ciztl2kx.js → pdf-parser-NK9_pSDg.js} +407 -9
- package/dist/_shared/pdf-parser-NK9_pSDg.js.map +1 -0
- package/dist/cli.js +8 -3
- package/dist/pdf/builder/index.js +4 -397
- package/dist/pdf/builder/index.js.map +1 -1
- package/dist/pdf/parser/index.js +1 -1
- package/package.json +1 -1
- package/dist/_shared/pdf-parser-Ciztl2kx.js.map +0 -1
|
@@ -456,7 +456,7 @@ function buildDiagnostics(args) {
|
|
|
456
456
|
if (decoded.includes("�")) {
|
|
457
457
|
replacementCharMapCount += 1;
|
|
458
458
|
}
|
|
459
|
-
if (containsPrivateUseCharacter(decoded)) {
|
|
459
|
+
if (containsPrivateUseCharacter$1(decoded)) {
|
|
460
460
|
privateUseCharMapCount += 1;
|
|
461
461
|
}
|
|
462
462
|
}
|
|
@@ -469,7 +469,7 @@ function buildDiagnostics(args) {
|
|
|
469
469
|
sourceCodeLengthHistogram: histogram
|
|
470
470
|
};
|
|
471
471
|
}
|
|
472
|
-
function containsPrivateUseCharacter(text) {
|
|
472
|
+
function containsPrivateUseCharacter$1(text) {
|
|
473
473
|
return Array.from(text).some((char) => {
|
|
474
474
|
const codePoint = char.codePointAt(0);
|
|
475
475
|
if (codePoint === void 0) {
|
|
@@ -1728,6 +1728,401 @@ function decodeCIDFallback(cid, ordering) {
|
|
|
1728
1728
|
}
|
|
1729
1729
|
return null;
|
|
1730
1730
|
}
|
|
1731
|
+
// Lookup table indexed by byte value (0-255); each entry is that byte's
// two-digit, upper-case hexadecimal representation (e.g. 10 -> "0A").
const BYTE_TO_HEX = Array.from({ length: 256 }, (_unused, byteValue) => {
  const hex = byteValue.toString(16).toUpperCase();
  return hex.length < 2 ? `0${hex}` : hex;
});
|
|
1735
|
+
/**
 * Converts a sequence of character codes into a string, one UTF-16 code unit
 * per element.
 *
 * The conversion is done in fixed-size chunks: passing every element as a
 * separate call argument (`String.fromCharCode(...bytes)`) throws a RangeError
 * once the input exceeds the engine's argument/stack limit.
 *
 * @param {ArrayLike<number> | Iterable<number>} bytes - Character codes to convert.
 * @returns {string} The string built from the given codes ("" for empty input).
 */
function bytesToString(bytes) {
  const CHUNK_SIZE = 8192;
  // Plain iterables (no slice/length) are materialised once so they can be chunked.
  const codes = Array.isArray(bytes) || ArrayBuffer.isView(bytes) ? bytes : Array.from(bytes);
  if (codes.length <= CHUNK_SIZE) {
    return String.fromCharCode.apply(null, codes);
  }
  const parts = [];
  for (let offset = 0; offset < codes.length; offset += CHUNK_SIZE) {
    parts.push(String.fromCharCode.apply(null, codes.slice(offset, offset + CHUNK_SIZE)));
  }
  return parts.join("");
}
|
|
1738
|
+
/**
 * Turns a raw text string into bytes by keeping the low 8 bits of every
 * UTF-16 code unit.
 *
 * @param {string} rawText - Raw text whose code units are read one by one.
 * @returns {Uint8Array} One byte per code unit of `rawText`.
 */
function rawTextToBytes$1(rawText) {
  // An array-like source (not the string itself) keeps the iteration per code unit.
  return Uint8Array.from(
    { length: rawText.length },
    (_unused, index) => rawText.charCodeAt(index) & 255
  );
}
|
|
1745
|
+
/**
 * Renders `length` bytes starting at `start` as an upper-case hex string.
 * Positions that read as null/undefined are rendered as byte 0.
 *
 * @param {ArrayLike<number>} bytes - Source bytes.
 * @param {number} start - Index of the first byte to render.
 * @param {number} length - Number of bytes to render.
 * @returns {string} Concatenated two-digit hex pairs taken from BYTE_TO_HEX.
 */
function byteSliceToHex(bytes, start, length) {
  const hexPairs = new Array(length);
  let offset = 0;
  while (offset < length) {
    const value = bytes[start + offset] ?? 0;
    hexPairs[offset] = BYTE_TO_HEX[value];
    offset += 1;
  }
  return hexPairs.join("");
}
|
|
1752
|
+
/**
 * Determines which source-code byte lengths to try, longest first.
 *
 * Uses the distinct positive integers from `preferred` when any exist;
 * otherwise derives the lengths from the hex keys of `byteMapping`
 * (two hex digits per byte), discarding non-integer results.
 *
 * @param {Map<string, string>} byteMapping - Mapping keyed by hex source codes.
 * @param {number[] | undefined} preferred - Optional explicit byte lengths.
 * @returns {number[]} Unique positive integer lengths in descending order.
 */
function resolveSourceCodeByteLengths(byteMapping, preferred) {
  const isPositiveInteger = (value) => Number.isInteger(value) && value > 0;
  const descending = (left, right) => right - left;

  if (preferred && preferred.length > 0) {
    const explicit = Array.from(new Set(preferred.filter(isPositiveInteger)));
    explicit.sort(descending);
    if (explicit.length > 0) {
      return explicit;
    }
  }

  const derived = /* @__PURE__ */ new Set();
  for (const hexKey of byteMapping.keys()) {
    derived.add(hexKey.length / 2);
  }
  return Array.from(derived).filter(isPositiveInteger).sort(descending);
}
|
|
1765
|
+
/**
 * Reports whether any code point of `text` lies in one of three ranges:
 * U+E000-U+F8FF, U+F0000-U+FFFFD or U+100000-U+10FFFD.
 *
 * @param {string} text - Text to inspect.
 * @returns {boolean} True when at least one code point is in those ranges.
 */
function containsPrivateUseCharacter(text) {
  const ranges = [
    [0xe000, 0xf8ff],
    [0xf0000, 0xffffd],
    [0x100000, 0x10fffd]
  ];
  for (const symbol of Array.from(text)) {
    const codePoint = symbol.codePointAt(0);
    if (codePoint === void 0) {
      continue;
    }
    for (const [low, high] of ranges) {
      if (codePoint >= low && codePoint <= high) {
        return true;
      }
    }
  }
  return false;
}
|
|
1774
|
+
/**
 * Resolves a fallback string for a character code.
 *
 * An entry in `cidCodeToUnicodeFallbackMap` wins (any value other than
 * undefined, including ""); otherwise the lookup is delegated to
 * decodeCIDFallback with the ordering (null when none is given).
 *
 * @param {{code: number, ordering?: string, cidCodeToUnicodeFallbackMap?: Map<number, string>}} args
 * @returns {*} The map entry, or whatever decodeCIDFallback returns.
 */
function decodeCidFallback(args) {
  const perFontFallback = args.cidCodeToUnicodeFallbackMap?.get(args.code);
  return perFontFallback !== void 0
    ? perFontFallback
    : decodeCIDFallback(args.code, args.ordering ?? null);
}
|
|
1782
|
+
/**
 * Picks the ordering to use for CID fallback decoding.
 *
 * @param {{ordering?: string}} fontInfo - Font description.
 * @returns {string | undefined} `fontInfo.ordering` when it is truthy and not
 *   "Identity"; otherwise undefined.
 */
function resolveCidFallbackOrdering(fontInfo) {
  const { ordering: declaredOrdering } = fontInfo;
  const isUsable = Boolean(declaredOrdering) && declaredOrdering !== "Identity";
  return isUsable ? declaredOrdering : void 0;
}
|
|
1789
|
+
/**
 * Decides whether a font's ToUnicode byte mapping looks severely corrupted.
 *
 * The mapping counts as corrupted when at least half of its entries were
 * counted as replacement-character entries, or at least half as private-use
 * entries, according to `toUnicodeDiagnostics`.
 *
 * @param {{toUnicodeByteMapping?: Map<string, string>, toUnicodeDiagnostics?: {replacementCharMapCount: number, privateUseCharMapCount: number}}} fontInfo
 * @returns {boolean} False when the mapping is missing/empty or diagnostics are absent.
 */
function isToUnicodeSeverelyCorrupted(fontInfo) {
  const { toUnicodeByteMapping: entries, toUnicodeDiagnostics: stats } = fontInfo;
  const hasData = Boolean(entries) && entries.size !== 0 && Boolean(stats);
  if (!hasData) {
    return false;
  }
  const entryCount = entries.size;
  const suspiciousCounts = [stats.replacementCharMapCount, stats.privateUseCharMapCount];
  return suspiciousCounts.some((count) => count / entryCount >= 0.5);
}
|
|
1800
|
+
/**
 * Scores a single character code for "looks like plain ASCII text" purposes.
 *
 * @param {number} c - Character code to score.
 * @returns {number} 2 for space; 1 for tab/LF/CR, digits, ASCII letters and a
 *   fixed punctuation set; -3 for other codes below 32 and for 127; 0 otherwise.
 */
function scoreCharacter(c) {
  const scoredPunctuation = `=()[]{}.,;:'"_-+/\\<>`;
  if (c === 32) {
    return 2;
  }
  if (c === 9 || c === 10 || c === 13) {
    return 1;
  }
  if (c < 32 || c === 127) {
    return -3;
  }
  const isDigit = c >= 48 && c <= 57;
  const isLetter = (c >= 65 && c <= 90) || (c >= 97 && c <= 122);
  if (isDigit || isLetter) {
    return 1;
  }
  return scoredPunctuation.includes(String.fromCharCode(c)) ? 1 : 0;
}
|
|
1821
|
+
/**
 * Averages the per-character scores of a string.
 *
 * The sum is taken over the string's code points (scoring the first code unit
 * of each) and divided by `s.length`, i.e. the number of UTF-16 code units.
 *
 * @param {string} s - Text to score.
 * @returns {number} 0 for an empty string, otherwise the average score.
 */
function scoreAsciiQuality(s) {
  if (s.length === 0) {
    return 0;
  }
  let sum = 0;
  for (const symbol of Array.from(s)) {
    sum += scoreCharacter(symbol.charCodeAt(0));
  }
  return sum / s.length;
}
|
|
1828
|
+
/**
 * Tries to recover readable single-byte text from raw text containing NULs.
 *
 * Text without any NUL is returned unchanged. Otherwise several candidates are
 * built from the low bytes of the input (NULs removed; low byte of each pair
 * when the input looks like mostly-zero-high-byte pairs; each of those shifted
 * down by 3), cleaned of NULs and other control characters, and scored with
 * scoreAsciiQuality. A candidate replaces the current best only when it beats
 * it by more than 0.1; the starting best is the cleaned original text.
 *
 * @param {string} rawText - Raw text to normalise.
 * @returns {string} The original text, or the best-scoring cleaned candidate.
 */
function maybeNormalizeSingleByteRawText(rawText) {
  if (!rawText.includes("\0")) {
    return rawText;
  }

  const lowBytes = Array.from(
    { length: rawText.length },
    (_unused, index) => rawText.charCodeAt(index) & 255
  );

  // Candidate 1: simply drop every zero byte.
  const variants = [bytesToString(lowBytes.filter((value) => value !== 0))];

  // Candidate 2: treat the input as (high, low) pairs and keep only the lows,
  // provided most highs are 0/1 and most lows fall in 3..126.
  if (lowBytes.length >= 6 && lowBytes.length % 2 === 0) {
    const pairCount = lowBytes.length / 2;
    const lowHalves = [];
    let nearZeroHighs = 0;
    let plausibleLows = 0;
    for (let pair = 0; pair < pairCount; pair++) {
      const high = lowBytes[pair * 2];
      const low = lowBytes[pair * 2 + 1];
      if (high <= 1) {
        nearZeroHighs += 1;
      }
      if (low >= 3 && low <= 126) {
        plausibleLows += 1;
      }
      lowHalves.push(low);
    }
    if (nearZeroHighs / pairCount >= 0.7 && plausibleLows / pairCount >= 0.7) {
      variants.push(bytesToString(lowHalves));
    }
  }

  // Each candidate is also tried with every byte shifted down by 3 (floored at 0).
  const shiftedVariants = variants.map((variant) => {
    const shiftedBytes = [];
    for (const symbol of Array.from(variant)) {
      const value = symbol.charCodeAt(0) & 255;
      shiftedBytes.push(value >= 3 ? value - 3 : 0);
    }
    return bytesToString(shiftedBytes);
  });

  // Removes NULs and blanks out control characters other than tab, LF and CR.
  const stripControls = (text) => {
    const kept = [];
    for (const symbol of Array.from(text)) {
      const unit = symbol.charCodeAt(0);
      if (unit === 0) {
        continue;
      }
      const forbidden =
        (unit >= 1 && unit <= 8) || unit === 11 || unit === 12 || (unit >= 14 && unit <= 31);
      kept.push(forbidden ? " " : symbol);
    }
    return kept.join("");
  };

  let best = stripControls(rawText);
  let bestScore = scoreAsciiQuality(best);
  for (const variant of [...variants, ...shiftedVariants]) {
    const cleaned = stripControls(variant);
    const cleanedScore = scoreAsciiQuality(cleaned);
    if (cleanedScore > bestScore + 0.1) {
      best = cleaned;
      bestScore = cleanedScore;
    }
  }
  return best;
}
|
|
1874
|
+
/**
 * Cleans decoded text of control characters.
 *
 * NUL characters are removed. Codes 1-8, 11, 12, 14-31 and 127 are each
 * replaced by a single space. Tab, LF and CR are kept as they are.
 *
 * @param {string} decoded - Decoded text.
 * @returns {string} The cleaned text.
 */
function sanitizeDecodedText(decoded) {
  const withoutNul = decoded.replace(/\u0000/g, "");
  return withoutNul.replace(/[\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, " ");
}
|
|
1882
|
+
/**
 * Looks up font information for a font name.
 *
 * Resolution order:
 *   1. exact match on the name with a leading "/" removed;
 *   2. exact match on the part after the first "+" (when "+" is not the first
 *      character);
 *   3. first mapping whose key contains the name or is contained in it.
 *
 * Step 3 is skipped for an empty name and ignores empty keys: an empty string
 * is a substring of every string, so it would otherwise select an arbitrary,
 * unrelated font.
 *
 * @param {string} fontName - Font name, optionally prefixed with "/".
 * @param {Map<string, object>} mappings - Font info keyed by font name.
 * @returns {object | undefined} The matching font info, or undefined.
 */
function findFontInfo(fontName, mappings) {
  const cleanName = fontName.startsWith("/") ? fontName.slice(1) : fontName;
  const exactMatch = mappings.get(cleanName);
  if (exactMatch) {
    return exactMatch;
  }
  const plusIndex = cleanName.indexOf("+");
  if (plusIndex > 0) {
    const baseMatch = mappings.get(cleanName.slice(plusIndex + 1));
    if (baseMatch) {
      return baseMatch;
    }
  }
  if (cleanName.length === 0) {
    // Nothing meaningful to compare against in the substring pass.
    return void 0;
  }
  for (const [key, value] of mappings.entries()) {
    if (key.length === 0) {
      continue;
    }
    if (cleanName.includes(key) || key.includes(cleanName)) {
      return value;
    }
  }
  return void 0;
}
|
|
1903
|
+
/**
 * Decodes raw text using the font info registered for `fontName`.
 *
 * @param {string} rawText - Raw text to decode.
 * @param {string} fontName - Name used to look up font info via findFontInfo.
 * @param {Map<string, object>} mappings - Font info keyed by font name.
 * @returns {string} The text decoded with the matching font info, or the
 *   sanitized raw text when no font info is found.
 */
function decodeText(rawText, fontName, mappings) {
  const matchedFont = findFontInfo(fontName, mappings);
  return matchedFont
    ? decodeTextWithFontInfo(rawText, matchedFont)
    : sanitizeDecodedText(rawText);
}
|
|
1910
|
+
/**
 * Decodes raw text with a specific font's decoding tables and sanitizes the
 * result.
 *
 * The first applicable strategy is used:
 *   1. a non-empty ToUnicode byte mapping;
 *   2. two-byte decoding via `mapping` when `codeByteWidth` is 2 (CID fallback
 *      is allowed only when `mapping` is empty);
 *   3. single-byte decoding via a non-empty `mapping`, falling back to
 *      `encodingMap` per character;
 *   4. single-byte decoding via a non-empty `encodingMap` after normalising
 *      the raw text;
 *   5. the raw text itself.
 *
 * @param {string} rawText - Raw text to decode.
 * @param {object} fontInfo - Font description holding the decoding tables.
 * @returns {string} Sanitized decoded text.
 */
function decodeTextWithFontInfo(rawText, fontInfo) {
  const byteMapping = fontInfo.toUnicodeByteMapping;
  const legacyMapping = fontInfo.mapping;
  const encodingMap = fontInfo.encodingMap;
  const fallbackMap = fontInfo.cidCodeToUnicodeFallbackMap;
  const isTwoByte = fontInfo.codeByteWidth === 2;

  const ordering = resolveCidFallbackOrdering(fontInfo);
  const corrupted = isToUnicodeSeverelyCorrupted(fontInfo);
  // The aggressive fallbacks are only enabled for corrupted two-byte fonts.
  const severeCidMode = corrupted && isTwoByte;

  let decoded;
  if (byteMapping && byteMapping.size > 0) {
    decoded = decodeByToUnicodeByteMapping({
      rawText,
      byteMapping,
      sourceCodeByteLengths: fontInfo.toUnicodeSourceCodeByteLengths,
      legacyMapping,
      codeByteWidth: fontInfo.codeByteWidth,
      ordering,
      cidCodeToUnicodeFallbackMap: fallbackMap,
      treatReplacementAsMissing: severeCidMode,
      treatPrivateUseAsMissing: severeCidMode,
      allowCidFallbackOnBadToUnicode: severeCidMode,
      allowCidFallbackOnMiss: severeCidMode
    });
  } else if (isTwoByte) {
    decoded = decodeTwoByteText({
      rawText,
      mapping: legacyMapping,
      ordering,
      cidCodeToUnicodeFallbackMap: fallbackMap,
      allowCidFallback: legacyMapping.size === 0
    });
  } else if (legacyMapping.size > 0) {
    decoded = decodeSingleByteTextWithFallback(rawText, legacyMapping, encodingMap);
  } else if (encodingMap && encodingMap.size > 0) {
    // The encoding map is copied before being handed to the decoder.
    decoded = decodeSingleByteText(maybeNormalizeSingleByteRawText(rawText), new Map(encodingMap));
  } else {
    decoded = rawText;
  }
  return sanitizeDecodedText(decoded);
}
|
|
1959
|
+
/**
 * Decodes raw text with a ToUnicode mapping keyed by hex-encoded source codes.
 *
 * At each position the candidate code lengths are tried longest first. A hit
 * is emitted unless it is rejected by the replacement / private-use flags; a
 * rejected two-byte hit may be replaced by a CID fallback, otherwise U+FFFD is
 * emitted. On a miss, two-byte fonts consult `legacyMapping` and then the CID
 * fallbacks, and one-byte fonts consult `legacyMapping`. When nothing applies,
 * U+FFFD is emitted and the position advances by the shortest candidate
 * length (at least 1, at most the remaining bytes).
 *
 * @param {object} args
 * @param {string} args.rawText - Raw text; only the low 8 bits of each code unit are used.
 * @param {Map<string, string>} args.byteMapping - Hex source code to decoded text.
 * @param {number[]} [args.sourceCodeByteLengths] - Preferred source code lengths in bytes.
 * @param {Map<number, string>} args.legacyMapping - Numeric code to decoded text.
 * @param {number} args.codeByteWidth - 1 or 2 enables the corresponding legacy lookup.
 * @param {string} [args.ordering] - Ordering passed to the CID fallback.
 * @param {Map<number, string>} [args.cidCodeToUnicodeFallbackMap] - Per-font CID fallback table.
 * @param {boolean} [args.treatReplacementAsMissing=false] - Reject mapped text containing U+FFFD.
 * @param {boolean} [args.treatPrivateUseAsMissing=false] - Reject mapped text containing private-use characters.
 * @param {boolean} [args.allowCidFallbackOnBadToUnicode=false] - Try the CID fallback for rejected entries.
 * @param {boolean} [args.allowCidFallbackOnMiss=false] - Try the CID fallback when nothing maps.
 * @returns {string} Decoded text, or `rawText` unchanged when no code length can be determined.
 */
function decodeByToUnicodeByteMapping(args) {
  const {
    rawText,
    byteMapping,
    sourceCodeByteLengths,
    legacyMapping,
    codeByteWidth,
    ordering,
    cidCodeToUnicodeFallbackMap,
    treatReplacementAsMissing = false,
    treatPrivateUseAsMissing = false,
    allowCidFallbackOnBadToUnicode = false,
    allowCidFallbackOnMiss = false
  } = args;

  const bytes = rawTextToBytes$1(rawText);
  const candidateLengths = resolveSourceCodeByteLengths(byteMapping, sourceCodeByteLengths);
  if (candidateLengths.length === 0) {
    return rawText;
  }
  // Lengths are sorted descending, so the last one is the shortest.
  const missAdvance = candidateLengths[candidateLengths.length - 1] ?? 1;

  // True when mapped text must be treated as missing under the active flags.
  const isRejected = (mapped) => {
    const hasReplacement = treatReplacementAsMissing && mapped.includes("�");
    const hasPrivateUse = treatPrivateUseAsMissing && containsPrivateUseCharacter(mapped);
    return Boolean(hasReplacement || hasPrivateUse);
  };
  const cidFallbackFor = (code) => decodeCidFallback({
    code,
    ordering,
    cidCodeToUnicodeFallbackMap
  });

  const pieces = [];
  let pos = 0;
  while (pos < bytes.length) {
    // Longest-first lookup in the ToUnicode byte mapping.
    let hit = void 0;
    let hitLength = 0;
    for (const candidateLength of candidateLengths) {
      if (pos + candidateLength > bytes.length) {
        continue;
      }
      const candidate = byteMapping.get(byteSliceToHex(bytes, pos, candidateLength));
      if (candidate !== void 0) {
        hit = candidate;
        hitLength = candidateLength;
        break;
      }
    }

    if (hit !== void 0 && hitLength > 0) {
      if (!isRejected(hit)) {
        pieces.push(hit);
        pos += hitLength;
        continue;
      }
      if (codeByteWidth === 2 && hitLength === 2 && allowCidFallbackOnBadToUnicode) {
        const substitute = cidFallbackFor(bytes[pos] << 8 | bytes[pos + 1]);
        if (substitute) {
          pieces.push(substitute);
          pos += 2;
          continue;
        }
      }
      pieces.push("�");
      pos += hitLength;
      continue;
    }

    if (codeByteWidth === 2 && pos + 1 < bytes.length) {
      const code = bytes[pos] << 8 | bytes[pos + 1];
      const legacyHit = legacyMapping.get(code);
      if (legacyHit !== void 0) {
        if (!isRejected(legacyHit)) {
          pieces.push(legacyHit);
          pos += 2;
          continue;
        }
        if (allowCidFallbackOnBadToUnicode) {
          const substitute = cidFallbackFor(code);
          if (substitute) {
            pieces.push(substitute);
            pos += 2;
            continue;
          }
        }
      }
      if (allowCidFallbackOnMiss || (byteMapping.size === 0 && legacyMapping.size === 0)) {
        const substitute = cidFallbackFor(code);
        if (substitute) {
          pieces.push(substitute);
          pos += 2;
          continue;
        }
      }
    }

    if (codeByteWidth === 1) {
      const legacyHit = legacyMapping.get(bytes[pos]);
      if (legacyHit !== void 0) {
        pieces.push(legacyHit);
        pos += 1;
        continue;
      }
    }

    pieces.push("�");
    pos += Math.min(Math.max(missAdvance, 1), bytes.length - pos);
  }
  return pieces.join("");
}
|
|
2072
|
+
/**
 * Decodes raw text two code units at a time using a numeric code mapping.
 *
 * Each pair forms the code `(first << 8) | second`; a trailing unpaired unit
 * uses 0 as its second half. A truthy mapped value is emitted unless rejected
 * by the replacement / private-use flags. Otherwise the CID fallback is tried
 * when allowed, and U+FFFD is emitted when nothing applies.
 *
 * @param {object} args
 * @param {string} args.rawText - Raw text to decode.
 * @param {Map<number, string>} args.mapping - Code to decoded text.
 * @param {string} [args.ordering] - Ordering passed to the CID fallback.
 * @param {Map<number, string>} [args.cidCodeToUnicodeFallbackMap] - Per-font CID fallback table.
 * @param {boolean} [args.allowCidFallback=false] - Enables the CID fallback.
 * @param {boolean} [args.treatReplacementAsMissing=false] - Reject mapped text containing U+FFFD.
 * @param {boolean} [args.treatPrivateUseAsMissing=false] - Reject mapped text containing private-use characters.
 * @returns {string} Decoded text.
 */
function decodeTwoByteText(args) {
  const {
    rawText,
    mapping,
    ordering,
    cidCodeToUnicodeFallbackMap,
    allowCidFallback = false,
    treatReplacementAsMissing = false,
    treatPrivateUseAsMissing = false
  } = args;

  const pieces = [];
  for (let offset = 0; offset < rawText.length; offset += 2) {
    const hasSecond = offset + 1 < rawText.length;
    const first = rawText.charCodeAt(offset);
    const second = hasSecond ? rawText.charCodeAt(offset + 1) : 0;
    const code = first << 8 | second;

    const mapped = mapping.get(code);
    const dropForReplacement = treatReplacementAsMissing && mapped?.includes("�") === true;
    const dropForPrivateUse =
      treatPrivateUseAsMissing && mapped !== void 0 && containsPrivateUseCharacter(mapped);

    let piece = "�";
    if (mapped && !dropForReplacement && !dropForPrivateUse) {
      piece = mapped;
    } else if (allowCidFallback) {
      const substitute = decodeCidFallback({
        code,
        ordering,
        cidCodeToUnicodeFallbackMap
      });
      if (substitute) {
        piece = substitute;
      }
    }
    pieces.push(piece);
  }
  return pieces.join("");
}
|
|
2109
|
+
/**
 * Decodes text character by character through a code mapping.
 *
 * @param {string} rawText - Raw text to decode.
 * @param {Map<number, string>} mapping - Character code to decoded text.
 * @returns {string} Text where every character with a non-nullish mapping is
 *   replaced by it; all other characters are kept unchanged.
 */
function decodeSingleByteText(rawText, mapping) {
  const decoded = [];
  for (const symbol of Array.from(rawText)) {
    const replacement = mapping.get(symbol.charCodeAt(0));
    decoded.push(replacement ?? symbol);
  }
  return decoded.join("");
}
|
|
2115
|
+
/**
 * Decodes text character by character, preferring the ToUnicode mapping and
 * falling back to the encoding map.
 *
 * @param {string} rawText - Raw text to decode.
 * @param {Map<number, string>} toUnicode - Primary mapping; only truthy values are used.
 * @param {Map<number, string> | undefined} encodingMap - Optional secondary mapping.
 * @returns {string} Decoded text; characters without a truthy primary mapping
 *   and without a non-nullish secondary mapping are kept unchanged.
 */
function decodeSingleByteTextWithFallback(rawText, toUnicode, encodingMap) {
  const decoded = [];
  for (const symbol of Array.from(rawText)) {
    const code = symbol.charCodeAt(0);
    const primary = toUnicode.get(code);
    if (primary) {
      decoded.push(primary);
      continue;
    }
    decoded.push(encodingMap?.get(code) ?? symbol);
  }
  return decoded.join("");
}
|
|
1731
2126
|
const WINANSI_ENCODING = /* @__PURE__ */ new Map([
|
|
1732
2127
|
// 0x20-0x7E: Standard ASCII printable characters
|
|
1733
2128
|
[32, " "],
|
|
@@ -5852,9 +6247,12 @@ function createTextRun(text, textState, gfxState) {
|
|
|
5852
6247
|
const endUserY = startUserY + advanceUserY;
|
|
5853
6248
|
const endPos = transformPoint({ x: endUserX, y: endUserY }, ctm);
|
|
5854
6249
|
const effectiveFontSize = calculateEffectiveFontSize(currentFontSize, textMatrix, ctm);
|
|
6250
|
+
const decodedText = currentFontInfo ? decodeTextWithFontInfo(text, currentFontInfo) : text;
|
|
5855
6251
|
const run = {
|
|
5856
|
-
text,
|
|
6252
|
+
text: decodedText,
|
|
6253
|
+
rawText: text,
|
|
5857
6254
|
rawBytes: rawTextToBytes(text),
|
|
6255
|
+
codeByteWidth: currentCodeByteWidth,
|
|
5858
6256
|
textMatrix,
|
|
5859
6257
|
x: startPos.x,
|
|
5860
6258
|
y: startPos.y,
|
|
@@ -22071,7 +22469,7 @@ async function getPdfPageDimensions(data, pageNumber = 1) {
|
|
|
22071
22469
|
return pages[pageNumber - 1].getSize();
|
|
22072
22470
|
}
|
|
22073
22471
|
export {
|
|
22074
|
-
|
|
22472
|
+
decodeText as A,
|
|
22075
22473
|
DEFAULT_FILL_COLOR as D,
|
|
22076
22474
|
IDENTITY_MATRIX$2 as I,
|
|
22077
22475
|
translationMatrix as a,
|
|
@@ -22096,9 +22494,9 @@ export {
|
|
|
22096
22494
|
transformPoint as t,
|
|
22097
22495
|
getPdfPageCount as u,
|
|
22098
22496
|
getPdfPageDimensions as v,
|
|
22099
|
-
|
|
22100
|
-
|
|
22101
|
-
|
|
22102
|
-
|
|
22497
|
+
DEFAULT_FONT_METRICS as w,
|
|
22498
|
+
calculateTextDisplacement as x,
|
|
22499
|
+
rasterizeSoftMaskedFillPath as y,
|
|
22500
|
+
applyGraphicsSoftMaskToPdfImage as z
|
|
22103
22501
|
};
|
|
22104
|
-
//# sourceMappingURL=pdf-parser-
|
|
22502
|
+
//# sourceMappingURL=pdf-parser-NK9_pSDg.js.map
|