@gheop/tojiru 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -0
- package/dist/extractors/pdf.js +36 -3
- package/dist/extractors/pdf.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -134,6 +134,11 @@ MIT — see [LICENSE](LICENSE).
|
|
|
134
134
|
|
|
135
135
|
## Changelog
|
|
136
136
|
|
|
137
|
+
### v0.3.0 — Smaller PDF bundles (2026-06-26)
|
|
138
|
+
|
|
139
|
+
- Vector PDF page coordinates are rounded to 2 decimals — ~25% smaller pages, no visible change
|
|
140
|
+
- Image-only PDF pages (scans, comic PDFs) are auto-detected and rendered as WebP instead of SVG-wrapped bitmaps — e.g. a comic PDF dropped from ×34 to ×2.7
|
|
141
|
+
|
|
137
142
|
### v0.2.0 — Preview server and progress (2026-06-25)
|
|
138
143
|
|
|
139
144
|
- Built-in `tojiru serve <dir>` command previews a bundle locally — uses Node built-ins only, no extra install needed
|
package/dist/extractors/pdf.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
|
-
import { readFile } from 'node:fs/promises';
|
|
1
|
+
import { readFile, unlink, writeFile } from 'node:fs/promises';
|
|
2
2
|
import { basename, extname, join } from 'node:path';
|
|
3
3
|
import { detectKind } from './detect.js';
|
|
4
4
|
import { findPdfConverter } from '../tools.js';
|
|
5
5
|
import { run } from '../run.js';
|
|
6
|
+
import sharp from 'sharp';
|
|
7
|
+
import { imageDims } from './images.js';
|
|
6
8
|
/**
 * Formats a value as a string, left-padded with zeros.
 *
 * @param {number} n - Value to format (converted with `String`).
 * @param {number} width - Minimum length of the result.
 * @returns {string} `n` as text, prefixed with '0' characters up to `width`.
 */
function pad(n, width) {
  const text = String(n);
  return text.padStart(width, '0');
}
|
|
@@ -33,6 +35,19 @@ function viewBox(svg) {
|
|
|
33
35
|
return { w: Math.round(Number(w[1])), h: Math.round(Number(h[1])) };
|
|
34
36
|
throw new Error('SVG has no usable dimensions');
|
|
35
37
|
}
|
|
38
|
+
/**
 * Shrinks SVG markup by rounding long decimal numbers.
 *
 * Only numbers written with three or more fractional digits are rewritten;
 * integers and shorter floats are left untouched. Trailing zeros are dropped
 * from the rounded value (e.g. `1.2000` becomes `1.2`). The default of two
 * decimals keeps 0.01-unit precision.
 *
 * @param {string} svg - SVG markup to process.
 * @param {number} [decimals=2] - Number of fractional digits to keep.
 * @returns {string} The markup with long decimals rounded.
 */
export function roundCoords(svg, decimals = 2) {
  return svg.replace(/-?\d+\.\d{3,}/g, (match, offset, source) => {
    const rounded = String(Number(Number.parseFloat(match).toFixed(decimals)));
    // A small negative value rounds to "0" and loses its minus sign. In compact
    // path data that sign may be the only separator from the previous number
    // ("10.5-0.004"), so re-insert a space to keep the two numbers apart.
    const lostSign = match.startsWith('-') && !rounded.startsWith('-');
    const followsNumber = offset > 0 && /[\d.]/.test(source[offset - 1]);
    return lostSign && followsNumber ? ` ${rounded}` : rounded;
  });
}
|
|
44
|
+
/**
 * Heuristic for pages that are a bitmap wrapped in SVG rather than vector art.
 *
 * A page counts as raster-dominated when its markup contains at least one
 * `<image` occurrence and fewer than 50 `<use` occurrences.
 *
 * @param {string} svg - SVG markup of a single page.
 * @returns {boolean} True when the page looks image-only.
 */
function isRasterDominated(svg) {
  // String#match with a global regex yields null when nothing matches.
  const occurrences = (pattern) => svg.match(pattern)?.length ?? 0;
  if (occurrences(/<image/g) === 0) {
    return false;
  }
  return occurrences(/<use/g) < 50;
}
|
|
36
51
|
export const pdfExtractor = {
|
|
37
52
|
name: 'pdf',
|
|
38
53
|
async canHandle(file) {
|
|
@@ -47,7 +62,8 @@ export const pdfExtractor = {
|
|
|
47
62
|
const width = Math.max(4, String(count).length);
|
|
48
63
|
const pages = [];
|
|
49
64
|
for (let i = 1; i <= count; i++) {
|
|
50
|
-
const
|
|
65
|
+
const stem = pad(i, width);
|
|
66
|
+
const svgPath = join(workdir, `${stem}.svg`);
|
|
51
67
|
if (conv === 'pdftocairo') {
|
|
52
68
|
await run('pdftocairo', ['-svg', '-f', String(i), '-l', String(i), file, svgPath]);
|
|
53
69
|
}
|
|
@@ -55,7 +71,24 @@ export const pdfExtractor = {
|
|
|
55
71
|
await run('mutool', ['draw', '-F', 'svg', '-o', svgPath, file, String(i)]);
|
|
56
72
|
}
|
|
57
73
|
const svg = await readFile(svgPath, 'utf8');
|
|
58
|
-
|
|
74
|
+
if (conv === 'pdftocairo' && isRasterDominated(svg)) {
|
|
75
|
+
// Full-page bitmap wrapped in SVG: re-render to a raster image, encode it
|
|
76
|
+
// as WebP (far smaller than the lossless PNG render), and drop the SVG.
|
|
77
|
+
const stemPath = join(workdir, stem);
|
|
78
|
+
await run('pdftocairo', ['-png', '-singlefile', '-r', '150', '-f', String(i), '-l', String(i), file, stemPath]);
|
|
79
|
+
const pngPath = `${stemPath}.png`;
|
|
80
|
+
const webpPath = `${stemPath}.webp`;
|
|
81
|
+
await sharp(pngPath).webp({ quality: 82, effort: 6 }).toFile(webpPath);
|
|
82
|
+
await unlink(svgPath);
|
|
83
|
+
await unlink(pngPath);
|
|
84
|
+
pages.push({ type: 'raster', imagePath: webpPath, ...(await imageDims(webpPath)) });
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
// Vector page: round coordinates to shrink SVG, then store.
|
|
88
|
+
const rounded = roundCoords(svg);
|
|
89
|
+
await writeFile(svgPath, rounded, 'utf8');
|
|
90
|
+
pages.push({ type: 'vector', svgPath, ...viewBox(rounded) });
|
|
91
|
+
}
|
|
59
92
|
onProgress?.(i, count, 'Converting');
|
|
60
93
|
}
|
|
61
94
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/extractors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAA;
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/extractors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAA;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAEnD,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAA;AACxC,OAAO,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAA;AAC9C,OAAO,EAAE,GAAG,EAAE,MAAM,WAAW,CAAA;AAC/B,OAAO,KAAK,MAAM,OAAO,CAAA;AACzB,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAA;AAEvC,SAAS,GAAG,CAAC,CAAS,EAAE,KAAa;IACnC,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,GAAG,CAAC,CAAA;AACvC,CAAC;AAED,KAAK,UAAU,SAAS,CAAC,IAAY;IACnC,6DAA6D;IAC7D,IAAI,CAAC;QACH,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,CAAC,SAAS,EAAE,CAAC,IAAI,CAAC,CAAC,CAAA;QAC/C,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAA;QAC1C,IAAI,CAAC;YAAE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,sBAAsB;IACxB,CAAC;IACD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,CAAA;IACtD,MAAM,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAA;IACxC,IAAI,CAAC,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAA;IACjE,OAAO,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;AACrB,CAAC;AAED,SAAS,OAAO,CAAC,GAAW;IAC1B,MAAM,EAAE,GAAG,GAAG,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAA;IACjE,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC7E,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAA;IACtC,MAAM,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAA;IACvC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,EAAE,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IAC/E,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAA;AACjD,CAAC;AAED,6DAA6D;AAC7D,yDAAyD;AACzD,iFAAiF;AACjF,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,QAAQ,GAAG,CAAC;IACnD,OAAO,GAAG,CAAC,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC,
CAAC,CAAA;AAC9F,CAAC;AAED,gFAAgF;AAChF,iFAAiF;AACjF,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,UAAU,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAA;IACtD,MAAM,QAAQ,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAA;IAClD,OAAO,UAAU,IAAI,CAAC,IAAI,QAAQ,GAAG,EAAE,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IACX,KAAK,CAAC,SAAS,CAAC,IAAI;QAClB,OAAO,CAAC,MAAM,UAAU,CAAC,IAAI,CAAC,CAAC,KAAK,KAAK,CAAA;IAC3C,CAAC;IACD,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,EAAE,UAAuB;QAClD,MAAM,IAAI,GAAG,MAAM,gBAAgB,EAAE,CAAA;QACrC,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,yEAAyE,CAAC,CAAA;QAC5F,CAAC;QACD,MAAM,KAAK,GAAG,MAAM,SAAS,CAAC,IAAI,CAAC,CAAA;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAA;QAC/C,MAAM,KAAK,GAAW,EAAE,CAAA;QAExB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;YAChC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,CAAA;YAC1B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,IAAI,MAAM,CAAC,CAAA;YAC5C,IAAI,IAAI,KAAK,YAAY,EAAE,CAAC;gBAC1B,MAAM,GAAG,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAA;YACpF,CAAC;iBAAM,CAAC;gBACN,MAAM,GAAG,CAAC,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAA;YAC5E,CAAC;YACD,MAAM,GAAG,GAAG,MAAM,QAAQ,CAAC,OAAO,EAAE,MAAM,CAAC,CAAA;YAE3C,IAAI,IAAI,KAAK,YAAY,IAAI,iBAAiB,CAAC,GAAG,CAAC,EAAE,CAAC;gBACpD,0EAA0E;gBAC1E,wEAAwE;gBACxE,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;gBACpC,MAAM,GAAG,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,aAAa,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAA;gBAC/G,MAAM,OAAO,GAAG,GAAG,QAAQ,MAAM,CAAA;gBACjC,MAAM,QAAQ,GAAG,GAAG,QAAQ,OAAO,CAAA;gBACnC,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;gBACtE,MAAM,MAAM,CAAC,OAAO,CAAC,CAAA;gBACrB,MAAM
,MAAM,CAAC,OAAO,CAAC,CAAA;gBACrB,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,CAAC,MAAM,SAAS,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC,CAAA;YACrF,CAAC;iBAAM,CAAC;gBACN,4DAA4D;gBAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAA;gBAChC,MAAM,SAAS,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,CAAC,CAAA;gBACzC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;YAC9D,CAAC;YAED,UAAU,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,YAAY,CAAC,CAAA;QACtC,CAAC;QAED,OAAO;YACL,KAAK,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;YACpC,IAAI,EAAE,KAAK;YACX,KAAK;SACN,CAAA;IACH,CAAC;CACF,CAAA"}
|