@heripo/pdf-parser 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +58 -38
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +58 -38
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -1325,14 +1325,18 @@ var ImageExtractor = class _ImageExtractor {
|
|
|
1325
1325
|
// src/processors/page-renderer.ts
|
|
1326
1326
|
var import_node_fs3 = require("fs");
|
|
1327
1327
|
var import_node_path3 = require("path");
|
|
1328
|
-
var
|
|
1328
|
+
var PROGRESS_LOG_PERCENT_STEP = 10;
|
|
1329
1329
|
var PageRenderer = class {
|
|
1330
1330
|
constructor(logger) {
|
|
1331
1331
|
this.logger = logger;
|
|
1332
1332
|
}
|
|
1333
|
+
lastLoggedPercent = 0;
|
|
1333
1334
|
/**
|
|
1334
1335
|
* Render all pages of a PDF to individual PNG files.
|
|
1335
1336
|
*
|
|
1337
|
+
* Uses per-page rendering (`magick 'input.pdf[N]'`) when page count is known,
|
|
1338
|
+
* limiting peak memory to ~15MB/page instead of loading all pages at once.
|
|
1339
|
+
*
|
|
1336
1340
|
* @param pdfPath - Absolute path to the source PDF file
|
|
1337
1341
|
* @param outputDir - Directory where pages/ subdirectory will be created
|
|
1338
1342
|
* @param options - Rendering options
|
|
@@ -1349,50 +1353,54 @@ var PageRenderer = class {
|
|
|
1349
1353
|
this.logger.info(
|
|
1350
1354
|
`[PageRenderer] Rendering ${totalPages} pages at ${dpi} DPI...`
|
|
1351
1355
|
);
|
|
1356
|
+
this.lastLoggedPercent = 0;
|
|
1357
|
+
for (let i = 0; i < totalPages; i++) {
|
|
1358
|
+
const result = await spawnAsync(
|
|
1359
|
+
"magick",
|
|
1360
|
+
[
|
|
1361
|
+
"-density",
|
|
1362
|
+
dpi.toString(),
|
|
1363
|
+
`${pdfPath}[${i}]`,
|
|
1364
|
+
"-background",
|
|
1365
|
+
"white",
|
|
1366
|
+
"-alpha",
|
|
1367
|
+
"remove",
|
|
1368
|
+
"-alpha",
|
|
1369
|
+
"off",
|
|
1370
|
+
(0, import_node_path3.join)(pagesDir, `page_${i}.png`)
|
|
1371
|
+
],
|
|
1372
|
+
{ captureStdout: false }
|
|
1373
|
+
);
|
|
1374
|
+
if (result.code !== 0) {
|
|
1375
|
+
throw new Error(
|
|
1376
|
+
`[PageRenderer] Failed to render page ${i + 1}/${totalPages}: ${result.stderr || "Unknown error"}`
|
|
1377
|
+
);
|
|
1378
|
+
}
|
|
1379
|
+
this.logProgress(i + 1, totalPages);
|
|
1380
|
+
}
|
|
1352
1381
|
} else {
|
|
1353
1382
|
this.logger.info(`[PageRenderer] Rendering PDF at ${dpi} DPI...`);
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
|
|
1370
|
-
} catch {
|
|
1371
|
-
}
|
|
1372
|
-
}, PROGRESS_POLL_INTERVAL_MS);
|
|
1373
|
-
}
|
|
1374
|
-
try {
|
|
1375
|
-
const result = await spawnAsync("magick", [
|
|
1376
|
-
"-density",
|
|
1377
|
-
dpi.toString(),
|
|
1378
|
-
pdfPath,
|
|
1379
|
-
"-background",
|
|
1380
|
-
"white",
|
|
1381
|
-
"-alpha",
|
|
1382
|
-
"remove",
|
|
1383
|
-
"-alpha",
|
|
1384
|
-
"off",
|
|
1385
|
-
outputPattern
|
|
1386
|
-
]);
|
|
1383
|
+
const result = await spawnAsync(
|
|
1384
|
+
"magick",
|
|
1385
|
+
[
|
|
1386
|
+
"-density",
|
|
1387
|
+
dpi.toString(),
|
|
1388
|
+
pdfPath,
|
|
1389
|
+
"-background",
|
|
1390
|
+
"white",
|
|
1391
|
+
"-alpha",
|
|
1392
|
+
"remove",
|
|
1393
|
+
"-alpha",
|
|
1394
|
+
"off",
|
|
1395
|
+
(0, import_node_path3.join)(pagesDir, "page_%d.png")
|
|
1396
|
+
],
|
|
1397
|
+
{ captureStdout: false }
|
|
1398
|
+
);
|
|
1387
1399
|
if (result.code !== 0) {
|
|
1388
1400
|
throw new Error(
|
|
1389
1401
|
`[PageRenderer] Failed to render PDF pages: ${result.stderr || "Unknown error"}`
|
|
1390
1402
|
);
|
|
1391
1403
|
}
|
|
1392
|
-
} finally {
|
|
1393
|
-
if (progressInterval) {
|
|
1394
|
-
clearInterval(progressInterval);
|
|
1395
|
-
}
|
|
1396
1404
|
}
|
|
1397
1405
|
const pageFiles = (0, import_node_fs3.readdirSync)(pagesDir).filter((f) => f.startsWith("page_") && f.endsWith(".png")).sort((a, b) => {
|
|
1398
1406
|
const numA = parseInt(a.replace("page_", "").replace(".png", ""), 10);
|
|
@@ -1408,6 +1416,18 @@ var PageRenderer = class {
|
|
|
1408
1416
|
pageFiles
|
|
1409
1417
|
};
|
|
1410
1418
|
}
|
|
1419
|
+
/**
|
|
1420
|
+
* Log rendering progress once it has advanced at least 10 percentage points since the last logged value, and always for the final page.
|
|
1421
|
+
*/
|
|
1422
|
+
logProgress(current, total) {
|
|
1423
|
+
const percent = Math.floor(current / total * 100);
|
|
1424
|
+
if (percent >= this.lastLoggedPercent + PROGRESS_LOG_PERCENT_STEP || current === total) {
|
|
1425
|
+
this.lastLoggedPercent = percent;
|
|
1426
|
+
this.logger.info(
|
|
1427
|
+
`[PageRenderer] Rendering pages: ${current}/${total} (${percent}%)`
|
|
1428
|
+
);
|
|
1429
|
+
}
|
|
1430
|
+
}
|
|
1411
1431
|
/**
|
|
1412
1432
|
* Get total page count using pdfinfo.
|
|
1413
1433
|
* Returns 0 on failure (progress logging will be skipped).
|