@heripo/pdf-parser 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +58 -38
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +58 -38
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -1301,14 +1301,18 @@ var ImageExtractor = class _ImageExtractor {
|
|
|
1301
1301
|
// src/processors/page-renderer.ts
|
|
1302
1302
|
import { existsSync as existsSync2, mkdirSync as mkdirSync2, readdirSync as readdirSync2 } from "fs";
|
|
1303
1303
|
import { join as join3 } from "path";
|
|
1304
|
-
var
|
|
1304
|
+
var PROGRESS_LOG_PERCENT_STEP = 10;
|
|
1305
1305
|
var PageRenderer = class {
|
|
1306
1306
|
constructor(logger) {
|
|
1307
1307
|
this.logger = logger;
|
|
1308
1308
|
}
|
|
1309
|
+
lastLoggedPercent = 0;
|
|
1309
1310
|
/**
|
|
1310
1311
|
* Render all pages of a PDF to individual PNG files.
|
|
1311
1312
|
*
|
|
1313
|
+
* Uses per-page rendering (`magick 'input.pdf[N]'`) when page count is known,
|
|
1314
|
+
* limiting peak memory to ~15MB/page instead of loading all pages at once.
|
|
1315
|
+
*
|
|
1312
1316
|
* @param pdfPath - Absolute path to the source PDF file
|
|
1313
1317
|
* @param outputDir - Directory where pages/ subdirectory will be created
|
|
1314
1318
|
* @param options - Rendering options
|
|
@@ -1325,50 +1329,54 @@ var PageRenderer = class {
|
|
|
1325
1329
|
this.logger.info(
|
|
1326
1330
|
`[PageRenderer] Rendering ${totalPages} pages at ${dpi} DPI...`
|
|
1327
1331
|
);
|
|
1332
|
+
this.lastLoggedPercent = 0;
|
|
1333
|
+
for (let i = 0; i < totalPages; i++) {
|
|
1334
|
+
const result = await spawnAsync(
|
|
1335
|
+
"magick",
|
|
1336
|
+
[
|
|
1337
|
+
"-density",
|
|
1338
|
+
dpi.toString(),
|
|
1339
|
+
`${pdfPath}[${i}]`,
|
|
1340
|
+
"-background",
|
|
1341
|
+
"white",
|
|
1342
|
+
"-alpha",
|
|
1343
|
+
"remove",
|
|
1344
|
+
"-alpha",
|
|
1345
|
+
"off",
|
|
1346
|
+
join3(pagesDir, `page_${i}.png`)
|
|
1347
|
+
],
|
|
1348
|
+
{ captureStdout: false }
|
|
1349
|
+
);
|
|
1350
|
+
if (result.code !== 0) {
|
|
1351
|
+
throw new Error(
|
|
1352
|
+
`[PageRenderer] Failed to render page ${i + 1}/${totalPages}: ${result.stderr || "Unknown error"}`
|
|
1353
|
+
);
|
|
1354
|
+
}
|
|
1355
|
+
this.logProgress(i + 1, totalPages);
|
|
1356
|
+
}
|
|
1328
1357
|
} else {
|
|
1329
1358
|
this.logger.info(`[PageRenderer] Rendering PDF at ${dpi} DPI...`);
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
} catch {
|
|
1347
|
-
}
|
|
1348
|
-
}, PROGRESS_POLL_INTERVAL_MS);
|
|
1349
|
-
}
|
|
1350
|
-
try {
|
|
1351
|
-
const result = await spawnAsync("magick", [
|
|
1352
|
-
"-density",
|
|
1353
|
-
dpi.toString(),
|
|
1354
|
-
pdfPath,
|
|
1355
|
-
"-background",
|
|
1356
|
-
"white",
|
|
1357
|
-
"-alpha",
|
|
1358
|
-
"remove",
|
|
1359
|
-
"-alpha",
|
|
1360
|
-
"off",
|
|
1361
|
-
outputPattern
|
|
1362
|
-
]);
|
|
1359
|
+
const result = await spawnAsync(
|
|
1360
|
+
"magick",
|
|
1361
|
+
[
|
|
1362
|
+
"-density",
|
|
1363
|
+
dpi.toString(),
|
|
1364
|
+
pdfPath,
|
|
1365
|
+
"-background",
|
|
1366
|
+
"white",
|
|
1367
|
+
"-alpha",
|
|
1368
|
+
"remove",
|
|
1369
|
+
"-alpha",
|
|
1370
|
+
"off",
|
|
1371
|
+
join3(pagesDir, "page_%d.png")
|
|
1372
|
+
],
|
|
1373
|
+
{ captureStdout: false }
|
|
1374
|
+
);
|
|
1363
1375
|
if (result.code !== 0) {
|
|
1364
1376
|
throw new Error(
|
|
1365
1377
|
`[PageRenderer] Failed to render PDF pages: ${result.stderr || "Unknown error"}`
|
|
1366
1378
|
);
|
|
1367
1379
|
}
|
|
1368
|
-
} finally {
|
|
1369
|
-
if (progressInterval) {
|
|
1370
|
-
clearInterval(progressInterval);
|
|
1371
|
-
}
|
|
1372
1380
|
}
|
|
1373
1381
|
const pageFiles = readdirSync2(pagesDir).filter((f) => f.startsWith("page_") && f.endsWith(".png")).sort((a, b) => {
|
|
1374
1382
|
const numA = parseInt(a.replace("page_", "").replace(".png", ""), 10);
|
|
@@ -1384,6 +1392,18 @@ var PageRenderer = class {
|
|
|
1384
1392
|
pageFiles
|
|
1385
1393
|
};
|
|
1386
1394
|
}
|
|
1395
|
+
/**
|
|
1396
|
+
* Log rendering progress once it has advanced at least 10 percentage points since the last logged value, and always on the final page.
|
|
1397
|
+
*/
|
|
1398
|
+
logProgress(current, total) {
|
|
1399
|
+
const percent = Math.floor(current / total * 100);
|
|
1400
|
+
if (percent >= this.lastLoggedPercent + PROGRESS_LOG_PERCENT_STEP || current === total) {
|
|
1401
|
+
this.lastLoggedPercent = percent;
|
|
1402
|
+
this.logger.info(
|
|
1403
|
+
`[PageRenderer] Rendering pages: ${current}/${total} (${percent}%)`
|
|
1404
|
+
);
|
|
1405
|
+
}
|
|
1406
|
+
}
|
|
1387
1407
|
/**
|
|
1388
1408
|
* Get total page count using pdfinfo.
|
|
1389
1409
|
* Returns 0 on failure (progress logging will be skipped).
|