guidelinescraper 1.0.2 → 1.0.4
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/crawl.mjs +2 -6
- package/package.json +1 -1
package/crawl.mjs
CHANGED
|
@@ -109,10 +109,6 @@ let failed = 0;
|
|
|
109
109
|
const crawlStart = Date.now();
|
|
110
110
|
|
|
111
111
|
console.log(`Discovered ${totalPages} pages to crawl.\n`);
|
|
112
|
-
for (const { url, pdfPath } of pages) {
|
|
113
|
-
console.log(` ${pdfPath} ← ${url}`);
|
|
114
|
-
}
|
|
115
|
-
console.log();
|
|
116
112
|
|
|
117
113
|
for (const { pdfPath } of pages) {
|
|
118
114
|
fs.mkdirSync(path.dirname(pdfPath), { recursive: true });
|
|
@@ -169,7 +165,6 @@ const crawler = new PlaywrightCrawler({
|
|
|
169
165
|
return;
|
|
170
166
|
}
|
|
171
167
|
|
|
172
|
-
log.info(`Saving ${request.url} → ${pdfPath}`);
|
|
173
168
|
|
|
174
169
|
await page
|
|
175
170
|
.waitForLoadState("networkidle", { timeout: 30_000 })
|
|
@@ -413,7 +408,8 @@ const crawler = new PlaywrightCrawler({
|
|
|
413
408
|
const avgSec = (Date.now() - crawlStart) / 1000 / completed;
|
|
414
409
|
const remaining = Math.round(avgSec * (totalPages - completed));
|
|
415
410
|
const eta = remaining > 0 ? `~${formatDuration(remaining * 1000)} left` : "";
|
|
416
|
-
|
|
411
|
+
const shortPath = pdfPath.replace(/\.pdf$/, "").replace(/^output\//, "");
|
|
412
|
+
log.info(`[${completed}/${totalPages}] ${pct}% · ${elapsed}s ${eta} · ${shortPath}`);
|
|
417
413
|
},
|
|
418
414
|
|
|
419
415
|
async failedRequestHandler({ request, log }) {
|