guidelinescraper 1.0.4 → 1.0.5

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (2)
  1. package/crawl.mjs +9 -5
  2. package/package.json +1 -1
package/crawl.mjs CHANGED
@@ -125,7 +125,7 @@ const crawler = new PlaywrightCrawler({
125
125
  launchContext: {
126
126
  launchOptions: { args: ["--disable-dev-shm-usage"] },
127
127
  },
128
- maxConcurrency: 16,
128
+ maxConcurrency: 8,
129
129
  maxRequestRetries: 2,
130
130
  navigationTimeoutSecs: 120,
131
131
 
@@ -165,7 +165,6 @@ const crawler = new PlaywrightCrawler({
165
165
  return;
166
166
  }
167
167
 
168
-
169
168
  await page
170
169
  .waitForLoadState("networkidle", { timeout: 30_000 })
171
170
  .catch(() => {});
@@ -407,9 +406,12 @@ const crawler = new PlaywrightCrawler({
407
406
  const pct = Math.round((completed / totalPages) * 100);
408
407
  const avgSec = (Date.now() - crawlStart) / 1000 / completed;
409
408
  const remaining = Math.round(avgSec * (totalPages - completed));
410
- const eta = remaining > 0 ? `~${formatDuration(remaining * 1000)} left` : "";
409
+ const eta =
410
+ remaining > 0 ? `~${formatDuration(remaining * 1000)} left` : "";
411
411
  const shortPath = pdfPath.replace(/\.pdf$/, "").replace(/^output\//, "");
412
- log.info(`[${completed}/${totalPages}] ${pct}% · ${elapsed}s ${eta} · ${shortPath}`);
412
+ log.info(
413
+ `[${completed}/${totalPages}] ${pct}% · ${elapsed}s ${eta} · ${shortPath}`,
414
+ );
413
415
  },
414
416
 
415
417
  async failedRequestHandler({ request, log }) {
@@ -434,5 +436,7 @@ await crawler.run(pages.map((p) => ({ url: p.url, uniqueKey: p.url })));
434
436
  const totalMs = Date.now() - crawlStart;
435
437
  console.log(`\n${"─".repeat(50)}`);
436
438
  console.log(`Done in ${formatDuration(totalMs)}`);
437
- console.log(` ${completed - failed} saved, ${failed} failed, ${totalPages} total`);
439
+ console.log(
440
+ ` ${completed - failed} saved, ${failed} failed, ${totalPages} total`,
441
+ );
438
442
  console.log(` Output: ${domainDir}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "guidelinescraper",
3
- "version": "1.0.4",
3
+ "version": "1.0.5",
4
4
  "type": "module",
5
5
  "description": "Scrape a Frontify brand portal and save every page as PDF and clean HTML",
6
6
  "bin": {