npm - scraply - Versions diffs - 1.0.25 → 2.0.0 - Mend

scraply 1.0.25 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/package.json +27 -9
package/readme.md +152 -76
package/src/config/defaults.js +97 -0
package/src/config/load.js +39 -0
package/src/core/pipeline.js +61 -0
package/src/core/queue.js +131 -0
package/src/core/retry.js +67 -0
package/src/crawler.js +302 -0
package/src/extract/extract.js +40 -0
package/src/extract/links.js +29 -0
package/src/fetchers/browserFetcher.js +77 -0
package/src/fetchers/httpFetcher.js +54 -0
package/src/fetchers/index.js +29 -0
package/src/fetchers/types.js +31 -0
package/src/index.js +67 -0
package/src/output/router.js +39 -0
package/src/output/writers.js +48 -0
package/src/storage/files.js +48 -0
package/src/url/normalize.js +21 -0
package/src/url/patterns.js +57 -0
package/src/util/delay.js +1 -0
package/src/util/hooks.js +34 -0
package/src/util/logger.js +20 -0
package/.github/workflows/npm-publish.yml +0 -28
package/src/defaultConfig.js +0 -67
package/src/loadConfig.js +0 -29
package/src/scraply.js +0 -125
package/src/utils/crawl/browser/helper.js +0 -143
package/src/utils/crawl/cleanHTML.js +0 -35
package/src/utils/crawl/delay.js +0 -1
package/src/utils/crawl/fileOperations.js +0 -51
package/src/utils/crawl/url/fetch.js +0 -66
package/src/utils/crawl/url/handlers.js +0 -75
package/src/utils/crawl/url/normalize.js +0 -14
package/src/utils/crawl/url/processor.js +0 -52
package/src/utils/format/formatData.js +0 -54

package/package.json CHANGED Viewed

@@ -1,26 +1,44 @@
 {
   "name": "scraply",
   "description": "A simple, configurable and functional content scraper",
-  "version": "1.0.25",
-  "main": "src/scraply.js",
+  "version": "2.0.0",
+  "main": "src/index.js",
   "type": "module",
+  "exports": {
+    ".": "./src/index.js"
+  },
+  "files": [
+    "src"
+  ],
+  "engines": {
+    "node": ">=18"
+  },
   "scripts": {
-    "start": "node ."
+    "start": "node .",
+    "dev": "node src/dev.js"
   },
   "keywords": [
     "crawler",
-    "scraper"
+    "scraper",
+    "web-scraping",
+    "puppeteer",
+    "cheerio"
   ],
   "author": "Pau Serrat Gutiérrez",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/pauserratgutierrez/scraply.git"
+  },
   "dependencies": {
-    "axios": "^1.7.9",
-    "cheerio": "^1.0.0",
-    "he": "^1.2.0",
-    "puppeteer": "^24.2.0",
-    "puppeteer-cluster": "^0.24.0"
+    "cheerio": "1.2.0",
+    "puppeteer": "25.1.0",
+    "puppeteer-cluster": "0.25.0"
   },
   "publishConfig": {
     "registry": "https://registry.npmjs.org/",
     "access": "public"
+  },
+  "allowScripts": {
+    "puppeteer@25.1.0": true
   }
 }

package/readme.md CHANGED Viewed

@@ -1,94 +1,170 @@
 # Scraply
-Scraply is a customizable and efficient web crawler and data scraper for Node.js, designed to handle various web crawling needs with ease. You can define the URLs to crawl, configure patterns to include/exclude, and format the output data in JSON. Scraply is built to be flexible, with user-configurable settings and dynamic paths.
-Bug Reports & Dev Stuff on: [Scraply's GitHub](https://github.com/pauserratgutierrez/scraply)
-NPM Package: [Scraply's NPM](https://www.npmjs.com/package/scraply)
+Scraply is a customizable, modular web crawler and content scraper for Node.js. Define the URLs to crawl, control which links are followed, choose how pages are fetched (plain HTTP or a real browser), and route the extracted text into JSON files. Crawls are persistent and resumable, so they are well suited to long-running or scheduled jobs.
-## Installation
-Using npm:
-``npm install scraply``
+Bug reports and development: [Scraply on GitHub](https://github.com/pauserratgutierrez/scraply)
+NPM package: [Scraply on NPM](https://www.npmjs.com/package/scraply)
+> Scraply 2.0 is a ground-up rewrite with a new configuration shape and public API. See [Migrating from 1.x](#migrating-from-1x).
+## Requirements
+- Node.js >= 18 (uses the built-in `fetch`).
-## Working Example
-Initialize Scraply with provided URLs to start crawling:
+## Installation
+```
+npm install scraply
 ```
+## Quick start
+```js
 import { scraply } from 'scraply';
-scraply({
-  CRAWLER: {
-    INITIAL_URLS: ['https://example.com']
+await scraply({
+  startUrls: ['https://example.com'],
+  output: {
+    routes: {
+      'https://example.com': { '*': 'example.json' }
+    }
   }
 });
 ```
-## How Scraply Works
-### Persistent Data Storage
-Scraply persistently saves the state of the crawler in JSON files (the queue, crawled data, etc.). If the crawler is interrupted or rate-limited, all progress is saved, and the crawler will automatically stop. When restarted, Scraply resumes crawling exactly where it left off, without reprocessing already crawled URLs.
+This crawls `example.com`, extracts the readable text of every allowed page, and writes the results to `dataset/formatted/example.json`.
+## How Scraply works
+1. The crawl is seeded from `startUrls`.
+2. Each page is fetched, its links are discovered and filtered (`include` / `exclude`), and new links are queued.
+3. The page text is extracted (configurable element removal) and saved under `dataset/crawled/`.
+4. When the queue drains, all crawled pages are routed by URL into the files defined in `output.routes` and written to `dataset/formatted/`.
+### Persistence and resuming
+The queue and crawled pages are checkpointed to disk in `dataset/`. If a run is interrupted (or rate-limited), progress is saved and the next run resumes exactly where it left off without re-crawling finished URLs. When every URL has been processed, Scraply starts a fresh crawl (set `crawl.resetOnComplete: false` to keep the finished queue instead).
+### Concurrency and politeness
+Pages are crawled with a worker pool (`crawl.concurrency`). Requests to the same host are spaced by `crawl.delay` for politeness, while different hosts run in parallel.
-### Handling Rate Limiting
-Scraply is designed to handle rate-limiting gracefully. If the crawler encounters rate-limited responses (e.g., status code `429`), it stops processing further requests and saves everything in the queue. Once restarted, it resumes the crawling process from where it stopped.
+### Rate limiting
+On HTTP `429`, Scraply by default exits immediately with `rateLimit.exitCode`, so that a scheduler can retry later. When `rateLimit.exitOnLimit` is `false`, it instead waits (honoring `retry-after` / `x-ratelimit-reset`) and then continues.
-This makes Scraply ideal for long-running, continuous crawling tasks. You can integrate Scraply with GitHub Actions or other CI/CD pipelines to perform endless crawling jobs over time. Simply schedule Scraply to run periodically, and it will continue gathering data without duplicating work.
+## Fetchers
+`fetcher` selects the backend:
+- `'http'` (default): fast static fetching with the native `fetch`.
+- `'browser'`: full JavaScript rendering via Puppeteer (`puppeteer-cluster`).
+- a custom object implementing the `Fetcher` interface (`{ name, fetch, init?, close? }`), so backends like Playwright or a remote CDP browser can be plugged in without changing the crawler.
-### Integration with GitHub Actions
-Scraply can be easily integrated into a GitHub Action workflow for continuous, long-running crawling tasks. You can set it up to crawl for a set duration or number of URLs, persistently saving the progress, and then resuming where it left off on the next run.
+## Programmatic API
+`createCrawler(config)` returns an instance exposing each stage, plus lifecycle hooks:
-## Config Options
-Scraply allows you to pass a configuration object to the main ```scraply()``` function to customize the crawling behavior. Below are the current configuration options:
+```js
+import { createCrawler } from 'scraply';
+const crawler = createCrawler({ startUrls: ['https://example.com'] });
+// React to every crawled page as it happens.
+crawler.on('page', (record) => console.log('crawled', record.url));
+// Veto links before they are queued.
+crawler.on('shouldEnqueue', (url) => !url.includes('/admin'));
+// Transform the stored record.
+crawler.on('transform', (record) => ({ ...record, length: record.content.length }));
+await crawler.run();
 ```
-MAIN_DIR: 'dataset',
-CRAWLER: {
-  INITIAL_URLS: [
-    'https://crawler-test.com/'
-  ],
-  INCLUDE_URLS: [
-    'https://crawler-test.com/'
-  ],
-  ALLOWED_CONTENT_TYPES: [
-    'text/html'
-  ],
-  EXCLUDE_PATTERNS: [
-    '/cdn-cgi/',
-    /\.(zip|rar|webp|png|jpg|jpeg|gif|mp3|mp4|pdf|css|js|svg|ico|eot|ttf|woff|woff2|otf|webm|ogg|wav|flac|m4a|mkv|mov|avi|wmv|flv|swf|exe|msi|dmg|iso|bin)$/,
-  ],
-  DOM_ELEMENTS_REMOVE: [
-    'script',
-    'noscript',
-    'style',
-    'meta',
-    'link',
-    'svg',
-    'path',
-    'img',
-    'input',
-    'textarea',
-    'embed',
-    'object',
-    'iframe',
-    'nav',
-    'header',
-    'footer',
-    'aside',
-    'button'
-  ],
-  RETRY_STATUS_CODES: [408, 500, 502, 503, 504],
-  REQUEST_TIMEOUT: 3000,
-  MAX_REDIRECTS: 2,
-  MAX_CONTENT_LENGTH: 20 * 1024 * 1024, // 20MB
-  MAX_RETRIES: 1,
-  CRAWL_DELAY_MS: 200,
-  CRAWL_ERROR_RETRY_DELAY_MS: 1000,
-  CRAWL_RATE_LIMIT_FALLBACK_DELAY_MS: 60000,
-  EXIT_ON_RATE_LIMIT: true, // If true, forces exit instantly. If false, only exits after retries (if still 429)
-  EXIT_CODE_RATE_LIMIT: 10
-},
-DATA_FORMATTER: {
-  EXCLUDED_PATTERNS: [],
-  CATEGORISED_PATHS: {
-    'https://crawler-test.com': {
-      'mobile': 'mobile.json',
-      '*': 'general.json'
+Instance methods: `run()`, `crawl()`, `fetch(url)`, `extract(html, url)`, `enqueue(urls, opts)`, `format(records?)`, `stop()`, `on(event, fn)`.
+Hooks: `response`, `extract`, `shouldEnqueue`, `transform`, `page`, `error`.
+Standalone exports for advanced use: `normalizeUrl`, `matchesPattern`, `matchesAnyPattern`, `extractText`, `discoverLinks`, `routeRecord`, `writeRecords`, `formatRecords`, `loadConfig`, `DEFAULT_CONFIG`, `resolveFetcher`, `createHttpFetcher`, `createBrowserFetcher`.
+## Configuration
+All options are optional except `startUrls`. Durations are milliseconds.
+```js
+{
+  startUrls: ['https://crawler-test.com/'],
+  include: [],                 // URL prefixes or RegExp; defaults to startUrls
+  exclude: [/\.(zip|png|js|css|...)$/i],
+  allowedContentTypes: ['text/html'],
+  fetcher: 'http',             // 'http' | 'browser' | Fetcher instance
+  logLevel: 'info',            // 'silent' | 'error' | 'warn' | 'info' | 'debug'
+  storage: { dir: 'dataset' },
+  request: {
+    timeout: 10000,
+    maxRedirects: 5,
+    maxContentLength: 20 * 1024 * 1024,
+    userAgent: 'Mozilla/5.0 (compatible; Scraply/2.0; +https://www.npmjs.com/package/scraply)'
+  },
+  retry: {
+    max: 1,
+    statusCodes: [408, 500, 502, 503, 504],
+    delay: 1000
+  },
+  rateLimit: {
+    fallbackDelay: 60000,
+    exitOnLimit: true,
+    exitCode: 10
+  },
+  crawl: {
+    concurrency: 5,
+    delay: 200,                // per-host spacing
+    maxDepth: Infinity,
+    resetOnComplete: true
+  },
+  extract: {
+    removeSelectors: ['script', 'style', 'nav', 'header', 'footer', '...']
+  },
+  output: {
+    format: 'json',            // 'json' | 'jsonl' | 'lines'
+    exclude: [],
+    routes: {
+      'https://crawler-test.com': { '*': 'general.json' }
+    }
+  }
+}
+```
+### Output routing
+`output.routes` maps a URL prefix to `{ pathKey: filename, '*': fallback }`. The most specific matching prefix wins, then the most specific path key, then `'*'`. For example:
+```js
+output: {
+  routes: {
+    'https://docs.example.com': {
+      'guide': 'guides.json',
+      '*': 'docs.json'
     },
+    'https://example.com': { '*': 'main.json' }
   }
 }
-```
+```
+## GitHub Actions
+Because crawls are persistent and exit cleanly on rate limits, Scraply works well on a schedule. Commit the `dataset/` directory between runs, and each scheduled run continues the crawl.
+## Migrating from 1.x
+The configuration is now camelCase and grouped, and the entry point is `src/index.js`.
+- `MAIN_DIR` -> `storage.dir`
+- `CRAWLER.INITIAL_URLS` -> `startUrls`
+- `CRAWLER.INCLUDE_URLS` -> `include`
+- `CRAWLER.EXCLUDE_PATTERNS` -> `exclude`
+- `CRAWLER.ALLOWED_CONTENT_TYPES` -> `allowedContentTypes`
+- `CRAWLER.DOM_ELEMENTS_REMOVE` -> `extract.removeSelectors`
+- `CRAWLER.DYNAMIC_CRAWLING: true` -> `fetcher: 'browser'`
+- `CRAWLER.REQUEST_TIMEOUT` / `CRAWLER.MAX_REDIRECTS` / `CRAWLER.MAX_CONTENT_LENGTH` -> `request.{timeout,maxRedirects,maxContentLength}`
+- `CRAWLER.MAX_RETRIES` / `CRAWLER.RETRY_STATUS_CODES` / `CRAWLER.CRAWL_ERROR_RETRY_DELAY_MS` -> `retry.{max,statusCodes,delay}`
+- `CRAWLER.CRAWL_RATE_LIMIT_FALLBACK_DELAY_MS` / `CRAWLER.EXIT_ON_RATE_LIMIT` / `CRAWLER.EXIT_CODE_RATE_LIMIT` -> `rateLimit.{fallbackDelay,exitOnLimit,exitCode}`
+- `CRAWLER.CRAWL_DELAY_MS` -> `crawl.delay`
+- `DATA_FORMATTER.CATEGORISED_PATHS` -> `output.routes`
+- `DATA_FORMATTER.EXCLUDED_PATTERNS` -> `output.exclude`
+New in 2.0: `crawl.concurrency`, `crawl.maxDepth`, `crawl.resetOnComplete`, `output.format`, pluggable `fetcher`, and lifecycle hooks. Formatted output is now real JSON by default (1.x wrote `url content` text lines).

package/src/config/defaults.js ADDED Viewed

@@ -0,0 +1,97 @@
+/**
+ * Default Scraply configuration. Every value here can be overridden by the
+ * object passed to `createCrawler()` / `scraply()`. Durations are in milliseconds.
+ *
+ * `loadConfig()` merges the user config over this object key by key: nested
+ * plain objects are merged recursively, while arrays and RegExps supplied by
+ * the user replace the default value wholesale.
+ *
+ * @type {import('../index.js').ScraplyConfig}
+ */
+export const DEFAULT_CONFIG = {
+  // URLs the crawl is seeded with (queued at depth 0).
+  startUrls: ['https://crawler-test.com/'],
+  // Which discovered links are allowed into the queue. Each entry is either an
+  // absolute URL prefix (e.g. 'https://site.com/blog') or a RegExp. Empty means
+  // "default to startUrls": loadConfig() then copies startUrls into this list.
+  include: [],
+  // Links matching any of these (string prefix or RegExp) are never queued.
+  exclude: [
+    /\.(zip|rar|webp|png|jpg|jpeg|gif|mp3|mp4|pdf|css|js|svg|ico|eot|ttf|woff|woff2|otf|webm|ogg|wav|flac|m4a|mkv|mov|avi|wmv|flv|swf|exe|msi|dmg|iso|bin)$/i
+  ],
+  // Only responses whose Content-Type includes one of these are parsed.
+  allowedContentTypes: ['text/html'],
+  // 'http' (native fetch), 'browser' (Puppeteer) or a custom Fetcher instance.
+  fetcher: 'http',
+  // 'silent' | 'error' | 'warn' | 'info' | 'debug'
+  logLevel: 'info',
+  storage: {
+    dir: 'dataset' // root directory; loadConfig() derives queue.json, crawled/ and formatted/ from it
+  },
+  request: {
+    timeout: 10000,
+    maxRedirects: 5,
+    maxContentLength: 20 * 1024 * 1024, // 20 MiB
+    userAgent: 'Mozilla/5.0 (compatible; Scraply/2.0; +https://www.npmjs.com/package/scraply)'
+  },
+  retry: {
+    max: 1, // 1.x: MAX_RETRIES
+    statusCodes: [408, 500, 502, 503, 504], // 1.x: RETRY_STATUS_CODES
+    delay: 1000 // 1.x: CRAWL_ERROR_RETRY_DELAY_MS
+  },
+  rateLimit: {
+    fallbackDelay: 60000, // 1.x: CRAWL_RATE_LIMIT_FALLBACK_DELAY_MS; presumably the wait used when a 429 carries no retry hint — TODO confirm in crawler.js
+    exitOnLimit: true, // on HTTP 429: true = exit immediately, false = wait and continue (per the readme)
+    exitCode: 10 // exit code reported when the run stops on a rate limit (per the readme)
+  },
+  crawl: {
+    concurrency: 5, // size of the worker pool
+    delay: 200, // minimum spacing between requests to the same host
+    maxDepth: Infinity, // URLs deeper than this are not added to the queue
+    resetOnComplete: true // start a fresh crawl once every URL is processed; false keeps the finished queue (per the readme)
+  },
+  extract: {
+    removeSelectors: [
+      'script',
+      'noscript',
+      'style',
+      'meta',
+      'link',
+      'svg',
+      'path',
+      'img',
+      'input',
+      'textarea',
+      'embed',
+      'object',
+      'iframe',
+      'nav',
+      'header',
+      'footer',
+      'aside',
+      'button',
+      '[aria-modal]',
+      '[role="dialog"]',
+      '[role="alert"]',
+      '[role="banner"]',
+      '[role="form"]',
+      '[role="navigation"]',
+      '[role="search"]'
+    ]
+  },
+  output: {
+    format: 'json', // 'json' | 'jsonl' | 'lines'
+    exclude: [], // 1.x: DATA_FORMATTER.EXCLUDED_PATTERNS
+    routes: { // URL prefix -> { pathKey: filename, '*': fallback filename }
+      'https://crawler-test.com': { '*': 'general.json' }
+    }
+  }
+};

package/src/config/load.js ADDED Viewed

@@ -0,0 +1,39 @@
+import path from 'node:path';
+import { DEFAULT_CONFIG } from './defaults.js';
+const isPlainObject = (value) =>
+  value !== null && typeof value === 'object' && !Array.isArray(value) && !(value instanceof RegExp);
+const deepMerge = (target, source) => {
+  const merged = { ...target };
+  for (const [key, value] of Object.entries(source)) {
+    if (isPlainObject(value) && isPlainObject(target[key])) {
+      merged[key] = deepMerge(target[key], value);
+    } else if (value !== undefined) {
+      merged[key] = value;
+    }
+  }
+  return merged;
+};
+/**
+ * Merges a user config over the defaults and derives the storage paths.
+ * @param {import('../index.js').ScraplyConfig} [userConfig]
+ * @returns {import('../index.js').ResolvedConfig}
+ */
+export const loadConfig = (userConfig = {}) => {
+  const config = deepMerge(DEFAULT_CONFIG, userConfig);
+  const { dir } = config.storage;
+  config.storage.queuePath = path.posix.join(dir, 'queue.json');
+  config.storage.crawledDir = path.posix.join(dir, 'crawled');
+  config.storage.formattedDir = path.posix.join(dir, 'formatted');
+  if (!config.include?.length) {
+    config.include = [...config.startUrls];
+  }
+  return config;
+};

package/src/core/pipeline.js ADDED Viewed

@@ -0,0 +1,61 @@
+import { URL } from 'node:url';
+import { delay } from '../util/delay.js';
+/**
+ * Drains the queue with a fixed-size worker pool. Requests to the same host are
+ * spaced by `perHostDelay` for politeness, while different hosts run in parallel.
+ * Workers stop when the queue is drained and nothing is in flight, or when
+ * `isStopped()` becomes true.
+ *
+ * @param {Object} deps
+ * @param {import('./queue.js').QueueManager} deps.queue
+ * @param {number} deps.concurrency
+ * @param {number} deps.perHostDelay
+ * @param {(entry: import('./queue.js').QueueEntry) => Promise<void>} deps.processOne
+ * @param {() => boolean} deps.isStopped
+ */
+export const runPipeline = async ({ queue, concurrency, perHostDelay, processOne, isStopped }) => {
+  const lastHostAt = new Map();
+  let active = 0;
+  const respectHostDelay = async (url) => {
+    if (perHostDelay <= 0) return;
+    let host;
+    try {
+      host = new URL(url).host;
+    } catch {
+      return;
+    }
+    const now = Date.now();
+    const scheduled = Math.max(now, (lastHostAt.get(host) ?? 0) + perHostDelay);
+    lastHostAt.set(host, scheduled);
+    const wait = scheduled - now;
+    if (wait > 0) await delay(wait);
+  };
+  const worker = async () => {
+    while (!isStopped()) {
+      const entry = queue.claimNext();
+      if (!entry) {
+        if (active === 0) return; // queue drained and nothing can enqueue more
+        await delay(25);
+        continue;
+      }
+      active++;
+      try {
+        await respectHostDelay(entry.url);
+        await processOne(entry);
+      } finally {
+        active--;
+      }
+    }
+  };
+  const workers = Array.from({ length: Math.max(concurrency, 1) }, () => worker());
+  await Promise.all(workers);
+};

package/src/core/queue.js ADDED Viewed

@@ -0,0 +1,131 @@
+import { loadJSON, saveJSON, deletePath } from '../storage/files.js';
+/**
+ * @typedef {Object} QueueEntry
+ * @property {string} url
+ * @property {string|null} file     - path to the saved crawled file, or null
+ * @property {number|null} status   - last HTTP status
+ * @property {string|null} error    - error message, or null
+ * @property {string|null} referrer - URL this entry was discovered on
+ * @property {number} depth
+ */
+const isProcessed = (entry) => entry.file !== null || entry.error !== null;
+/**
+ * Owns the crawl queue: dedup, depth limiting, status tracking and durable
+ * checkpointing. Persistence is debounced so a high-concurrency crawl does not
+ * rewrite the queue file on every single URL.
+ */
+export class QueueManager {
+  /** @param {{ config: import('../index.js').ResolvedConfig, logger: any }} deps */
+  constructor({ config, logger }) {
+    this.config = config;
+    this.logger = logger;
+    this.path = config.storage.queuePath;
+    this.maxDepth = config.crawl.maxDepth;
+    /** @type {QueueEntry[]} */
+    this.entries = [];
+    /** @type {Set<string>} */
+    this.index = new Set();
+    /** @type {QueueEntry[]} */
+    this._pending = [];
+    this._cursor = 0;
+    this._dirty = false;
+    this._timer = null;
+    this._persistInterval = 1000;
+  }
+  /** Loads any previously persisted queue and rebuilds the in-memory indexes. */
+  load() {
+    this.entries = loadJSON(this.path, []) ?? [];
+    this.index = new Set(this.entries.map((entry) => entry.url));
+    this._pending = this.entries.filter((entry) => !isProcessed(entry));
+    this._cursor = 0;
+    return this.entries;
+  }
+  /** Replaces the queue with a fresh set of start URLs. */
+  seed(urls) {
+    for (const url of urls) this.add(url, { depth: 0, referrer: null });
+    this.flush();
+  }
+  /**
+   * Adds a URL if it is new and within the depth limit.
+   * @returns {boolean} whether the URL was added
+   */
+  add(url, { depth = 0, referrer = null } = {}) {
+    if (this.index.has(url) || depth > this.maxDepth) return false;
+    const entry = { url, file: null, status: null, error: null, referrer, depth };
+    this.index.add(url);
+    this.entries.push(entry);
+    this._pending.push(entry);
+    this._markDirty();
+    return true;
+  }
+  /** Returns the next unprocessed entry, or null when the queue is drained. */
+  claimNext() {
+    return this._cursor < this._pending.length ? this._pending[this._cursor++] : null;
+  }
+  markDone(entry, { file, status }) {
+    entry.file = file;
+    entry.status = status;
+    entry.error = null;
+    this._markDirty();
+  }
+  markError(entry, { error, status }) {
+    entry.error = error;
+    entry.status = status ?? null;
+    this._markDirty();
+  }
+  isAllProcessed() {
+    return this.entries.length > 0 && this.entries.every(isProcessed);
+  }
+  pendingCount() {
+    return this.entries.filter((entry) => !isProcessed(entry)).length;
+  }
+  crawledCount() {
+    return this.entries.filter((entry) => entry.file !== null).length;
+  }
+  errorCount() {
+    return this.entries.filter((entry) => entry.error !== null).length;
+  }
+  /** Clears in-memory state and removes the persisted queue file. */
+  reset() {
+    this.entries = [];
+    this.index = new Set();
+    this._pending = [];
+    this._cursor = 0;
+    this._dirty = false;
+    deletePath(this.path);
+  }
+  _markDirty() {
+    this._dirty = true;
+    if (this._timer) return;
+    this._timer = setTimeout(() => this.flush(), this._persistInterval);
+    if (typeof this._timer.unref === 'function') this._timer.unref();
+  }
+  /** Writes the queue to disk if it has unsaved changes. */
+  flush() {
+    if (this._timer) {
+      clearTimeout(this._timer);
+      this._timer = null;
+    }
+    if (!this._dirty) return;
+    saveJSON(this.path, this.entries);
+    this._dirty = false;
+  }
+}