npm - @d-zero/beholder - Versions diffs - 2.0.1 → 2.1.0 - Mend

@d-zero/beholder 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +6 -0
package/LICENSE +21 -0
package/README.md +4 -0
package/dist/debug.d.ts +9 -0
package/dist/debug.js +9 -0
package/dist/dom-evaluation.d.ts +109 -0
package/dist/dom-evaluation.js +273 -0
package/dist/index.d.ts +21 -0
package/dist/index.js +16 -0
package/dist/is-error.d.ts +8 -0
package/dist/is-error.js +10 -0
package/dist/keyword-check.d.ts +8 -0
package/dist/keyword-check.js +17 -0
package/dist/network-disconnection.d.ts +28 -0
package/dist/network-disconnection.js +30 -0
package/dist/parse-url.d.ts +14 -0
package/dist/parse-url.js +23 -0
package/dist/scraper.d.ts +41 -0
package/dist/scraper.js +712 -0
package/dist/types.d.ts +348 -0
package/dist/types.js +7 -0
package/package.json +5 -4
package/src/network-disconnection.spec.ts +68 -0
package/src/network-disconnection.ts +33 -0
package/src/scraper.ts +72 -13
package/src/types.ts +4 -2
package/tsconfig.tsbuildinfo +1 -0

package/dist/scraper.d.ts ADDED Viewed

@@ -0,0 +1,41 @@
+import type { ScraperEventTypes, ScraperOptions, ScrapeResult, ExURL } from './types.js';
+import type { Page } from 'puppeteer';
+import { TypedAwaitEventEmitter as EventEmitter } from '@d-zero/shared/typed-await-event-emitter';
+/**
+ * Page-level scraper that extracts data from a single browser page.
+ *
+ * The scraper returns results as values from `scrapeStart()` rather than
+ * emitting them as events. Only streaming events (changePhase, resourceResponse)
+ * are emitted for progress monitoring.
+ *
+ * The Puppeteer `Page` object is injected externally, and page lifecycle
+ * (including `page.close()`) is managed by the caller.
+ * @example
+ * ```ts
+ * const scraper = new Scraper();
+ * scraper.on('changePhase', (e) => console.log(e.name));
+ * const result = await scraper.scrapeStart(page, url, { isExternal: false });
+ * ```
+ */
+export default class Scraper extends EventEmitter<ScraperEventTypes> {
+    #private; // Declaration-emit placeholder: the class has ECMAScript `#` private members whose names and types are not exposed in this .d.ts.
+    /** Number of retries for `@retryable`-decorated methods. Set per-scrape from options; declared optional, so it may be `undefined`. */
+    retries?: number;
+    /**
+     * Begins the scraping process for a given URL on the provided Puppeteer page.
+     *
+     * Resolves to a `ScrapeResult` describing the outcome:
+     * - `type: "success"` with `pageData` on success
+     * - `type: "skipped"` with `ignored` details when the page is excluded
+     * - `type: "error"` with `error` details when scraping fails
+     *
+     * Sub-resources are collected via the `resourceResponse` event and
+     * included in the returned `ScrapeResult.resources`.
+     * @param page - The Puppeteer page instance to use for navigation and DOM evaluation; the caller keeps ownership of its lifecycle (including `page.close()`).
+     * @param url - The extended URL to scrape.
+     * @param options - Optional partial scraper configuration; any subset of `ScraperOptions` may be supplied to override defaults.
+     * @param isSkip - When `true`, the page is immediately skipped without any network requests. Optional; NOTE(review): omitting it presumably means "do not skip" - confirm against the implementation.
+     * @returns A promise resolving to the scrape result containing the outcome and captured resources.
+     */
+    scrapeStart(page: Page, url: ExURL, options?: Partial<ScraperOptions>, isSkip?: boolean): Promise<ScrapeResult>;
+}