@d-zero/beholder 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,41 @@
1
+ import type { ScraperEventTypes, ScraperOptions, ScrapeResult, ExURL } from './types.js';
2
+ import type { Page } from 'puppeteer';
3
+ import { TypedAwaitEventEmitter as EventEmitter } from '@d-zero/shared/typed-await-event-emitter';
4
+ /**
5
+ * Page-level scraper that extracts data from a single browser page.
6
+ *
7
+ * The scraper hands results back as the resolved value of `scrapeStart()` rather than
8
+ * emitting them as events. Only streaming events (`changePhase`, `resourceResponse`)
9
+ * are emitted for progress monitoring.
10
+ *
11
+ * The Puppeteer `Page` object is injected by the caller, and the page lifecycle
12
+ * (including `page.close()`) is managed by the caller as well.
13
+ * @example
14
+ * ```ts
15
+ * const scraper = new Scraper();
16
+ * scraper.on('changePhase', (e) => console.log(e.name));
17
+ * const result = await scraper.scrapeStart(page, url, { isExternal: false });
18
+ * ```
19
+ */
20
+ export default class Scraper extends EventEmitter<ScraperEventTypes> {
21
+ #private; // NOTE(review): declaration-file marker; presumably stands in for ECMAScript `#private` members of the implementation, which are not visible here.
22
+ /** Optional number of retries for `@retryable`-decorated methods (may be `undefined`). Set per-scrape from options. */
23
+ retries?: number;
24
+ /**
25
+ * Begins the scraping process for a given URL on the provided Puppeteer page.
26
+ *
27
+ * Resolves to a `ScrapeResult` describing the outcome:
28
+ * - `type: "success"` with `pageData` on success
29
+ * - `type: "skipped"` with `ignored` details when the page is excluded
30
+ * - `type: "error"` with `error` details when scraping fails
31
+ *
32
+ * Sub-resources are collected via the `resourceResponse` event and
33
+ * included in the returned `ScrapeResult.resources`.
34
+ * @param page - The Puppeteer page instance to use for navigation and DOM evaluation.
35
+ * @param url - The extended URL to scrape.
36
+ * @param options - Optional partial scraper configuration overriding defaults.
37
+ * @param isSkip - Optional flag; when `true`, the page is immediately skipped without any network requests.
38
+ * @returns A promise that resolves to the scrape result containing the outcome and captured resources.
39
+ */
40
+ scrapeStart(page: Page, url: ExURL, options?: Partial<ScraperOptions>, isSkip?: boolean): Promise<ScrapeResult>;
41
+ }