@purepageio/fetch-engines 0.2.12 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -32,6 +32,7 @@ This package provides a high-level abstraction, letting you focus on using the w
32
32
  - [API Reference](#api-reference)
33
33
  - [Stealth / Anti-Detection (`PlaywrightEngine`)](#stealth--anti-detection-playwrightengine)
34
34
  - [Error Handling](#error-handling)
35
+ - [Logging](#logging)
35
36
  - [Contributing](#contributing)
36
37
  - [License](#license)
37
38
 
@@ -106,8 +107,11 @@ main();
106
107
  ```typescript
107
108
  import { PlaywrightEngine } from "@purepageio/fetch-engines";
108
109
 
109
- // Engine configured to fetch HTML by default
110
- const engine = new PlaywrightEngine({ markdown: false });
110
+ // Engine configured to fetch HTML by default and pass custom launch arguments
111
+ const engine = new PlaywrightEngine({
112
+ markdown: false,
113
+ playwrightLaunchOptions: { args: ["--disable-gpu"] },
114
+ });
111
115
 
112
116
  async function main() {
113
117
  try {
@@ -191,19 +195,20 @@ The `PlaywrightEngine` accepts a `PlaywrightEngineConfig` object with the follow
191
195
 
192
196
  **General Options:**
193
197
 
194
- | Option | Type | Default | Description |
195
- | ----------------------- | --------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
196
- | `markdown` | `boolean` | `false` | If `true`, converts content (from Playwright or fallback) to Markdown. `contentType` will be `'markdown'`. Can be overridden per-request. |
197
- | `useHttpFallback` | `boolean` | `true` | If `true`, attempts a fast HTTP fetch before using Playwright. |
198
- | `useHeadedModeFallback` | `boolean` | `false` | If `true`, automatically retries specific failed domains in headed (visible) mode. |
199
- | `defaultFastMode` | `boolean` | `true` | If `true`, initially blocks non-essential resources and skips human simulation. Can be overridden per-request. |
200
- | `simulateHumanBehavior` | `boolean` | `true` | If `true` (and not `fastMode`), attempts basic human-like interactions. |
201
- | `concurrentPages` | `number` | `3` | Max number of pages to process concurrently within the engine queue. |
202
- | `maxRetries` | `number` | `3` | Max retry attempts for a failed fetch (excluding initial try). |
203
- | `retryDelay` | `number` | `5000` | Delay (ms) between retries. |
204
- | `cacheTTL` | `number` | `900000` | Cache Time-To-Live (ms). `0` disables caching. (15 mins default) |
205
- | `spaMode` | `boolean` | `false` | If `true`, enables Single Page Application mode. This typically bypasses `useHttpFallback`, forces `fastMode` to effectively `false`, uses more patient load conditions (e.g., network idle), and may apply `spaRenderDelayMs`. Recommended for JavaScript-heavy sites. |
206
- | `spaRenderDelayMs` | `number` | `0` | Explicit delay (ms) after page load events in `spaMode` to allow for client-side rendering. Only applies if `spaMode` is `true`. |
198
+ | Option | Type | Default | Description |
199
+ | ------------------------- | --------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
200
+ | `markdown` | `boolean` | `false` | If `true`, converts content (from Playwright or its internal HTTP fallback) to Markdown. `contentType` will be `'markdown'`. Can be overridden per-request. |
201
+ | `useHttpFallback` | `boolean` | `true` | If `true`, attempts a fast HTTP fetch before using Playwright. Ineffective if `spaMode` is `true`. |
202
+ | `useHeadedModeFallback` | `boolean` | `false` | If `true`, automatically retries specific failed Playwright attempts in headed (visible) mode. |
203
+ | `defaultFastMode` | `boolean` | `true` | If `true`, initially blocks non-essential resources and skips human simulation. Can be overridden per-request. Effectively `false` if `spaMode` is `true`. |
204
+ | `simulateHumanBehavior` | `boolean` | `true` | If `true` (and not `fastMode` or `spaMode`), attempts basic human-like interactions. |
205
+ | `concurrentPages` | `number` | `3` | Max number of pages to process concurrently within the engine queue. |
206
+ | `maxRetries` | `number` | `3` | Max retry attempts for a failed fetch (excluding initial try). |
207
+ | `retryDelay` | `number` | `5000` | Delay (ms) between retries. |
208
+ | `cacheTTL` | `number` | `900000` | Cache Time-To-Live (ms). `0` disables caching. (15 mins default) |
209
+ | `spaMode` | `boolean` | `false` | If `true`, enables Single Page Application mode. This typically bypasses `useHttpFallback`, effectively sets `fastMode` to `false`, uses more patient load conditions (e.g., network idle), and may apply `spaRenderDelayMs`. Recommended for JavaScript-heavy sites. |
210
+ | `spaRenderDelayMs` | `number` | `0` | Explicit delay (ms) after page load events in `spaMode` to allow for client-side rendering. Only applies if `spaMode` is `true`. |
211
+ | `playwrightLaunchOptions` | `LaunchOptions` | `undefined` | Optional Playwright launch options (from `playwright` package, e.g., `{ args: ['--some-flag'] }`) passed when a browser instance is created. Merged with internal defaults. |
207
212
 
208
213
  **Browser Pool Options (Passed to internal `PlaywrightBrowserPool`):**
209
214
 
@@ -220,14 +225,26 @@ The `PlaywrightEngine` accepts a `PlaywrightEngineConfig` object with the follow
220
225
 
221
226
  ### HybridEngine
222
227
 
223
- The `HybridEngine` constructor accepts a single optional argument which uses the **`PlaywrightEngineConfig`** structure (see the `PlaywrightEngine` tables above). These options configure the underlying engines where applicable:
228
+ The `HybridEngine` constructor accepts `PlaywrightEngineConfig` options. These settings configure the underlying engines and the hybrid strategy:
224
229
 
225
- - Options like `maxRetries`, `cacheTTL`, `proxy`, `maxBrowsers`, `spaMode`, `spaRenderDelayMs`, etc., are primarily passed to the internal `PlaywrightEngine` or used by `HybridEngine` to decide its strategy.
226
- - The `markdown` setting in the constructor (`boolean`, default: `false`) applies to **both** internal engines by default.
227
- - The `spaMode` setting in the constructor (`boolean`, default: `false`) configures the default SPA behavior for the `HybridEngine`. If `spaMode` is true, the `HybridEngine` will attempt to detect if the `FetchEngine` result is an SPA shell (e.g., empty root div, noscript tag). If so, it will automatically fallback to `PlaywrightEngine` (with `spaMode` active) even if `FetchEngine` returned a 200 status.
228
- - If you provide `markdown: true` or `spaMode: true` in the `options` object when calling `fetchHTML`, this override is handled as follows:
229
- - For `markdown`: Only applies if a fallback to `PlaywrightEngine` is necessary or if `FetchEngine` succeeded but an SPA shell was detected in `spaMode` (forcing Playwright). The `FetchEngine` part (if its result is used) will always use the `markdown` setting provided in the `HybridEngine` constructor.
230
- - For `spaMode`: This directly controls the `HybridEngine`'s SPA shell detection and informs the `PlaywrightEngine` if a fallback occurs.
230
+ - **Constructor `markdown` option:**
231
+ - Sets the default Markdown conversion for the internal `FetchEngine`. This `FetchEngine` instance **does not** react to per-request `markdown` overrides.
232
+ - Sets the default for the internal `PlaywrightEngine`.
233
+ - **Constructor `spaMode` option:**
234
+ - Sets the default SPA mode for `HybridEngine`. If `true`, `HybridEngine` checks `FetchEngine`'s output for SPA shell characteristics. If an SPA shell is detected, it forces a fallback to `PlaywrightEngine` (which will also run in SPA mode).
235
+ - Sets the default for the internal `PlaywrightEngine`.
236
+ - **Other `PlaywrightEngineConfig` options** (e.g., `maxRetries`, `cacheTTL`, `playwrightLaunchOptions`, pool settings) are primarily passed to and used by the internal `PlaywrightEngine`.
237
+
238
+ **Per-request `options` in `HybridEngine.fetchHTML(url, options)`:**
239
+
240
+ - **`options.markdown` (`boolean`):**
241
+ - If `FetchEngine` succeeds and its content is used (i.e., not an SPA shell when `spaMode` is active), this per-request `markdown` option is **ignored**. The content's format is determined by the `FetchEngine`'s constructor `markdown` setting.
242
+ - If `HybridEngine` falls back to `PlaywrightEngine` (due to `FetchEngine` failure or SPA shell detection), this per-request `markdown` option **overrides** the `PlaywrightEngine`'s default and determines if its output is Markdown.
243
+ - **`options.spaMode` (`boolean`):**
244
+ - Overrides the `HybridEngine`'s default SPA mode behavior for this specific request (affecting SPA shell detection and potential fallback to `PlaywrightEngine`).
245
+ - If `PlaywrightEngine` is used, this option also overrides its default SPA mode.
246
+ - **`options.fastMode` (`boolean`):**
247
+ - If `PlaywrightEngine` is used, this option overrides its `defaultFastMode` setting. It has no effect on `FetchEngine`.
231
248
 
232
249
  ```typescript
233
250
  // Example: HybridEngine with SPA mode enabled by default
@@ -295,67 +312,72 @@ Errors during fetching are typically thrown as instances of `FetchError` (or its
295
312
  - `originalError` (`Error | undefined`): The underlying error that caused this fetch error (e.g., a Playwright error object).
296
313
  - `statusCode` (`number | undefined`): The HTTP status code, if relevant (especially for `FetchEngineHttpError`).
297
314
 
298
- Common error scenarios include:
299
-
300
- - Network issues (DNS resolution failure, connection refused).
301
- - HTTP errors (4xx client errors, 5xx server errors) -> `FetchEngineHttpError` from `FetchEngine` or potentially wrapped `FetchError` from `PlaywrightEngine`.
302
- - Non-HTML content type received -> `FetchError` with code `ERR_NON_HTML_CONTENT` from `FetchEngine`.
303
- - Playwright navigation timeouts -> `FetchError` wrapping Playwright error, often with code `ERR_NAVIGATION_TIMEOUT`.
304
- - Proxy connection errors.
305
- - Page crashes within Playwright.
306
- - Errors thrown by the browser pool (e.g., failure to launch browser).
315
+ Common `FetchError` codes and scenarios:
316
+
317
+ - **`ERR_HTTP_ERROR`**: Thrown by `FetchEngine` for HTTP status codes >= 400. `error.statusCode` will be set.
318
+ - **`ERR_NON_HTML_CONTENT`**: Thrown by `FetchEngine` if the content type is not HTML and `markdown` conversion is not requested.
319
+ - **`ERR_PLAYWRIGHT_OPERATION`**: A general error from `PlaywrightEngine` indicating a failure during a Playwright operation (e.g., page acquisition, navigation, interaction). The `originalError` property will often contain the specific Playwright error.
320
+ - **`ERR_NAVIGATION`**: Often seen as part of `ERR_PLAYWRIGHT_OPERATION`'s message or in `originalError` when a Playwright navigation fails (e.g., timeout, SSL error).
321
+ - **`ERR_MARKDOWN_CONVERSION_NON_HTML`**: Thrown by `PlaywrightEngine` (or `HybridEngine` if falling back to Playwright) if `markdown: true` is requested for a non-HTML content type (e.g., XML, JSON).
322
+ - **`ERR_UNSUPPORTED_RAW_CONTENT_TYPE`**: Thrown by `PlaywrightEngine` if `markdown: false` is requested for a content type it doesn't support for direct fetching (e.g., images, applications). Currently, it primarily supports `text/*`, `application/json`, and `application/xml`-like types when `markdown: false`.
323
+ - **`ERR_CACHE_ERROR`**: Indicates an issue with cache read/write operations.
324
+ - **`ERR_PROXY_CONFIG_ERROR`**: Problem with proxy configuration.
325
+ - **`ERR_BROWSER_POOL_EXHAUSTED`**: Thrown if the browser pool cannot provide a page (e.g., the maximum number of browsers has been reached and all of them remain busy beyond the timeout).
326
+ - **Other Scenarios (often wrapped by `ERR_PLAYWRIGHT_OPERATION` or a generic `FetchError`):**
327
+ - Network issues (DNS resolution, connection refused).
328
+ - Proxy connection failures.
329
+ - Page crashes or context/browser disconnections within Playwright.
330
+ - Failures during browser launch or management by the pool.
307
331
 
308
332
  The `HTMLFetchResult` object may also contain an `error` property if the final fetch attempt failed after all retries but an earlier attempt (within retries) might have produced some intermediate (potentially unusable) result data. It's generally best to rely on the thrown error for failure handling.
309
333
 
310
334
  **Example:**
311
335
 
312
336
  ```typescript
313
- import { FetchEngine, FetchError } from "@purepageio/fetch-engines";
337
+ import { PlaywrightEngine, FetchError } from "@purepageio/fetch-engines";
314
338
 
315
- const engine = new FetchEngine();
339
+ // Example using PlaywrightEngine to illustrate more complex error handling
340
+ const engine = new PlaywrightEngine({ useHttpFallback: false, maxRetries: 1 });
316
341
 
317
342
  async function fetchWithHandling(url: string) {
318
343
  try {
319
344
  const result = await engine.fetchHTML(url);
320
- // Note: result.error is less common, primary errors are thrown.
321
345
  if (result.error) {
322
- console.error(`Fetch for ${url} reported error after retries: ${result.error.message}`);
323
- } else {
324
- console.log(`Success for ${url}! Content type: ${result.contentType}`);
325
- // Use result.content
346
+ console.warn(`Fetch for ${url} included non-critical error after retries: ${result.error.message}`);
326
347
  }
348
+ console.log(`Success for ${url}! Title: ${result.title}, Content type: ${result.contentType}`);
349
+ // Use result.content
327
350
  } catch (error) {
328
- console.error(`Fetch failed entirely for ${url}:`);
351
+ console.error(`Fetch failed for ${url}:`);
329
352
  if (error instanceof FetchError) {
330
- // Handle specific FetchError codes
331
- switch (error.code) {
332
- case "ERR_HTTP_ERROR":
333
- console.error(` HTTP Error: Status ${error.statusCode} - ${error.message}`);
334
- break;
335
- case "ERR_NON_HTML_CONTENT":
336
- console.error(` Wrong Content Type: ${error.message}`);
337
- break;
338
- // Add other specific codes as needed
339
- default:
340
- console.error(` FetchError (${error.code || "UNKNOWN"}): ${error.message}`);
341
- break;
353
+ console.error(` Error Code: ${error.code || "N/A"}`);
354
+ console.error(` Message: ${error.message}`);
355
+ if (error.statusCode) {
356
+ console.error(` Status Code: ${error.statusCode}`);
342
357
  }
343
358
  if (error.originalError) {
344
- console.error(` Original Error: ${error.originalError.message}`);
359
+ console.error(` Original Error: ${error.originalError.name} - ${error.originalError.message}`);
360
+ }
361
+ // Example of specific handling:
362
+ if (error.code === "ERR_PLAYWRIGHT_OPERATION") {
363
+ console.error(" Hint: This was a Playwright operation failure. Check Playwright logs or originalError.");
345
364
  }
346
365
  } else if (error instanceof Error) {
347
- // Handle generic JavaScript errors
348
366
  console.error(` Generic Error: ${error.message}`);
349
367
  } else {
350
- // Handle unexpected throw types
351
- console.error(` Unknown error occurred.`);
368
+ console.error(` Unknown error occurred: ${String(error)}`);
352
369
  }
353
370
  }
354
371
  }
355
372
 
356
- fetchWithHandling("https://example.com");
357
- fetchWithHandling("https://httpbin.org/status/404"); // Example causing HTTP error
358
- fetchWithHandling("https://httpbin.org/image/png"); // Example causing non-HTML error
373
+ async function runExamples() {
374
+ await fetchWithHandling("https://nonexistentdomain.example.com"); // Likely DNS or navigation error
375
+ await fetchWithHandling("https://example.com/non_html_resource.json"); // Test with actual JSON URL if available
376
+ // or a site known to cause Playwright issues for a demo.
377
+ await engine.cleanup(); // Important for PlaywrightEngine
378
+ }
379
+
380
+ runExamples();
359
381
  ```
360
382
 
361
383
  ## Logging
@@ -7,6 +7,7 @@ export declare class HybridEngine implements IEngine {
7
7
  private readonly fetchEngine;
8
8
  private readonly playwrightEngine;
9
9
  private readonly config;
10
+ private readonly playwrightOnlyPatterns;
10
11
  constructor(config?: PlaywrightEngineConfig);
11
12
  private _isSpaShell;
12
13
  fetchHTML(url: string, options?: FetchOptions): Promise<HTMLFetchResult>;
@@ -1 +1 @@
1
- {"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;gBAEpC,MAAM,GAAE,sBAA2B;IAS/C,OAAO,CAAC,WAAW;IAkBb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IAoDlF;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
1
+ {"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;IAChD,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAsB;gBAEjD,MAAM,GAAE,sBAA2B;IAU/C,OAAO,CAAC,WAAW;IAkBb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA+DlF;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
@@ -7,6 +7,7 @@ export class HybridEngine {
7
7
  fetchEngine;
8
8
  playwrightEngine;
9
9
  config; // Store config for potential per-request PW overrides
10
+ playwrightOnlyPatterns;
10
11
  constructor(config = {}) {
11
12
  // Pass relevant config parts to each engine
12
13
  // FetchEngine only takes markdown option from the shared config
@@ -14,6 +15,7 @@ export class HybridEngine {
14
15
  this.fetchEngine = new FetchEngine({ markdown: config.markdown });
15
16
  this.playwrightEngine = new PlaywrightEngine(config);
16
17
  this.config = config; // Store for merging later
18
+ this.playwrightOnlyPatterns = config.playwrightOnlyPatterns || [];
17
19
  }
18
20
  _isSpaShell(htmlContent) {
19
21
  if (!htmlContent || htmlContent.length < 150) {
@@ -42,15 +44,24 @@ export class HybridEngine {
42
44
  : this.config.markdown !== undefined
43
45
  ? this.config.markdown
44
46
  : false;
45
- // Prepare options for PlaywrightEngine, to be used in fallback scenarios
46
- // The order of spread and explicit assignment ensures that effectiveSpaMode and effectiveMarkdown (HybridEngine's interpretation)
47
- // are what PlaywrightEngine receives for these specific fields, while other configs are passed through.
47
+ // Prepare options for PlaywrightEngine, to be used in fallback scenarios or direct calls
48
48
  const playwrightOptions = {
49
49
  ...this.config, // Start with base config given to HybridEngine (e.g. spaRenderDelayMs)
50
50
  ...options, // Apply all per-request overrides first
51
51
  markdown: effectiveMarkdown, // Then ensure HybridEngine's resolved markdown is set
52
52
  spaMode: effectiveSpaMode, // Then ensure HybridEngine's resolved spaMode is set
53
53
  };
54
+ // Check playwrightOnlyPatterns first
55
+ for (const pattern of this.playwrightOnlyPatterns) {
56
+ if (typeof pattern === "string" && url.includes(pattern)) {
57
+ console.warn(`HybridEngine: URL ${url} matches string pattern "${pattern}". Using PlaywrightEngine directly.`);
58
+ return this.playwrightEngine.fetchHTML(url, playwrightOptions);
59
+ }
60
+ else if (pattern instanceof RegExp && pattern.test(url)) {
61
+ console.warn(`HybridEngine: URL ${url} matches regex pattern "${pattern.toString()}". Using PlaywrightEngine directly.`);
62
+ return this.playwrightEngine.fetchHTML(url, playwrightOptions);
63
+ }
64
+ }
54
65
  try {
55
66
  const fetchResult = await this.fetchEngine.fetchHTML(url);
56
67
  // If FetchEngine succeeded AND spaMode is active, check if it's just a shell
@@ -1 +1 @@
1
- {"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IAEvG,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,uGAAuG;QACvG,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;IAClD,CAAC;IAEO,WAAW,CAAC,WAAmB;QACrC,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC7C,+CAA+C;YAC/C,iFAAiF;YACjF,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;QACtD,CAAC;QACD,2BAA2B;QAC3B,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,mCAAmC;QACnC,IAAI,qDAAqD,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEzF,mDAAmD;QACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,oDAAoD;QACpD,gHAAgH;QAChH,MAAM,gBAAgB,GACpB,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACpH,MAAM,iBAAiB,GACrB,OAAO,CAAC,QAAQ,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC,QAAQ;YAClB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;gBAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ;gBACtB,CAAC,CAAC,KAAK,CAAC;QAEd,yEAAyE;QACzE,kIAAkI;QAClI,wGAAwG;QACxG,MAAM,iBAAiB,GAA6D;YAClF,GAAG,IAAI,CAAC,MAAM,EAAE,uEAAuE;YACvF,GAAG,OAAO,EAAE,wCAAwC;YACpD,QAAQ,EAAE,iBAAiB,EAAE,sDAAsD;YACnF,OAAO,EAAE,gBAAgB,EAAE,qDAAqD;SACjF,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAE1D,6EAA6E;YAC7E,IAAI,gBAAgB,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1C,OAAO,CAAC,IAAI,CACV,2DAA2D,GAAG,wCAAwC,CACvG,CAAC;oBACF,yEA
AyE;oBACzE,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YACD,wFAAwF;YACxF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,wCAAwC,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACxG,CAAC;YACF,IAAI,CAAC;gBACH,yEAAyE;gBACzE,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CAAC,2DAA2D,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC5G,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
1
+ {"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IACtF,sBAAsB,CAAsB;IAE7D,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,uGAAuG;QACvG,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;QAChD,IAAI,CAAC,sBAAsB,GAAG,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC;IACpE,CAAC;IAEO,WAAW,CAAC,WAAmB;QACrC,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC7C,+CAA+C;YAC/C,iFAAiF;YACjF,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;QACtD,CAAC;QACD,2BAA2B;QAC3B,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,mCAAmC;QACnC,IAAI,qDAAqD,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEzF,mDAAmD;QACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,oDAAoD;QACpD,gHAAgH;QAChH,MAAM,gBAAgB,GACpB,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACpH,MAAM,iBAAiB,GACrB,OAAO,CAAC,QAAQ,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC,QAAQ;YAClB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;gBAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ;gBACtB,CAAC,CAAC,KAAK,CAAC;QAEd,yFAAyF;QACzF,MAAM,iBAAiB,GAA6D;YAClF,GAAG,IAAI,CAAC,MAAM,EAAE,uEAAuE;YACvF,GAAG,OAAO,EAAE,wCAAwC;YACpD,QAAQ,EAAE,iBAAiB,EAAE,sDAAsD;YACnF,OAAO,EAAE,gBAAgB,EAAE,qDAAqD;SACjF,CAAC;QAEF,qCAAqC;QACrC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAClD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,qBAAqB,GAAG,4BAA4B,OAAO,qCAAqC,CAAC,CAAC;gBAC/G,OAAO,IAAI,CAAC,
gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,OAAO,YAAY,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1D,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,2BAA2B,OAAO,CAAC,QAAQ,EAAE,qCAAqC,CAC3G,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAE1D,6EAA6E;YAC7E,IAAI,gBAAgB,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1C,OAAO,CAAC,IAAI,CACV,2DAA2D,GAAG,wCAAwC,CACvG,CAAC;oBACF,yEAAyE;oBACzE,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YACD,wFAAwF;YACxF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,wCAAwC,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACxG,CAAC;YACF,IAAI,CAAC;gBACH,yEAAyE;gBACzE,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CAAC,2DAA2D,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC5G,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -59,13 +59,40 @@ export declare class PlaywrightEngine implements IEngine {
59
59
  markdown?: boolean;
60
60
  spaMode?: boolean;
61
61
  }): Promise<HTMLFetchResult>;
62
+ /**
63
+ * Helper to check cache and potentially return a cached result.
64
+ * Handles logic for re-fetching if cache is stale or content type mismatch for markdown.
65
+ *
66
+ * @param url URL to check in cache
67
+ * @param currentConfig Current fetch configuration
68
+ * @returns Cached result or null if not found/needs re-fetch.
69
+ */
70
+ private _handleCacheCheck;
71
+ /**
72
+ * Attempts to fetch the URL using a simple HTTP GET request as a fallback.
73
+ *
74
+ * @param url The URL to fetch.
75
+ * @param currentConfig The current fetch configuration.
76
+ * @returns A Promise resolving to an HTMLFetchResult if successful, or null if fallback is skipped or a challenge page is encountered.
77
+ * @throws {FetchError} If the HTTP fallback itself fails with an unrecoverable error.
78
+ */
79
+ private _attemptHttpFallback;
80
+ /**
81
+ * Ensures the browser pool is initialized with the correct mode (headed/headless).
82
+ * Handles one retry attempt if the initial pool initialization fails.
83
+ *
84
+ * @param useHeadedMode Whether to initialize the pool in headed mode.
85
+ * @param currentConfig The current fetch configuration (for retryDelay).
86
+ * @returns A Promise that resolves when the pool is initialized, or rejects if initialization fails after retries.
87
+ * @throws {FetchError} If pool initialization fails after retries or if the pool is unavailable.
88
+ */
89
+ private _ensureBrowserPoolInitialized;
62
90
  /**
63
91
  * Internal recursive method to handle fetching with retries.
64
92
  *
65
93
  * @param url URL to fetch
66
94
  * @param currentConfig The merged configuration including markdown option
67
95
  * @param retryAttempt Current retry attempt number (starts at 0)
68
- * @param parentRetryCount Tracks retries related to pool initialization errors (starts at 0)
69
96
  * @returns Promise resolving to HTMLFetchResult
70
97
  */
71
98
  private _fetchRecursive;
@@ -1 +1 @@
1
- {"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAmB5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;IAG1D,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAoBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAuCnC;;;OAGG;YACW,yBAAyB;IAmFvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAqCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;;OASG;IACG,SAAS,CACb,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO,GACrE,OAAO,CAAC,eAAe,CAAC;IAa3B;;;;;;;;OAQG;YACW,eAAe;IAmI7B;;;OAGG;YACW,mBAAmB;YA4JnB,kBAAkB;IAmChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAe9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;CAS5B"}
1
+ {"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAyC5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IAGxD,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAsBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAwCnC;;;OAGG;YACW,yBAAyB;IAiEvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAwCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;;OASG;IACG,SAAS,CACb,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO,GACrE,OAAO,CAAC,eAAe,CAAC;IAc3B;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAmDzB;;;;;;;OAOG;YACW,oBAAoB;IAiClC;;;;;;;;OAQG;YACW,6BAA6B;IAmC3C;;;;;;;OAOG;YACW,eAAe;IAgH7B;;;OAGG;YACW,mBAAmB;YAqKnB,kBAAkB;IAyChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;CAS5B"}