@purepageio/fetch-engines 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +80 -58
- package/dist/HybridEngine.d.ts +1 -0
- package/dist/HybridEngine.d.ts.map +1 -1
- package/dist/HybridEngine.js +14 -3
- package/dist/HybridEngine.js.map +1 -1
- package/dist/PlaywrightEngine.d.ts +28 -1
- package/dist/PlaywrightEngine.d.ts.map +1 -1
- package/dist/PlaywrightEngine.js +220 -128
- package/dist/PlaywrightEngine.js.map +1 -1
- package/dist/browser/PlaywrightBrowserPool.d.ts +3 -1
- package/dist/browser/PlaywrightBrowserPool.d.ts.map +1 -1
- package/dist/browser/PlaywrightBrowserPool.js +319 -190
- package/dist/browser/PlaywrightBrowserPool.js.map +1 -1
- package/dist/constants.d.ts +29 -0
- package/dist/constants.d.ts.map +1 -0
- package/dist/constants.js +31 -0
- package/dist/constants.js.map +1 -0
- package/dist/types.d.ts +14 -1
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/markdown-converter.d.ts +7 -0
- package/dist/utils/markdown-converter.d.ts.map +1 -1
- package/dist/utils/markdown-converter.js +155 -64
- package/dist/utils/markdown-converter.js.map +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -32,6 +32,7 @@ This package provides a high-level abstraction, letting you focus on using the w
|
|
|
32
32
|
- [API Reference](#api-reference)
|
|
33
33
|
- [Stealth / Anti-Detection (`PlaywrightEngine`)](#stealth--anti-detection-playwrightengine)
|
|
34
34
|
- [Error Handling](#error-handling)
|
|
35
|
+
- [Logging](#logging)
|
|
35
36
|
- [Contributing](#contributing)
|
|
36
37
|
- [License](#license)
|
|
37
38
|
|
|
@@ -106,8 +107,11 @@ main();
|
|
|
106
107
|
```typescript
|
|
107
108
|
import { PlaywrightEngine } from "@purepageio/fetch-engines";
|
|
108
109
|
|
|
109
|
-
// Engine configured to fetch HTML by default
|
|
110
|
-
const engine = new PlaywrightEngine({
|
|
110
|
+
// Engine configured to fetch HTML by default and pass custom launch arguments
|
|
111
|
+
const engine = new PlaywrightEngine({
|
|
112
|
+
markdown: false,
|
|
113
|
+
playwrightLaunchOptions: { args: ["--disable-gpu"] },
|
|
114
|
+
});
|
|
111
115
|
|
|
112
116
|
async function main() {
|
|
113
117
|
try {
|
|
@@ -191,19 +195,20 @@ The `PlaywrightEngine` accepts a `PlaywrightEngineConfig` object with the follow
|
|
|
191
195
|
|
|
192
196
|
**General Options:**
|
|
193
197
|
|
|
194
|
-
| Option
|
|
195
|
-
|
|
|
196
|
-
| `markdown`
|
|
197
|
-
| `useHttpFallback`
|
|
198
|
-
| `useHeadedModeFallback`
|
|
199
|
-
| `defaultFastMode`
|
|
200
|
-
| `simulateHumanBehavior`
|
|
201
|
-
| `concurrentPages`
|
|
202
|
-
| `maxRetries`
|
|
203
|
-
| `retryDelay`
|
|
204
|
-
| `cacheTTL`
|
|
205
|
-
| `spaMode`
|
|
206
|
-
| `spaRenderDelayMs`
|
|
198
|
+
| Option | Type | Default | Description |
|
|
199
|
+
| ------------------------- | --------------- | ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
200
|
+
| `markdown` | `boolean` | `false` | If `true`, converts content (from Playwright or its internal HTTP fallback) to Markdown. `contentType` will be `'markdown'`. Can be overridden per-request. |
|
|
201
|
+
| `useHttpFallback` | `boolean` | `true` | If `true`, attempts a fast HTTP fetch before using Playwright. Ineffective if `spaMode` is `true`. |
|
|
202
|
+
| `useHeadedModeFallback` | `boolean` | `false` | If `true`, automatically retries specific failed Playwright attempts in headed (visible) mode. |
|
|
203
|
+
| `defaultFastMode` | `boolean` | `true` | If `true`, initially blocks non-essential resources and skips human simulation. Can be overridden per-request. Effectively `false` if `spaMode` is `true`. |
|
|
204
|
+
| `simulateHumanBehavior` | `boolean` | `true` | If `true` (and not `fastMode` or `spaMode`), attempts basic human-like interactions. |
|
|
205
|
+
| `concurrentPages` | `number` | `3` | Max number of pages to process concurrently within the engine queue. |
|
|
206
|
+
| `maxRetries` | `number` | `3` | Max retry attempts for a failed fetch (excluding initial try). |
|
|
207
|
+
| `retryDelay` | `number` | `5000` | Delay (ms) between retries. |
|
|
208
|
+
| `cacheTTL` | `number` | `900000` | Cache Time-To-Live (ms). `0` disables caching. (15 mins default) |
|
|
209
|
+
| `spaMode` | `boolean` | `false` | If `true`, enables Single Page Application mode. This typically bypasses `useHttpFallback`, effectively sets `fastMode` to `false`, uses more patient load conditions (e.g., network idle), and may apply `spaRenderDelayMs`. Recommended for JavaScript-heavy sites. |
|
|
210
|
+
| `spaRenderDelayMs` | `number` | `0` | Explicit delay (ms) after page load events in `spaMode` to allow for client-side rendering. Only applies if `spaMode` is `true`. |
|
|
211
|
+
| `playwrightLaunchOptions` | `LaunchOptions` | `undefined` | Optional Playwright launch options (from `playwright` package, e.g., `{ args: ['--some-flag'] }`) passed when a browser instance is created. Merged with internal defaults. |
|
|
207
212
|
|
|
208
213
|
**Browser Pool Options (Passed to internal `PlaywrightBrowserPool`):**
|
|
209
214
|
|
|
@@ -220,14 +225,26 @@ The `PlaywrightEngine` accepts a `PlaywrightEngineConfig` object with the follow
|
|
|
220
225
|
|
|
221
226
|
### HybridEngine
|
|
222
227
|
|
|
223
|
-
The `HybridEngine` constructor accepts
|
|
228
|
+
The `HybridEngine` constructor accepts `PlaywrightEngineConfig` options. These settings configure the underlying engines and the hybrid strategy:
|
|
224
229
|
|
|
225
|
-
-
|
|
226
|
-
-
|
|
227
|
-
-
|
|
228
|
-
-
|
|
229
|
-
-
|
|
230
|
-
-
|
|
230
|
+
- **Constructor `markdown` option:**
|
|
231
|
+
- Sets the default Markdown conversion for the internal `FetchEngine`. This `FetchEngine` instance **does not** react to per-request `markdown` overrides.
|
|
232
|
+
- Sets the default for the internal `PlaywrightEngine`.
|
|
233
|
+
- **Constructor `spaMode` option:**
|
|
234
|
+
- Sets the default SPA mode for `HybridEngine`. If `true`, `HybridEngine` checks `FetchEngine`'s output for SPA shell characteristics. If an SPA shell is detected, it forces a fallback to `PlaywrightEngine` (which will also run in SPA mode).
|
|
235
|
+
- Sets the default for the internal `PlaywrightEngine`.
|
|
236
|
+
- **Other `PlaywrightEngineConfig` options** (e.g., `maxRetries`, `cacheTTL`, `playwrightLaunchOptions`, pool settings) are primarily passed to and used by the internal `PlaywrightEngine`.
|
|
237
|
+
|
|
238
|
+
**Per-request `options` in `HybridEngine.fetchHTML(url, options)`:**
|
|
239
|
+
|
|
240
|
+
- **`options.markdown` (`boolean`):**
|
|
241
|
+
- If `FetchEngine` succeeds and its content is used (i.e., not an SPA shell when `spaMode` is active), this per-request `markdown` option is **ignored**. The content's format is determined by the `FetchEngine`'s constructor `markdown` setting.
|
|
242
|
+
- If `HybridEngine` falls back to `PlaywrightEngine` (due to `FetchEngine` failure or SPA shell detection), this per-request `markdown` option **overrides** the `PlaywrightEngine`'s default and determines if its output is Markdown.
|
|
243
|
+
- **`options.spaMode` (`boolean`):**
|
|
244
|
+
- Overrides the `HybridEngine`'s default SPA mode behavior for this specific request (affecting SPA shell detection and potential fallback to `PlaywrightEngine`).
|
|
245
|
+
- If `PlaywrightEngine` is used, this option also overrides its default SPA mode.
|
|
246
|
+
- **`options.fastMode` (`boolean`):**
|
|
247
|
+
- If `PlaywrightEngine` is used, this option overrides its `defaultFastMode` setting. It has no effect on `FetchEngine`.
|
|
231
248
|
|
|
232
249
|
```typescript
|
|
233
250
|
// Example: HybridEngine with SPA mode enabled by default
|
|
@@ -295,67 +312,72 @@ Errors during fetching are typically thrown as instances of `FetchError` (or its
|
|
|
295
312
|
- `originalError` (`Error | undefined`): The underlying error that caused this fetch error (e.g., a Playwright error object).
|
|
296
313
|
- `statusCode` (`number | undefined`): The HTTP status code, if relevant (especially for `FetchEngineHttpError`).
|
|
297
314
|
|
|
298
|
-
Common
|
|
299
|
-
|
|
300
|
-
-
|
|
301
|
-
-
|
|
302
|
-
-
|
|
303
|
-
-
|
|
304
|
-
-
|
|
305
|
-
-
|
|
306
|
-
-
|
|
315
|
+
Common `FetchError` codes and scenarios:
|
|
316
|
+
|
|
317
|
+
- **`ERR_HTTP_ERROR`**: Thrown by `FetchEngine` for HTTP status codes >= 400. `error.statusCode` will be set.
|
|
318
|
+
- **`ERR_NON_HTML_CONTENT`**: Thrown by `FetchEngine` if the content type is not HTML and `markdown` conversion is not requested.
|
|
319
|
+
- **`ERR_PLAYWRIGHT_OPERATION`**: A general error from `PlaywrightEngine` indicating a failure during a Playwright operation (e.g., page acquisition, navigation, interaction). The `originalError` property will often contain the specific Playwright error.
|
|
320
|
+
- **`ERR_NAVIGATION`**: Often seen as part of `ERR_PLAYWRIGHT_OPERATION`'s message or in `originalError` when a Playwright navigation fails (e.g., timeout, SSL error).
|
|
321
|
+
- **`ERR_MARKDOWN_CONVERSION_NON_HTML`**: Thrown by `PlaywrightEngine` (or `HybridEngine` if falling back to Playwright) if `markdown: true` is requested for a non-HTML content type (e.g., XML, JSON).
|
|
322
|
+
- **`ERR_UNSUPPORTED_RAW_CONTENT_TYPE`**: Thrown by `PlaywrightEngine` if `markdown: false` is requested for a content type it doesn't support for direct fetching (e.g., images, applications). Currently, when `markdown: false`, it primarily supports `text/*`, `application/json`, and `application/xml`-like types.
|
|
323
|
+
- **`ERR_CACHE_ERROR`**: Indicates an issue with cache read/write operations.
|
|
324
|
+
- **`ERR_PROXY_CONFIG_ERROR`**: Problem with proxy configuration.
|
|
325
|
+
- **`ERR_BROWSER_POOL_EXHAUSTED`**: Thrown when the browser pool cannot provide a page (e.g., max browsers reached and all are busy beyond timeout).
|
|
326
|
+
- **Other Scenarios (often wrapped by `ERR_PLAYWRIGHT_OPERATION` or a generic `FetchError`):**
|
|
327
|
+
- Network issues (DNS resolution, connection refused).
|
|
328
|
+
- Proxy connection failures.
|
|
329
|
+
- Page crashes or context/browser disconnections within Playwright.
|
|
330
|
+
- Failures during browser launch or management by the pool.
|
|
307
331
|
|
|
308
332
|
The `HTMLFetchResult` object may also contain an `error` property when the final fetch attempt failed after all retries but an earlier attempt produced some intermediate (potentially unusable) result data. It's generally best to rely on the thrown error for failure handling.
|
|
309
333
|
|
|
310
334
|
**Example:**
|
|
311
335
|
|
|
312
336
|
```typescript
|
|
313
|
-
import {
|
|
337
|
+
import { PlaywrightEngine, FetchError } from "@purepageio/fetch-engines";
|
|
314
338
|
|
|
315
|
-
|
|
339
|
+
// Example using PlaywrightEngine to illustrate more complex error handling
|
|
340
|
+
const engine = new PlaywrightEngine({ useHttpFallback: false, maxRetries: 1 });
|
|
316
341
|
|
|
317
342
|
async function fetchWithHandling(url: string) {
|
|
318
343
|
try {
|
|
319
344
|
const result = await engine.fetchHTML(url);
|
|
320
|
-
// Note: result.error is less common, primary errors are thrown.
|
|
321
345
|
if (result.error) {
|
|
322
|
-
console.
|
|
323
|
-
} else {
|
|
324
|
-
console.log(`Success for ${url}! Content type: ${result.contentType}`);
|
|
325
|
-
// Use result.content
|
|
346
|
+
console.warn(`Fetch for ${url} included non-critical error after retries: ${result.error.message}`);
|
|
326
347
|
}
|
|
348
|
+
console.log(`Success for ${url}! Title: ${result.title}, Content type: ${result.contentType}`);
|
|
349
|
+
// Use result.content
|
|
327
350
|
} catch (error) {
|
|
328
|
-
console.error(`Fetch failed
|
|
351
|
+
console.error(`Fetch failed for ${url}:`);
|
|
329
352
|
if (error instanceof FetchError) {
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
break;
|
|
335
|
-
case "ERR_NON_HTML_CONTENT":
|
|
336
|
-
console.error(` Wrong Content Type: ${error.message}`);
|
|
337
|
-
break;
|
|
338
|
-
// Add other specific codes as needed
|
|
339
|
-
default:
|
|
340
|
-
console.error(` FetchError (${error.code || "UNKNOWN"}): ${error.message}`);
|
|
341
|
-
break;
|
|
353
|
+
console.error(` Error Code: ${error.code || "N/A"}`);
|
|
354
|
+
console.error(` Message: ${error.message}`);
|
|
355
|
+
if (error.statusCode) {
|
|
356
|
+
console.error(` Status Code: ${error.statusCode}`);
|
|
342
357
|
}
|
|
343
358
|
if (error.originalError) {
|
|
344
|
-
console.error(` Original Error: ${error.originalError.message}`);
|
|
359
|
+
console.error(` Original Error: ${error.originalError.name} - ${error.originalError.message}`);
|
|
360
|
+
}
|
|
361
|
+
// Example of specific handling:
|
|
362
|
+
if (error.code === "ERR_PLAYWRIGHT_OPERATION") {
|
|
363
|
+
console.error(" Hint: This was a Playwright operation failure. Check Playwright logs or originalError.");
|
|
345
364
|
}
|
|
346
365
|
} else if (error instanceof Error) {
|
|
347
|
-
// Handle generic JavaScript errors
|
|
348
366
|
console.error(` Generic Error: ${error.message}`);
|
|
349
367
|
} else {
|
|
350
|
-
|
|
351
|
-
console.error(` Unknown error occurred.`);
|
|
368
|
+
console.error(` Unknown error occurred: ${String(error)}`);
|
|
352
369
|
}
|
|
353
370
|
}
|
|
354
371
|
}
|
|
355
372
|
|
|
356
|
-
|
|
357
|
-
fetchWithHandling("https://
|
|
358
|
-
fetchWithHandling("https://
|
|
373
|
+
async function runExamples() {
|
|
374
|
+
await fetchWithHandling("https://nonexistentdomain.example.com"); // Likely DNS or navigation error
|
|
375
|
+
await fetchWithHandling("https://example.com/non_html_resource.json"); // Test with actual JSON URL if available
|
|
376
|
+
// or a site known to cause Playwright issues for a demo.
|
|
377
|
+
await engine.cleanup(); // Important for PlaywrightEngine
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
runExamples();
|
|
359
381
|
```
|
|
360
382
|
|
|
361
383
|
## Logging
|
package/dist/HybridEngine.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export declare class HybridEngine implements IEngine {
|
|
|
7
7
|
private readonly fetchEngine;
|
|
8
8
|
private readonly playwrightEngine;
|
|
9
9
|
private readonly config;
|
|
10
|
+
private readonly playwrightOnlyPatterns;
|
|
10
11
|
constructor(config?: PlaywrightEngineConfig);
|
|
11
12
|
private _isSpaShell;
|
|
12
13
|
fetchHTML(url: string, options?: FetchOptions): Promise<HTMLFetchResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;
|
|
1
|
+
{"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;IAChD,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAsB;gBAEjD,MAAM,GAAE,sBAA2B;IAU/C,OAAO,CAAC,WAAW;IAkBb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA+DlF;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
|
package/dist/HybridEngine.js
CHANGED
|
@@ -7,6 +7,7 @@ export class HybridEngine {
|
|
|
7
7
|
fetchEngine;
|
|
8
8
|
playwrightEngine;
|
|
9
9
|
config; // Store config for potential per-request PW overrides
|
|
10
|
+
playwrightOnlyPatterns;
|
|
10
11
|
constructor(config = {}) {
|
|
11
12
|
// Pass relevant config parts to each engine
|
|
12
13
|
// FetchEngine only takes markdown option from the shared config
|
|
@@ -14,6 +15,7 @@ export class HybridEngine {
|
|
|
14
15
|
this.fetchEngine = new FetchEngine({ markdown: config.markdown });
|
|
15
16
|
this.playwrightEngine = new PlaywrightEngine(config);
|
|
16
17
|
this.config = config; // Store for merging later
|
|
18
|
+
this.playwrightOnlyPatterns = config.playwrightOnlyPatterns || [];
|
|
17
19
|
}
|
|
18
20
|
_isSpaShell(htmlContent) {
|
|
19
21
|
if (!htmlContent || htmlContent.length < 150) {
|
|
@@ -42,15 +44,24 @@ export class HybridEngine {
|
|
|
42
44
|
: this.config.markdown !== undefined
|
|
43
45
|
? this.config.markdown
|
|
44
46
|
: false;
|
|
45
|
-
// Prepare options for PlaywrightEngine, to be used in fallback scenarios
|
|
46
|
-
// The order of spread and explicit assignment ensures that effectiveSpaMode and effectiveMarkdown (HybridEngine's interpretation)
|
|
47
|
-
// are what PlaywrightEngine receives for these specific fields, while other configs are passed through.
|
|
47
|
+
// Prepare options for PlaywrightEngine, to be used in fallback scenarios or direct calls
|
|
48
48
|
const playwrightOptions = {
|
|
49
49
|
...this.config, // Start with base config given to HybridEngine (e.g. spaRenderDelayMs)
|
|
50
50
|
...options, // Apply all per-request overrides first
|
|
51
51
|
markdown: effectiveMarkdown, // Then ensure HybridEngine's resolved markdown is set
|
|
52
52
|
spaMode: effectiveSpaMode, // Then ensure HybridEngine's resolved spaMode is set
|
|
53
53
|
};
|
|
54
|
+
// Check playwrightOnlyPatterns first
|
|
55
|
+
for (const pattern of this.playwrightOnlyPatterns) {
|
|
56
|
+
if (typeof pattern === "string" && url.includes(pattern)) {
|
|
57
|
+
console.warn(`HybridEngine: URL ${url} matches string pattern "${pattern}". Using PlaywrightEngine directly.`);
|
|
58
|
+
return this.playwrightEngine.fetchHTML(url, playwrightOptions);
|
|
59
|
+
}
|
|
60
|
+
else if (pattern instanceof RegExp && pattern.test(url)) {
|
|
61
|
+
console.warn(`HybridEngine: URL ${url} matches regex pattern "${pattern.toString()}". Using PlaywrightEngine directly.`);
|
|
62
|
+
return this.playwrightEngine.fetchHTML(url, playwrightOptions);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
54
65
|
try {
|
|
55
66
|
const fetchResult = await this.fetchEngine.fetchHTML(url);
|
|
56
67
|
// If FetchEngine succeeded AND spaMode is active, check if it's just a shell
|
package/dist/HybridEngine.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;
|
|
1
|
+
{"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IACtF,sBAAsB,CAAsB;IAE7D,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,uGAAuG;QACvG,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;QAChD,IAAI,CAAC,sBAAsB,GAAG,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC;IACpE,CAAC;IAEO,WAAW,CAAC,WAAmB;QACrC,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC7C,+CAA+C;YAC/C,iFAAiF;YACjF,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;QACtD,CAAC;QACD,2BAA2B;QAC3B,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,mCAAmC;QACnC,IAAI,qDAAqD,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEzF,mDAAmD;QACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,oDAAoD;QACpD,gHAAgH;QAChH,MAAM,gBAAgB,GACpB,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACpH,MAAM,iBAAiB,GACrB,OAAO,CAAC,QAAQ,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC,QAAQ;YAClB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;gBAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ;gBACtB,CAAC,CAAC,KAAK,CAAC;QAEd,yFAAyF;QACzF,MAAM,iBAAiB,GAA6D;YAClF,GAAG,IAAI,CAAC,MAAM,EAAE,uEAAuE;YACvF,GAAG,OAAO,EAAE,wCAAwC;YACpD,QAAQ,EAAE,iBAAiB,EAAE,sDAAsD;YACnF,OAAO,EAAE,gBAAgB,EAAE,qDAAqD;SACjF,CAAC;QAEF,qCAAqC;QACrC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAClD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,qBAAqB,GAAG,4BAA4B,OAAO,qCAAqC,CAAC,CAAC;gBAC/G,OAAO,IAAI,CAAC,gB
AAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,OAAO,YAAY,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1D,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,2BAA2B,OAAO,CAAC,QAAQ,EAAE,qCAAqC,CAC3G,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAE1D,6EAA6E;YAC7E,IAAI,gBAAgB,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1C,OAAO,CAAC,IAAI,CACV,2DAA2D,GAAG,wCAAwC,CACvG,CAAC;oBACF,yEAAyE;oBACzE,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YACD,wFAAwF;YACxF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,wCAAwC,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACxG,CAAC;YACF,IAAI,CAAC;gBACH,yEAAyE;gBACzE,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CAAC,2DAA2D,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC5G,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -59,13 +59,40 @@ export declare class PlaywrightEngine implements IEngine {
|
|
|
59
59
|
markdown?: boolean;
|
|
60
60
|
spaMode?: boolean;
|
|
61
61
|
}): Promise<HTMLFetchResult>;
|
|
62
|
+
/**
|
|
63
|
+
* Helper to check cache and potentially return a cached result.
|
|
64
|
+
* Handles logic for re-fetching if cache is stale or content type mismatch for markdown.
|
|
65
|
+
*
|
|
66
|
+
* @param url URL to check in cache
|
|
67
|
+
* @param currentConfig Current fetch configuration
|
|
68
|
+
* @returns Cached result or null if not found/needs re-fetch.
|
|
69
|
+
*/
|
|
70
|
+
private _handleCacheCheck;
|
|
71
|
+
/**
|
|
72
|
+
* Attempts to fetch the URL using a simple HTTP GET request as a fallback.
|
|
73
|
+
*
|
|
74
|
+
* @param url The URL to fetch.
|
|
75
|
+
* @param currentConfig The current fetch configuration.
|
|
76
|
+
* @returns A Promise resolving to an HTMLFetchResult if successful, or null if fallback is skipped or a challenge page is encountered.
|
|
77
|
+
* @throws {FetchError} If the HTTP fallback itself fails with an unrecoverable error.
|
|
78
|
+
*/
|
|
79
|
+
private _attemptHttpFallback;
|
|
80
|
+
/**
|
|
81
|
+
* Ensures the browser pool is initialized with the correct mode (headed/headless).
|
|
82
|
+
* Handles one retry attempt if the initial pool initialization fails.
|
|
83
|
+
*
|
|
84
|
+
* @param useHeadedMode Whether to initialize the pool in headed mode.
|
|
85
|
+
* @param currentConfig The current fetch configuration (for retryDelay).
|
|
86
|
+
* @returns A Promise that resolves when the pool is initialized, or rejects if initialization fails after retries.
|
|
87
|
+
* @throws {FetchError} If pool initialization fails after retries or if the pool is unavailable.
|
|
88
|
+
*/
|
|
89
|
+
private _ensureBrowserPoolInitialized;
|
|
62
90
|
/**
|
|
63
91
|
* Internal recursive method to handle fetching with retries.
|
|
64
92
|
*
|
|
65
93
|
* @param url URL to fetch
|
|
66
94
|
* @param currentConfig The merged configuration including markdown option
|
|
67
95
|
* @param retryAttempt Current retry attempt number (starts at 0)
|
|
68
|
-
* @param parentRetryCount Tracks retries related to pool initialization errors (starts at 0)
|
|
69
96
|
* @returns Promise resolving to HTMLFetchResult
|
|
70
97
|
*/
|
|
71
98
|
private _fetchRecursive;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAyC5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IAGxD,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAsBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAwCnC;;;OAGG;YACW,yBAAyB;IAiEvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAwCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;;OASG;IACG,SAAS,CACb,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO,GACrE,OAAO,CAAC,eAAe,CAAC;IAc3B;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAmDzB;;;;;;;OAOG;YACW,oBAAoB;IAiClC;;;;;;;;OAQG;YACW,6BAA6B;IAmC3C;;;;;;;OAOG;YACW,eAAe;IAgH7B;;;OAGG;YACW,mBAAmB;YAqKnB,kBAAkB;IAyChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;CAS5B"}
|