@purepageio/fetch-engines 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +203 -102
- package/dist/FetchEngine.d.ts +9 -8
- package/dist/FetchEngine.d.ts.map +1 -1
- package/dist/FetchEngine.js +54 -77
- package/dist/FetchEngine.js.map +1 -1
- package/dist/HybridEngine.d.ts +13 -7
- package/dist/HybridEngine.d.ts.map +1 -1
- package/dist/HybridEngine.js +37 -17
- package/dist/HybridEngine.js.map +1 -1
- package/dist/PlaywrightEngine.d.ts +4 -2
- package/dist/PlaywrightEngine.d.ts.map +1 -1
- package/dist/PlaywrightEngine.js +97 -60
- package/dist/PlaywrightEngine.js.map +1 -1
- package/dist/types.d.ts +27 -11
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/markdown-converter.d.ts +31 -0
- package/dist/utils/markdown-converter.d.ts.map +1 -0
- package/dist/utils/markdown-converter.js +794 -0
- package/dist/utils/markdown-converter.js.map +1 -0
- package/package.json +6 -2
package/dist/FetchEngine.js
CHANGED
|
@@ -1,17 +1,14 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { MarkdownConverter } from "./utils/markdown-converter.js"; // Import the converter
|
|
2
|
+
import { FetchError } from "./errors.js"; // Only import FetchError
|
|
2
3
|
/**
|
|
3
4
|
* Custom error class for HTTP errors from FetchEngine.
|
|
4
5
|
*/
|
|
5
|
-
export class FetchEngineHttpError extends
|
|
6
|
+
export class FetchEngineHttpError extends FetchError {
|
|
6
7
|
statusCode;
|
|
7
8
|
constructor(message, statusCode) {
|
|
8
|
-
super(message);
|
|
9
|
-
this.name = "FetchEngineHttpError";
|
|
9
|
+
super(message, "ERR_HTTP_ERROR", undefined, statusCode);
|
|
10
10
|
this.statusCode = statusCode;
|
|
11
|
-
|
|
12
|
-
if (Error.captureStackTrace) {
|
|
13
|
-
Error.captureStackTrace(this, FetchEngineHttpError);
|
|
14
|
-
}
|
|
11
|
+
this.name = "FetchEngineHttpError";
|
|
15
12
|
}
|
|
16
13
|
}
|
|
17
14
|
/**
|
|
@@ -21,99 +18,81 @@ export class FetchEngineHttpError extends Error {
|
|
|
21
18
|
* It does not support advanced configurations like retries, caching, or proxies directly.
|
|
22
19
|
*/
|
|
23
20
|
export class FetchEngine {
|
|
24
|
-
|
|
21
|
+
options;
|
|
22
|
+
static DEFAULT_OPTIONS = {
|
|
23
|
+
markdown: false,
|
|
24
|
+
};
|
|
25
25
|
/**
|
|
26
26
|
* Creates an instance of FetchEngine.
|
|
27
|
-
*
|
|
27
|
+
* @param options Configuration options for the FetchEngine.
|
|
28
28
|
*/
|
|
29
|
-
constructor() {
|
|
30
|
-
this.
|
|
31
|
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
|
|
32
|
-
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
|
|
33
|
-
"Accept-Language": "en-US,en;q=0.5",
|
|
34
|
-
"Upgrade-Insecure-Requests": "1",
|
|
35
|
-
"Sec-Fetch-Dest": "document",
|
|
36
|
-
"Sec-Fetch-Mode": "navigate",
|
|
37
|
-
"Sec-Fetch-Site": "none",
|
|
38
|
-
"Sec-Fetch-User": "?1",
|
|
39
|
-
};
|
|
29
|
+
constructor(options = {}) {
|
|
30
|
+
this.options = { ...FetchEngine.DEFAULT_OPTIONS, ...options };
|
|
40
31
|
}
|
|
41
32
|
/**
|
|
42
|
-
* Fetches HTML
|
|
33
|
+
* Fetches HTML or converts to Markdown from the specified URL.
|
|
43
34
|
*
|
|
44
35
|
* @param url The URL to fetch.
|
|
45
36
|
* @returns A Promise resolving to an HTMLFetchResult object.
|
|
46
37
|
* @throws {FetchEngineHttpError} If the HTTP response status is not ok (e.g., 404, 500).
|
|
47
38
|
* @throws {Error} If the content type is not HTML or for other network errors.
|
|
48
39
|
*/
|
|
49
|
-
async fetchHTML(url) {
|
|
40
|
+
async fetchHTML(url, options) {
|
|
41
|
+
const effectiveOptions = { ...this.options, ...options }; // Combine constructor and call options
|
|
42
|
+
let response;
|
|
50
43
|
try {
|
|
51
|
-
|
|
52
|
-
headers: this.headers,
|
|
44
|
+
response = await fetch(url, {
|
|
53
45
|
redirect: "follow",
|
|
46
|
+
headers: {
|
|
47
|
+
// Standard browser-like headers
|
|
48
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
|
|
49
|
+
Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
|
|
50
|
+
"Accept-Language": "en-US,en;q=0.9",
|
|
51
|
+
},
|
|
54
52
|
});
|
|
55
53
|
if (!response.ok) {
|
|
56
|
-
// Throw the custom error with status code
|
|
57
54
|
throw new FetchEngineHttpError(`HTTP error! status: ${response.status}`, response.status);
|
|
58
55
|
}
|
|
59
|
-
const
|
|
60
|
-
if (!
|
|
61
|
-
throw new
|
|
56
|
+
const contentTypeHeader = response.headers.get("content-type");
|
|
57
|
+
if (!contentTypeHeader || !contentTypeHeader.includes("text/html")) {
|
|
58
|
+
throw new FetchError("Content-Type is not text/html", "ERR_NON_HTML_CONTENT");
|
|
62
59
|
}
|
|
63
60
|
const html = await response.text();
|
|
64
|
-
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
61
|
+
const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
|
|
62
|
+
const title = titleMatch ? titleMatch[1].trim() : null;
|
|
63
|
+
let finalContent = html;
|
|
64
|
+
let finalContentType = "html";
|
|
65
|
+
if (effectiveOptions.markdown) {
|
|
66
|
+
try {
|
|
67
|
+
const converter = new MarkdownConverter();
|
|
68
|
+
finalContent = converter.convert(html);
|
|
69
|
+
finalContentType = "markdown";
|
|
70
|
+
}
|
|
71
|
+
catch (conversionError) {
|
|
72
|
+
console.error(`Markdown conversion failed for ${url} (FetchEngine):`, conversionError);
|
|
73
|
+
// Fallback to original HTML on conversion error
|
|
74
|
+
}
|
|
74
75
|
}
|
|
75
76
|
return {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
77
|
+
content: finalContent,
|
|
78
|
+
contentType: finalContentType,
|
|
79
|
+
title: title,
|
|
80
|
+
url: response.url, // Use the final URL after redirects
|
|
81
|
+
isFromCache: false,
|
|
80
82
|
statusCode: response.status,
|
|
81
83
|
error: undefined,
|
|
82
84
|
};
|
|
83
85
|
}
|
|
84
86
|
catch (error) {
|
|
85
|
-
//
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
const spaMarkers = [
|
|
94
|
-
// React
|
|
95
|
-
"[data-reactroot]",
|
|
96
|
-
"#root",
|
|
97
|
-
"#app",
|
|
98
|
-
// Vue
|
|
99
|
-
"[data-v-app]",
|
|
100
|
-
"#app[data-v-]",
|
|
101
|
-
// Angular
|
|
102
|
-
"[ng-version]",
|
|
103
|
-
"[ng-app]",
|
|
104
|
-
// Common SPA patterns
|
|
105
|
-
'script[type="application/json+ld"]', // Less reliable marker
|
|
106
|
-
'meta[name="fragment"]',
|
|
107
|
-
];
|
|
108
|
-
// Check if the body is nearly empty but has JS (More reliable)
|
|
109
|
-
const bodyContent = document.body?.textContent?.trim() || "";
|
|
110
|
-
const hasScripts = document.scripts.length > 0;
|
|
111
|
-
if (bodyContent.length < 150 && hasScripts) {
|
|
112
|
-
// Increased threshold slightly
|
|
113
|
-
return true;
|
|
87
|
+
// Re-throw specific known errors directly
|
|
88
|
+
if (error instanceof FetchEngineHttpError ||
|
|
89
|
+
(error instanceof FetchError && error.code === "ERR_NON_HTML_CONTENT")) {
|
|
90
|
+
throw error;
|
|
91
|
+
}
|
|
92
|
+
// Wrap other/unexpected errors
|
|
93
|
+
const message = error instanceof Error ? error.message : "Unknown fetch error";
|
|
94
|
+
throw new FetchError(`Fetch failed: ${message}`, "ERR_FETCH_FAILED", error instanceof Error ? error : undefined);
|
|
114
95
|
}
|
|
115
|
-
// Check for SPA markers (Less reliable)
|
|
116
|
-
return spaMarkers.some((selector) => document.querySelector(selector) !== null);
|
|
117
96
|
}
|
|
118
97
|
/**
|
|
119
98
|
* Cleans up resources used by the engine.
|
|
@@ -121,8 +100,7 @@ export class FetchEngine {
|
|
|
121
100
|
* @returns A Promise that resolves when cleanup is complete.
|
|
122
101
|
*/
|
|
123
102
|
async cleanup() {
|
|
124
|
-
|
|
125
|
-
return Promise.resolve(); // Explicitly return resolved promise
|
|
103
|
+
return Promise.resolve();
|
|
126
104
|
}
|
|
127
105
|
/**
|
|
128
106
|
* Retrieves metrics for the engine.
|
|
@@ -130,7 +108,6 @@ export class FetchEngine {
|
|
|
130
108
|
* @returns An empty array.
|
|
131
109
|
*/
|
|
132
110
|
getMetrics() {
|
|
133
|
-
// Fetch engine doesn't maintain browser pool metrics
|
|
134
111
|
return [];
|
|
135
112
|
}
|
|
136
113
|
}
|
package/dist/FetchEngine.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"FetchEngine.js","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"FetchEngine.js","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC,CAAC,uBAAuB;AAC1F,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC,CAAC,yBAAyB;AAEnE;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,UAAU;IAGhC;IAFlB,YACE,OAAe,EACC,UAAkB;QAElC,KAAK,CAAC,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAFxC,eAAU,GAAV,UAAU,CAAQ;QAGlC,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,WAAW;IACL,OAAO,CAA+B;IAE/C,MAAM,CAAU,eAAe,GAAiC;QACtE,QAAQ,EAAE,KAAK;KAChB,CAAC;IAEF;;;OAGG;IACH,YAAY,UAA8B,EAAE;QAC1C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,WAAW,CAAC,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAChE,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,OAA4B;QACvD,MAAM,gBAAgB,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,uCAAuC;QACjG,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC1B,QAAQ,EAAE,QAAQ;gBAClB,OAAO,EAAE;oBACP,gCAAgC;oBAChC,YAAY,EACV,iHAAiH;oBACnH,MAAM,EAAE,kGAAkG;oBAC1G,iBAAiB,EAAE,gBAAgB;iBACpC;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,oBAAoB,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC5F,CAAC;YAED,MAAM,iBAAiB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YAC/D,IAAI,CAAC,iBAAiB,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACnE,MAAM,IAAI,UAAU,CAAC,+BAA+B,EAAE,sBAAsB,CAAC,CAAC;YAChF,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;YAC/D,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvD,IAAI,YAAY,GAAG,IAAI,CAAC;YACxB,IAAI,gBAAgB,GAAwB,MAAM,CAAC;YAEnD,IAAI,gBAAgB,CAAC,QAAQ,EAAE,CAAC;gBAC9B,IAAI,CAAC;oBACH,MAAM,SAAS,GAAG,IAAI,iBAAiB,EAAE,CAAC;oBAC1C,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBACvC,gBAAgB,GAAG,UAAU,CAAC;gBAChC,CAAC;gBAAC,OAAO,eAAoB,EAAE,CAAC;oBAC9B,OAAO,CAAC,KAAK,CAAC,kCAAkC,GAAG,iBAAiB,EAAE,eAAe,CAAC,CAAC;oBACvF,gDAAgD;gBAClD,CAAC;YACH,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,YAAY;gBACrB,WAAW,EAAE,gBAAgB;gBAC7B,KAAK,EAAE,KAAK;gBACZ,GAAG,EAAE,QAAQ,CAAC,GAAG,EAAE,oCAAoC;gBACvD,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,QAAQ,CAAC,MAAM;gBAC3B,KAAK,EAAE,SAAS;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,0CAA0C;YAC1C,IACE,KAAK,YAAY,oBAAoB;gBACrC,CAAC,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,IAAI,KAAK,sBAAsB,CAAC,EACtE,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YACD,+BAA+B;YAC/B,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC;YAC/E,MAAM,IAAI,UAAU,CAAC,iBAAiB,OAAO,EAAE,EAAE,kBAAkB,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACnH,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,UAAU;QACR,OAAO,EAAE,CAAC;IACZ,CAAC"}
|
package/dist/HybridEngine.d.ts
CHANGED
|
@@ -1,15 +1,21 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import {
|
|
1
|
+
import type { IEngine } from "./IEngine.js";
|
|
2
|
+
import type { HTMLFetchResult, PlaywrightEngineConfig, FetchOptions, BrowserMetrics } from "./types.js";
|
|
3
3
|
/**
|
|
4
|
-
* HybridEngine -
|
|
5
|
-
* then falls back to PlaywrightEngine for complex sites or specific errors.
|
|
4
|
+
* HybridEngine - Tries FetchEngine first, falls back to PlaywrightEngine on failure.
|
|
6
5
|
*/
|
|
7
6
|
export declare class HybridEngine implements IEngine {
|
|
8
7
|
private readonly fetchEngine;
|
|
9
8
|
private readonly playwrightEngine;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
private readonly config;
|
|
10
|
+
constructor(config?: PlaywrightEngineConfig);
|
|
11
|
+
fetchHTML(url: string, options?: FetchOptions): Promise<HTMLFetchResult>;
|
|
12
|
+
/**
|
|
13
|
+
* Delegates getMetrics to the PlaywrightEngine.
|
|
14
|
+
*/
|
|
13
15
|
getMetrics(): BrowserMetrics[];
|
|
16
|
+
/**
|
|
17
|
+
* Calls cleanup on both underlying engines.
|
|
18
|
+
*/
|
|
19
|
+
cleanup(): Promise<void>;
|
|
14
20
|
}
|
|
15
21
|
//# sourceMappingURL=HybridEngine.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;gBAEpC,MAAM,GAAE,sBAA2B;IAQzC,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA8BlF;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
|
package/dist/HybridEngine.js
CHANGED
|
@@ -1,42 +1,62 @@
|
|
|
1
1
|
import { FetchEngine } from "./FetchEngine.js";
|
|
2
2
|
import { PlaywrightEngine } from "./PlaywrightEngine.js";
|
|
3
3
|
/**
|
|
4
|
-
* HybridEngine -
|
|
5
|
-
* then falls back to PlaywrightEngine for complex sites or specific errors.
|
|
4
|
+
* HybridEngine - Tries FetchEngine first, falls back to PlaywrightEngine on failure.
|
|
6
5
|
*/
|
|
7
6
|
export class HybridEngine {
|
|
8
7
|
fetchEngine;
|
|
9
8
|
playwrightEngine;
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
config; // Store config for potential per-request PW overrides
|
|
10
|
+
constructor(config = {}) {
|
|
11
|
+
// Pass relevant config parts to each engine
|
|
12
|
+
// FetchEngine only takes markdown option from the shared config
|
|
13
|
+
this.fetchEngine = new FetchEngine({ markdown: config.markdown });
|
|
14
|
+
this.playwrightEngine = new PlaywrightEngine(config);
|
|
15
|
+
this.config = config; // Store for merging later
|
|
13
16
|
}
|
|
14
|
-
async fetchHTML(url) {
|
|
17
|
+
async fetchHTML(url, options = {}) {
|
|
18
|
+
// FetchEngine uses its constructor config; it doesn't accept per-request options here.
|
|
15
19
|
try {
|
|
16
|
-
// Attempt 1: Use the fast FetchEngine
|
|
17
20
|
const fetchResult = await this.fetchEngine.fetchHTML(url);
|
|
21
|
+
// If fetch succeeded, return its result directly (it handles its own markdown config)
|
|
22
|
+
// No need to check contentType here, FetchEngine handles it based on its constructor.
|
|
18
23
|
return fetchResult;
|
|
19
24
|
}
|
|
20
|
-
catch (
|
|
21
|
-
|
|
22
|
-
//
|
|
25
|
+
catch (fetchError) {
|
|
26
|
+
console.warn(`FetchEngine failed for ${url}: ${fetchError.message}. Falling back to PlaywrightEngine.`);
|
|
27
|
+
// Merge constructor config with per-request options for Playwright fallback
|
|
28
|
+
const playwrightOptions = {
|
|
29
|
+
...this.config, // Start with base config given to HybridEngine
|
|
30
|
+
...options, // Override with per-request options
|
|
31
|
+
};
|
|
23
32
|
try {
|
|
24
|
-
|
|
33
|
+
// Pass merged options to PlaywrightEngine
|
|
34
|
+
const playwrightResult = await this.playwrightEngine.fetchHTML(url, playwrightOptions);
|
|
25
35
|
return playwrightResult;
|
|
26
36
|
}
|
|
27
37
|
catch (playwrightError) {
|
|
28
|
-
//
|
|
38
|
+
// Catch potential Playwright error
|
|
39
|
+
console.error(`PlaywrightEngine fallback failed for ${url}: ${playwrightError.message}`);
|
|
40
|
+
// Optionally, wrap or prioritize which error to throw
|
|
41
|
+
// Throwing the Playwright error as it's the last one encountered
|
|
29
42
|
throw playwrightError;
|
|
30
43
|
}
|
|
31
44
|
}
|
|
32
45
|
}
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
}
|
|
46
|
+
/**
|
|
47
|
+
* Delegates getMetrics to the PlaywrightEngine.
|
|
48
|
+
*/
|
|
37
49
|
getMetrics() {
|
|
38
|
-
// FetchEngine doesn't produce metrics, only PlaywrightEngine does
|
|
39
50
|
return this.playwrightEngine.getMetrics();
|
|
40
51
|
}
|
|
52
|
+
/**
|
|
53
|
+
* Calls cleanup on both underlying engines.
|
|
54
|
+
*/
|
|
55
|
+
async cleanup() {
|
|
56
|
+
await Promise.allSettled([
|
|
57
|
+
this.fetchEngine.cleanup(), // Although a no-op, call for consistency
|
|
58
|
+
this.playwrightEngine.cleanup(),
|
|
59
|
+
]);
|
|
60
|
+
}
|
|
41
61
|
}
|
|
42
62
|
//# sourceMappingURL=HybridEngine.js.map
|
package/dist/HybridEngine.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD
|
|
1
|
+
{"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IAEvG,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;IAClD,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,uFAAuF;QACvF,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAC1D,sFAAsF;YACtF,sFAAsF;YACtF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CAAC,0BAA0B,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CAAC,CAAC;YAExG,4EAA4E;YAC5E,MAAM,iBAAiB,GAAiB;gBACtC,GAAG,IAAI,CAAC,MAAM,EAAE,+CAA+C;gBAC/D,GAAG,OAAO,EAAE,oCAAoC;aACjD,CAAC;YAEF,IAAI,CAAC;gBACH,0CAA0C;gBAC1C,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,mCAAmC;gBACnC,OAAO,CAAC,KAAK,CAAC,wCAAwC,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBACzF,sDAAsD;gBACtD,iEAAiE;gBACjE,MAAM,eAAe,CAAC;YACxB,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -54,12 +54,14 @@ export declare class PlaywrightEngine implements IEngine {
|
|
|
54
54
|
* @returns A Promise resolving to an HTMLFetchResult object.
|
|
55
55
|
* @throws {FetchError} If the fetch fails after all retries or encounters critical errors.
|
|
56
56
|
*/
|
|
57
|
-
fetchHTML(url: string, options?: FetchOptions
|
|
57
|
+
fetchHTML(url: string, options?: FetchOptions & {
|
|
58
|
+
markdown?: boolean;
|
|
59
|
+
}): Promise<HTMLFetchResult>;
|
|
58
60
|
/**
|
|
59
61
|
* Internal recursive method to handle fetching with retries.
|
|
60
62
|
*
|
|
61
63
|
* @param url URL to fetch
|
|
62
|
-
* @param
|
|
64
|
+
* @param currentConfig The merged configuration including markdown option
|
|
63
65
|
* @param retryAttempt Current retry attempt number (starts at 0)
|
|
64
66
|
* @param parentRetryCount Tracks retries related to pool initialization errors (starts at 0)
|
|
65
67
|
* @returns Promise resolving to HTMLFetchResult
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAmB5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAmC;IAG1D,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAkBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAuCnC;;;OAGG;YACW,yBAAyB;IAmFvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAqCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;OAQG;IACG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAA;KAAO,GAAG,OAAO,CAAC,eAAe,CAAC;IAU3G;;;;;;;;OAQG;YACW,eAAe;IAsH7B;;;OAGG;YACW,mBAAmB;YAmFnB,kBAAkB;IAmChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAe9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;CAS5B"}
|