@purepageio/fetch-engines 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import type { HTMLFetchResult, BrowserMetrics, FetchEngineOptions } from "./types.js";
1
+ import type { HTMLFetchResult, ContentFetchResult, ContentFetchOptions, BrowserMetrics, FetchEngineOptions } from "./types.js";
2
2
  import type { IEngine } from "./IEngine.js";
3
3
  import { FetchError } from "./errors.js";
4
4
  /**
@@ -31,6 +31,16 @@ export declare class FetchEngine implements IEngine {
31
31
  * @throws {Error} If the content type is not HTML or for other network errors.
32
32
  */
33
33
  fetchHTML(url: string, options?: FetchEngineOptions): Promise<HTMLFetchResult>;
34
+ /**
35
+ * Fetches raw content from the specified URL (mimics standard fetch API).
36
+ *
37
+ * @param url The URL to fetch.
38
+ * @param options Optional fetch options.
39
+ * @returns A Promise resolving to a ContentFetchResult object.
40
+ * @throws {FetchEngineHttpError} If the HTTP response status is not ok (e.g., 404, 500).
41
+ * @throws {Error} For network errors or other fetch failures.
42
+ */
43
+ fetchContent(url: string, options?: ContentFetchOptions): Promise<ContentFetchResult>;
34
44
  /**
35
45
  * Cleans up resources used by the engine.
36
46
  * For FetchEngine, this is a no-op as it doesn't manage persistent resources.
@@ -1 +1 @@
1
- {"version":3,"file":"FetchEngine.d.ts","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AACtF,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAG5C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,qBAAa,oBAAqB,SAAQ,UAAU;aAGhC,UAAU,EAAE,MAAM;gBADlC,OAAO,EAAE,MAAM,EACC,UAAU,EAAE,MAAM;CAKrC;AAED;;;;;GAKG;AACH,qBAAa,WAAY,YAAW,OAAO;IACzC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA+B;IAEvD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAErC;IAEF;;;OAGG;gBACS,OAAO,GAAE,kBAAuB;IAI5C;;;;;;;OAOG;IACG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,eAAe,CAAC;IAiEpF;;;;OAIG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B;;;;OAIG;IACH,UAAU,IAAI,cAAc,EAAE;CAG/B"}
1
+ {"version":3,"file":"FetchEngine.d.ts","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAClB,mBAAmB,EACnB,cAAc,EACd,kBAAkB,EACnB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAG5C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,qBAAa,oBAAqB,SAAQ,UAAU;aAGhC,UAAU,EAAE,MAAM;gBADlC,OAAO,EAAE,MAAM,EACC,UAAU,EAAE,MAAM;CAKrC;AAED;;;;;GAKG;AACH,qBAAa,WAAY,YAAW,OAAO;IACzC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA+B;IAEvD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAGrC;IAEF;;;OAGG;gBACS,OAAO,GAAE,kBAAuB;IAI5C;;;;;;;OAOG;IACG,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,eAAe,CAAC;IA+EpF;;;;;;;;OAQG;IACG,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IA8E3F;;;;OAIG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAI9B;;;;OAIG;IACH,UAAU,IAAI,cAAc,EAAE;CAG/B"}
@@ -21,6 +21,7 @@ export class FetchEngine {
21
21
  options;
22
22
  static DEFAULT_OPTIONS = {
23
23
  markdown: false,
24
+ headers: {},
24
25
  };
25
26
  /**
26
27
  * Creates an instance of FetchEngine.
@@ -41,14 +42,24 @@ export class FetchEngine {
41
42
  const effectiveOptions = { ...this.options, ...options }; // Combine constructor and call options
42
43
  let response;
43
44
  try {
45
+ const baseHeaders = {
46
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
47
+ Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
48
+ "Accept-Language": "en-US,en;q=0.9",
49
+ };
50
+ // this.options.headers are headers passed to the constructor
51
+ const constructorHeaders = this.options.headers || {};
52
+ // options.headers are headers passed directly to the fetchHTML method
53
+ // options is the second argument to fetchHTML: async fetchHTML(url: string, options?: FetchEngineOptions)
54
+ const callSpecificHeaders = options?.headers || {};
55
+ const finalHeaders = {
56
+ ...baseHeaders,
57
+ ...constructorHeaders,
58
+ ...callSpecificHeaders, // Ensures callSpecificHeaders override constructorHeaders, which override baseHeaders
59
+ };
44
60
  response = await fetch(url, {
45
61
  redirect: "follow",
46
- headers: {
47
- // Standard browser-like headers
48
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
49
- Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
50
- "Accept-Language": "en-US,en;q=0.9",
51
- },
62
+ headers: finalHeaders,
52
63
  });
53
64
  if (!response.ok) {
54
65
  throw new FetchEngineHttpError(`HTTP error! status: ${response.status}`, response.status);
@@ -94,6 +105,79 @@ export class FetchEngine {
94
105
  throw new FetchError(`Fetch failed: ${message}`, "ERR_FETCH_FAILED", error instanceof Error ? error : undefined);
95
106
  }
96
107
  }
108
+ /**
109
+ * Fetches raw content from the specified URL (mimics standard fetch API).
110
+ *
111
+ * @param url The URL to fetch.
112
+ * @param options Optional fetch options.
113
+ * @returns A Promise resolving to a ContentFetchResult object.
114
+ * @throws {FetchEngineHttpError} If the HTTP response status is not ok (e.g., 404, 500).
115
+ * @throws {Error} For network errors or other fetch failures.
116
+ */
117
+ async fetchContent(url, options) {
118
+ let response;
119
+ try {
120
+ const baseHeaders = {
121
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
122
+ Accept: "*/*", // Accept any content type for raw content fetching
123
+ };
124
+ // Merge headers with the same precedence as fetchHTML
125
+ const constructorHeaders = this.options.headers || {};
126
+ const callSpecificHeaders = options?.headers || {};
127
+ const finalHeaders = {
128
+ ...baseHeaders,
129
+ ...constructorHeaders,
130
+ ...callSpecificHeaders,
131
+ };
132
+ response = await fetch(url, {
133
+ redirect: "follow",
134
+ headers: finalHeaders,
135
+ });
136
+ if (!response.ok) {
137
+ throw new FetchEngineHttpError(`HTTP error! status: ${response.status}`, response.status);
138
+ }
139
+ const contentTypeHeader = response.headers.get("content-type") || "application/octet-stream";
140
+ // Determine if content is text-based or binary
141
+ const isTextBased = contentTypeHeader.startsWith("text/") ||
142
+ contentTypeHeader.includes("json") ||
143
+ contentTypeHeader.includes("xml") ||
144
+ contentTypeHeader.includes("javascript") ||
145
+ contentTypeHeader.includes("html") ||
146
+ contentTypeHeader.includes("css");
147
+ let content;
148
+ if (isTextBased) {
149
+ content = await response.text();
150
+ }
151
+ else {
152
+ const arrayBuffer = await response.arrayBuffer();
153
+ content = Buffer.from(arrayBuffer);
154
+ }
155
+ // Extract title only if content is HTML
156
+ let title = null;
157
+ if (typeof content === "string" && contentTypeHeader.includes("html")) {
158
+ const titleMatch = content.match(/<title[^>]*>([^<]+)<\/title>/i);
159
+ title = titleMatch ? titleMatch[1].trim() : null;
160
+ }
161
+ return {
162
+ content,
163
+ contentType: contentTypeHeader,
164
+ title,
165
+ url: response.url, // Use the final URL after redirects
166
+ isFromCache: false,
167
+ statusCode: response.status,
168
+ error: undefined,
169
+ };
170
+ }
171
+ catch (error) {
172
+ // Re-throw specific known errors directly
173
+ if (error instanceof FetchEngineHttpError) {
174
+ throw error;
175
+ }
176
+ // Wrap other/unexpected errors
177
+ const message = error instanceof Error ? error.message : "Unknown fetch error";
178
+ throw new FetchError(`Content fetch failed: ${message}`, "ERR_FETCH_FAILED", error instanceof Error ? error : undefined);
179
+ }
180
+ }
97
181
  /**
98
182
  * Cleans up resources used by the engine.
99
183
  * For FetchEngine, this is a no-op as it doesn't manage persistent resources.
@@ -1 +1 @@
1
- {"version":3,"file":"FetchEngine.js","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC,CAAC,uBAAuB;AAC1F,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC,CAAC,yBAAyB;AAEnE;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,UAAU;IAGhC;IAFlB,YACE,OAAe,EACC,UAAkB;QAElC,KAAK,CAAC,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAFxC,eAAU,GAAV,UAAU,CAAQ;QAGlC,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,WAAW;IACL,OAAO,CAA+B;IAE/C,MAAM,CAAU,eAAe,GAAiC;QACtE,QAAQ,EAAE,KAAK;KAChB,CAAC;IAEF;;;OAGG;IACH,YAAY,UAA8B,EAAE;QAC1C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,WAAW,CAAC,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAChE,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,OAA4B;QACvD,MAAM,gBAAgB,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,uCAAuC;QACjG,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC1B,QAAQ,EAAE,QAAQ;gBAClB,OAAO,EAAE;oBACP,gCAAgC;oBAChC,YAAY,EACV,iHAAiH;oBACnH,MAAM,EAAE,kGAAkG;oBAC1G,iBAAiB,EAAE,gBAAgB;iBACpC;aACF,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,oBAAoB,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC5F,CAAC;YAED,MAAM,iBAAiB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YAC/D,IAAI,CAAC,iBAAiB,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACnE,MAAM,IAAI,UAAU,CAAC,+BAA+B,EAAE,sBAAsB,CAAC,CAAC;YAChF,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;YAC/D,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvD,IAAI,YAAY,GAAG,IAAI,CAAC;YACxB,IAAI,gBAAgB,GAAwB,MAAM,CAAC;YAEnD,IAAI,gBAAgB,CAAC,QAAQ,EAAE,CAAC;gBAC9B,IAAI,CAAC;oBACH,MAAM,SAAS,GAAG,IAAI,iBAAiB,EAAE,CAAC;oBAC1C,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBACvC,gBAAgB,GAAG,UAAU,CAAC;gBAChC,CAAC;gBAAC,OAAO,eAAoB,EAAE,CAAC;oBAC9B,OAAO,CAAC,KAAK,CAAC,kCAAkC,GAAG,iBAAiB,EAAE,eAAe,CAAC,CAAC;oBACvF,gDAAgD;gBAClD,CAAC;YACH,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,YAAY;gBACrB,WAAW,EAAE,gBAAgB;gBAC7B,KAAK,EAAE,KAAK;gBACZ,GAAG,EAAE,QAAQ,CAAC,GAAG,EAAE,oCAAoC;gBACvD,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,QAAQ,CAAC,MAAM;gBAC3B,KAAK,EAAE,SAAS;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,0CAA0C;YAC1C,IACE,KAAK,YAAY,oBAAoB;gBACrC,CAAC,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,IAAI,KAAK,sBAAsB,CAAC,EACtE,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YACD,+BAA+B;YAC/B,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC;YAC/E,MAAM,IAAI,UAAU,CAAC,iBAAiB,OAAO,EAAE,EAAE,kBAAkB,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACnH,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,UAAU;QACR,OAAO,EAAE,CAAC;IACZ,CAAC"}
1
+ {"version":3,"file":"FetchEngine.js","sourceRoot":"","sources":["../src/FetchEngine.ts"],"names":[],"mappings":"AASA,OAAO,EAAE,iBAAiB,EAAE,MAAM,+BAA+B,CAAC,CAAC,uBAAuB;AAC1F,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC,CAAC,yBAAyB;AAEnE;;GAEG;AACH,MAAM,OAAO,oBAAqB,SAAQ,UAAU;IAGhC;IAFlB,YACE,OAAe,EACC,UAAkB;QAElC,KAAK,CAAC,OAAO,EAAE,gBAAgB,EAAE,SAAS,EAAE,UAAU,CAAC,CAAC;QAFxC,eAAU,GAAV,UAAU,CAAQ;QAGlC,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;GAKG;AACH,MAAM,OAAO,WAAW;IACL,OAAO,CAA+B;IAE/C,MAAM,CAAU,eAAe,GAAiC;QACtE,QAAQ,EAAE,KAAK;QACf,OAAO,EAAE,EAAE;KACZ,CAAC;IAEF;;;OAGG;IACH,YAAY,UAA8B,EAAE;QAC1C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,WAAW,CAAC,eAAe,EAAE,GAAG,OAAO,EAAE,CAAC;IAChE,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,OAA4B;QACvD,MAAM,gBAAgB,GAAG,EAAE,GAAG,IAAI,CAAC,OAAO,EAAE,GAAG,OAAO,EAAE,CAAC,CAAC,uCAAuC;QACjG,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG;gBAClB,YAAY,EACV,iHAAiH;gBACnH,MAAM,EAAE,kGAAkG;gBAC1G,iBAAiB,EAAE,gBAAgB;aACpC,CAAC;YAEF,6DAA6D;YAC7D,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;YAEtD,sEAAsE;YACtE,0GAA0G;YAC1G,MAAM,mBAAmB,GAAG,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;YAEnD,MAAM,YAAY,GAAG;gBACnB,GAAG,WAAW;gBACd,GAAG,kBAAkB;gBACrB,GAAG,mBAAmB,EAAE,sFAAsF;aAC/G,CAAC;YAEF,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC1B,QAAQ,EAAE,QAAQ;gBAClB,OAAO,EAAE,YAAY;aACtB,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,oBAAoB,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC5F,CAAC;YAED,MAAM,iBAAiB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;YAC/D,IAAI,CAAC,iBAAiB,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;gBACnE,MAAM,IAAI,UAAU,CAAC,+BAA+B,EAAE,sBAAsB,CAAC,CAAC;YAChF,CAAC;YAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;YAC/D,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YAEvD,IAAI,YAAY,GAAG,IAAI,CAAC;YACxB,IAAI,gBAAgB,GAAwB,MAAM,CAAC;YAEnD,IAAI,gBAAgB,CAAC,QAAQ,EAAE,CAAC;gBAC9B,IAAI,CAAC;oBACH,MAAM,SAAS,GAAG,IAAI,iBAAiB,EAAE,CAAC;oBAC1C,YAAY,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBACvC,gBAAgB,GAAG,UAAU,CAAC;gBAChC,CAAC;gBAAC,OAAO,eAAoB,EAAE,CAAC;oBAC9B,OAAO,CAAC,KAAK,CAAC,kCAAkC,GAAG,iBAAiB,EAAE,eAAe,CAAC,CAAC;oBACvF,gDAAgD;gBAClD,CAAC;YACH,CAAC;YAED,OAAO;gBACL,OAAO,EAAE,YAAY;gBACrB,WAAW,EAAE,gBAAgB;gBAC7B,KAAK,EAAE,KAAK;gBACZ,GAAG,EAAE,QAAQ,CAAC,GAAG,EAAE,oCAAoC;gBACvD,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,QAAQ,CAAC,MAAM;gBAC3B,KAAK,EAAE,SAAS;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,0CAA0C;YAC1C,IACE,KAAK,YAAY,oBAAoB;gBACrC,CAAC,KAAK,YAAY,UAAU,IAAI,KAAK,CAAC,IAAI,KAAK,sBAAsB,CAAC,EACtE,CAAC;gBACD,MAAM,KAAK,CAAC;YACd,CAAC;YACD,+BAA+B;YAC/B,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC;YAC/E,MAAM,IAAI,UAAU,CAAC,iBAAiB,OAAO,EAAE,EAAE,kBAAkB,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QACnH,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACH,KAAK,CAAC,YAAY,CAAC,GAAW,EAAE,OAA6B;QAC3D,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,MAAM,WAAW,GAAG;gBAClB,YAAY,EACV,iHAAiH;gBACnH,MAAM,EAAE,KAAK,EAAE,mDAAmD;aACnE,CAAC;YAEF,sDAAsD;YACtD,MAAM,kBAAkB,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC;YACtD,MAAM,mBAAmB,GAAG,OAAO,EAAE,OAAO,IAAI,EAAE,CAAC;YAEnD,MAAM,YAAY,GAAG;gBACnB,GAAG,WAAW;gBACd,GAAG,kBAAkB;gBACrB,GAAG,mBAAmB;aACvB,CAAC;YAEF,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC1B,QAAQ,EAAE,QAAQ;gBAClB,OAAO,EAAE,YAAY;aACtB,CAAC,CAAC;YAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;gBACjB,MAAM,IAAI,oBAAoB,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,EAAE,QAAQ,CAAC,MAAM,CAAC,CAAC;YAC5F,CAAC;YAED,MAAM,iBAAiB,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,0BAA0B,CAAC;YAE7F,+CAA+C;YAC/C,MAAM,WAAW,GACf,iBAAiB,CAAC,UAAU,CAAC,OAAO,CAAC;gBACrC,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAClC,iBAAiB,CAAC,QAAQ,CAAC,KAAK,CAAC;gBACjC,iBAAiB,CAAC,QAAQ,CAAC,YAAY,CAAC;gBACxC,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAClC,iBAAiB,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YAEpC,IAAI,OAAwB,CAAC;YAC7B,IAAI,WAAW,EAAE,CAAC;gBAChB,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YAClC,CAAC;iBAAM,CAAC;gBACN,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YACrC,CAAC;YAED,wCAAwC;YACxC,IAAI,KAAK,GAAkB,IAAI,CAAC;YAChC,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;gBACtE,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,+BAA+B,CAAC,CAAC;gBAClE,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;YACnD,CAAC;YAED,OAAO;gBACL,OAAO;gBACP,WAAW,EAAE,iBAAiB;gBAC9B,KAAK;gBACL,GAAG,EAAE,QAAQ,CAAC,GAAG,EAAE,oCAAoC;gBACvD,WAAW,EAAE,KAAK;gBAClB,UAAU,EAAE,QAAQ,CAAC,MAAM;gBAC3B,KAAK,EAAE,SAAS;aACjB,CAAC;QACJ,CAAC;QAAC,OAAO,KAAU,EAAE,CAAC;YACpB,0CAA0C;YAC1C,IAAI,KAAK,YAAY,oBAAoB,EAAE,CAAC;gBAC1C,MAAM,KAAK,CAAC;YACd,CAAC;YACD,+BAA+B;YAC/B,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,CAAC;YAC/E,MAAM,IAAI,UAAU,CAClB,yBAAyB,OAAO,EAAE,EAClC,kBAAkB,EAClB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,OAAO,OAAO,CAAC,OAAO,EAAE,CAAC;IAC3B,CAAC;IAED;;;;OAIG;IACH,UAAU;QACR,OAAO,EAAE,CAAC;IACZ,CAAC"}
@@ -1,5 +1,5 @@
1
1
  import type { IEngine } from "./IEngine.js";
2
- import type { HTMLFetchResult, PlaywrightEngineConfig, FetchOptions, BrowserMetrics } from "./types.js";
2
+ import type { HTMLFetchResult, ContentFetchResult, ContentFetchOptions, PlaywrightEngineConfig, FetchOptions, BrowserMetrics } from "./types.js";
3
3
  /**
4
4
  * HybridEngine - Tries FetchEngine first, falls back to PlaywrightEngine on failure.
5
5
  */
@@ -11,6 +11,17 @@ export declare class HybridEngine implements IEngine {
11
11
  constructor(config?: PlaywrightEngineConfig);
12
12
  private _isSpaShell;
13
13
  fetchHTML(url: string, options?: FetchOptions): Promise<HTMLFetchResult>;
14
+ /**
15
+ * Fetches raw content from the specified URL using the hybrid approach.
16
+ * Tries FetchEngine first, falls back to PlaywrightEngine on failure.
17
+ * Mimics standard fetch API behavior.
18
+ *
19
+ * @param url The URL to fetch content from.
20
+ * @param options Optional fetch options.
21
+ * @returns A Promise resolving to a ContentFetchResult object.
22
+ * @throws {FetchError} If both engines fail to fetch the content.
23
+ */
24
+ fetchContent(url: string, options?: ContentFetchOptions): Promise<ContentFetchResult>;
14
25
  /**
15
26
  * Delegates getMetrics to the PlaywrightEngine.
16
27
  */
@@ -1 +1 @@
1
- {"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EAAE,eAAe,EAAE,sBAAsB,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAExG;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;IAChD,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAsB;gBAEjD,MAAM,GAAE,sBAA2B;IAU/C,OAAO,CAAC,WAAW;IAkBb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IA+DlF;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
1
+ {"version":3,"file":"HybridEngine.d.ts","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAClB,mBAAmB,EACnB,sBAAsB,EACtB,YAAY,EACZ,cAAc,EACf,MAAM,YAAY,CAAC;AAEpB;;GAEG;AACH,qBAAa,YAAa,YAAW,OAAO;IAC1C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAc;IAC1C,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAmB;IACpD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAyB;IAChD,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAsB;gBAEjD,MAAM,GAAE,sBAA2B;IAU/C,OAAO,CAAC,WAAW;IAkBb,SAAS,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,eAAe,CAAC;IAiFlF;;;;;;;;;OASG;IACG,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAqC/F;;OAEG;IACH,UAAU,IAAI,cAAc,EAAE;IAI9B;;OAEG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAM/B"}
@@ -12,7 +12,7 @@ export class HybridEngine {
12
12
  // Pass relevant config parts to each engine
13
13
  // FetchEngine only takes markdown option from the shared config
14
14
  // spaMode from config is primarily for PlaywrightEngine, but HybridEngine uses it for decision making.
15
- this.fetchEngine = new FetchEngine({ markdown: config.markdown });
15
+ this.fetchEngine = new FetchEngine({ markdown: config.markdown, headers: config.headers });
16
16
  this.playwrightEngine = new PlaywrightEngine(config);
17
17
  this.config = config; // Store for merging later
18
18
  this.playwrightOnlyPatterns = config.playwrightOnlyPatterns || [];
@@ -45,11 +45,18 @@ export class HybridEngine {
45
45
  ? this.config.markdown
46
46
  : false;
47
47
  // Prepare options for PlaywrightEngine, to be used in fallback scenarios or direct calls
48
+ // Retrieve headers from constructor config and per-request options
49
+ const constructorHeaders = this.config.headers || {};
50
+ const requestSpecificHeaders = options.headers || {}; // 'options' is the FetchOptions argument to HybridEngine.fetchHTML
51
+ // Merge them, with request-specific headers taking precedence
52
+ const mergedHeadersForPlaywright = { ...constructorHeaders, ...requestSpecificHeaders };
53
+ // Construct playwrightOptions, now with explicitly merged headers
48
54
  const playwrightOptions = {
49
- ...this.config, // Start with base config given to HybridEngine (e.g. spaRenderDelayMs)
50
- ...options, // Apply all per-request overrides first
51
- markdown: effectiveMarkdown, // Then ensure HybridEngine's resolved markdown is set
52
- spaMode: effectiveSpaMode, // Then ensure HybridEngine's resolved spaMode is set
55
+ ...this.config, // Spread config for other options (like spaRenderDelayMs, etc.)
56
+ ...options, // Spread options for other options (like fastMode, etc.)
57
+ headers: mergedHeadersForPlaywright, // Assign the correctly merged headers
58
+ markdown: effectiveMarkdown,
59
+ spaMode: effectiveSpaMode,
53
60
  };
54
61
  // Check playwrightOnlyPatterns first
55
62
  for (const pattern of this.playwrightOnlyPatterns) {
@@ -63,7 +70,12 @@ export class HybridEngine {
63
70
  }
64
71
  }
65
72
  try {
66
- const fetchResult = await this.fetchEngine.fetchHTML(url);
73
+ // Prepare options for FetchEngine call
74
+ const fetchEngineCallSpecificOptions = {
75
+ markdown: effectiveMarkdown, // Pass the resolved markdown setting
76
+ headers: options.headers, // Pass only the request-specific headers. FetchEngine will merge these with its own constructor headers.
77
+ };
78
+ const fetchResult = await this.fetchEngine.fetchHTML(url, fetchEngineCallSpecificOptions);
67
79
  // If FetchEngine succeeded AND spaMode is active, check if it's just a shell
68
80
  if (effectiveSpaMode && fetchResult && fetchResult.content) {
69
81
  if (this._isSpaShell(fetchResult.content)) {
@@ -88,6 +100,46 @@ export class HybridEngine {
88
100
  }
89
101
  }
90
102
  }
103
+ /**
104
+ * Fetches raw content from the specified URL using the hybrid approach.
105
+ * Tries FetchEngine first, falls back to PlaywrightEngine on failure.
106
+ * Mimics standard fetch API behavior.
107
+ *
108
+ * @param url The URL to fetch content from.
109
+ * @param options Optional fetch options.
110
+ * @returns A Promise resolving to a ContentFetchResult object.
111
+ * @throws {FetchError} If both engines fail to fetch the content.
112
+ */
113
+ async fetchContent(url, options = {}) {
114
+ // Check playwrightOnlyPatterns first
115
+ for (const pattern of this.playwrightOnlyPatterns) {
116
+ if (typeof pattern === "string" && url.includes(pattern)) {
117
+ console.warn(`HybridEngine: URL ${url} matches string pattern "${pattern}". Using PlaywrightEngine directly for content fetch.`);
118
+ return this.playwrightEngine.fetchContent(url, options);
119
+ }
120
+ else if (pattern instanceof RegExp && pattern.test(url)) {
121
+ console.warn(`HybridEngine: URL ${url} matches regex pattern "${pattern.toString()}". Using PlaywrightEngine directly for content fetch.`);
122
+ return this.playwrightEngine.fetchContent(url, options);
123
+ }
124
+ }
125
+ try {
126
+ // Try FetchEngine first
127
+ const fetchResult = await this.fetchEngine.fetchContent(url, options);
128
+ return fetchResult;
129
+ }
130
+ catch (fetchError) {
131
+ console.warn(`HybridEngine: FetchEngine failed for content fetch ${url}: ${fetchError.message}. Falling back to PlaywrightEngine.`);
132
+ try {
133
+ // Fallback to PlaywrightEngine
134
+ const playwrightResult = await this.playwrightEngine.fetchContent(url, options);
135
+ return playwrightResult;
136
+ }
137
+ catch (playwrightError) {
138
+ console.error(`HybridEngine: PlaywrightEngine fallback also failed for content fetch ${url}: ${playwrightError.message}`);
139
+ throw playwrightError; // Throw the Playwright error as it's the last one encountered
140
+ }
141
+ }
142
+ }
91
143
  /**
92
144
  * Delegates getMetrics to the PlaywrightEngine.
93
145
  */
@@ -1 +1 @@
1
- {"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAIzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IACtF,sBAAsB,CAAsB;IAE7D,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,uGAAuG;QACvG,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QAClE,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;QAChD,IAAI,CAAC,sBAAsB,GAAG,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC;IACpE,CAAC;IAEO,WAAW,CAAC,WAAmB;QACrC,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC7C,+CAA+C;YAC/C,iFAAiF;YACjF,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;QACtD,CAAC;QACD,2BAA2B;QAC3B,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,mCAAmC;QACnC,IAAI,qDAAqD,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEzF,mDAAmD;QACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,oDAAoD;QACpD,gHAAgH;QAChH,MAAM,gBAAgB,GACpB,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACpH,MAAM,iBAAiB,GACrB,OAAO,CAAC,QAAQ,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC,QAAQ;YAClB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;gBAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ;gBACtB,CAAC,CAAC,KAAK,CAAC;QAEd,yFAAyF;QACzF,MAAM,iBAAiB,GAA6D;YAClF,GAAG,IAAI,CAAC,MAAM,EAAE,uEAAuE;YACvF,GAAG,OAAO,EAAE,wCAAwC;YACpD,QAAQ,EAAE,iBAAiB,EAAE,sDAAsD;YACnF,OAAO,EAAE,gBAAgB,EAAE,qDAAqD;SACjF,CAAC;QAEF,qCAAqC;QACrC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAClD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,qBAAqB,GAAG,4BAA4B,OAAO,qCAAqC,CAAC,CAAC;gBAC/G,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,OAAO,YAAY,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1D,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,2BAA2B,OAAO,CAAC,QAAQ,EAAE,qCAAqC,CAC3G,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAE1D,6EAA6E;YAC7E,IAAI,gBAAgB,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1C,OAAO,CAAC,IAAI,CACV,2DAA2D,GAAG,wCAAwC,CACvG,CAAC;oBACF,yEAAyE;oBACzE,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YACD,wFAAwF;YACxF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,wCAAwC,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACxG,CAAC;YACF,IAAI,CAAC;gBACH,yEAAyE;gBACzE,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CAAC,2DAA2D,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC5G,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
1
+ {"version":3,"file":"HybridEngine.js","sourceRoot":"","sources":["../src/HybridEngine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAWzD;;GAEG;AACH,MAAM,OAAO,YAAY;IACN,WAAW,CAAc;IACzB,gBAAgB,CAAmB;IACnC,MAAM,CAAyB,CAAC,sDAAsD;IACtF,sBAAsB,CAAsB;IAE7D,YAAY,SAAiC,EAAE;QAC7C,4CAA4C;QAC5C,gEAAgE;QAChE,uGAAuG;QACvG,IAAI,CAAC,WAAW,GAAG,IAAI,WAAW,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;QAC3F,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,MAAM,CAAC,CAAC;QACrD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,CAAC,0BAA0B;QAChD,IAAI,CAAC,sBAAsB,GAAG,MAAM,CAAC,sBAAsB,IAAI,EAAE,CAAC;IACpE,CAAC;IAEO,WAAW,CAAC,WAAmB;QACrC,IAAI,CAAC,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC7C,+CAA+C;YAC/C,iFAAiF;YACjF,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;gBAAE,OAAO,IAAI,CAAC;QACtD,CAAC;QACD,2BAA2B;QAC3B,IAAI,WAAW,CAAC,QAAQ,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,mCAAmC;QACnC,IAAI,qDAAqD,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEzF,mDAAmD;QACnD,IAAI,sBAAsB,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,WAAW,CAAC;YAAE,OAAO,IAAI,CAAC;QAEhG,OAAO,KAAK,CAAC;IACf,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,GAAW,EAAE,UAAwB,EAAE;QACrD,oDAAoD;QACpD,gHAAgH;QAChH,MAAM,gBAAgB,GACpB,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC;QACpH,MAAM,iBAAiB,GACrB,OAAO,CAAC,QAAQ,KAAK,SAAS;YAC5B,CAAC,CAAC,OAAO,CAAC,QAAQ;YAClB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,KAAK,SAAS;gBAClC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ;gBACtB,CAAC,CAAC,KAAK,CAAC;QAEd,yFAAyF;QACzF,mEAAmE;QACnE,MAAM,kBAAkB,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QACrD,MAAM,sBAAsB,GAAG,OAAO,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,mEAAmE;QAEzH,8DAA8D;QAC9D,MAAM,0BAA0B,GAAG,EAAE,GAAG,kBAAkB,EAAE,GAAG,sBAAsB,EAAE,CAAC;QAExF,kEAAkE;QAClE,MAAM,iBAAiB,GAInB;YACF,GAAG,IAAI,CAAC,MAAM,EAAE,gEAAgE;YAChF,GAAG,OAAO,EAAE,yDAAyD;YACrE,OAAO,EAAE,0BAA0B,EAAE,sCAAsC;YAC3E,QAAQ,EAAE,iBAAiB;YAC3B,OAAO,EAAE,gBAAgB;SAC1B,CAAC;QAEF,qCAAqC;QACrC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAClD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CAAC,qBAAqB,GAAG,4BAA4B,OAAO,qCAAqC,CAAC,CAAC;gBAC/G,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;iBAAM,IAAI,OAAO,YAAY,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1D,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,2BAA2B,OAAO,CAAC,QAAQ,EAAE,qCAAqC,CAC3G,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;YACjE,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,uCAAuC;YACvC,MAAM,8BAA8B,GAAiB;gBACnD,QAAQ,EAAE,iBAAiB,EAAE,qCAAqC;gBAClE,OAAO,EAAE,OAAO,CAAC,OAAO,EAAE,yGAAyG;aACpI,CAAC;YACF,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,GAAG,EAAE,8BAA8B,CAAC,CAAC;YAE1F,6EAA6E;YAC7E,IAAI,gBAAgB,IAAI,WAAW,IAAI,WAAW,CAAC,OAAO,EAAE,CAAC;gBAC3D,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;oBAC1C,OAAO,CAAC,IAAI,CACV,2DAA2D,GAAG,wCAAwC,CACvG,CAAC;oBACF,yEAAyE;oBACzE,OAAO,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACjE,CAAC;YACH,CAAC;YACD,wFAAwF;YACxF,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,wCAAwC,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACxG,CAAC;YACF,IAAI,CAAC;gBACH,yEAAyE;gBACzE,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,SAAS,CAAC,GAAG,EAAE,iBAAiB,CAAC,CAAC;gBACvF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CAAC,2DAA2D,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAAC,CAAC;gBAC5G,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,YAAY,CAAC,GAAW,EAAE,UAA+B,EAAE;QAC/D,qCAAqC;QACrC,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,sBAAsB,EAAE,CAAC;YAClD,IAAI,OAAO,OAAO,KAAK,QAAQ,IAAI,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzD,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,4BAA4B,OAAO,uDAAuD,CACnH,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC1D,CAAC;iBAAM,IAAI,OAAO,YAAY,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1D,OAAO,CAAC,IAAI,CACV,qBAAqB,GAAG,2BAA2B,OAAO,CAAC,QAAQ,EAAE,uDAAuD,CAC7H,CAAC;gBACF,OAAO,IAAI,CAAC,gBAAgB,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAED,IAAI,CAAC;YACH,wBAAwB;YACxB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;YACtE,OAAO,WAAW,CAAC;QACrB,CAAC;QAAC,OAAO,UAAe,EAAE,CAAC;YACzB,OAAO,CAAC,IAAI,CACV,sDAAsD,GAAG,KAAK,UAAU,CAAC,OAAO,qCAAqC,CACtH,CAAC;YACF,IAAI,CAAC;gBACH,+BAA+B;gBAC/B,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,YAAY,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;gBAChF,OAAO,gBAAgB,CAAC;YAC1B,CAAC;YAAC,OAAO,eAAoB,EAAE,CAAC;gBAC9B,OAAO,CAAC,KAAK,CACX,yEAAyE,GAAG,KAAK,eAAe,CAAC,OAAO,EAAE,CAC3G,CAAC;gBACF,MAAM,eAAe,CAAC,CAAC,8DAA8D;YACvF,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,IAAI,CAAC,gBAAgB,CAAC,UAAU,EAAE,CAAC;IAC5C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,OAAO;QACX,MAAM,OAAO,CAAC,UAAU,CAAC;YACvB,IAAI,CAAC,WAAW,CAAC,OAAO,EAAE,EAAE,yCAAyC;YACrE,IAAI,CAAC,gBAAgB,CAAC,OAAO,EAAE;SAChC,CAAC,CAAC;IACL,CAAC;CACF"}
package/dist/IEngine.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import type { HTMLFetchResult, BrowserMetrics } from "./types.js";
1
+ import type { HTMLFetchResult, ContentFetchResult, ContentFetchOptions, BrowserMetrics } from "./types.js";
2
2
  /**
3
3
  * Interface for browser engines that can fetch HTML content from URLs
4
4
  */
@@ -9,6 +9,13 @@ export interface IEngine {
9
9
  * @returns A promise that resolves to an HTMLFetchResult
10
10
  */
11
11
  fetchHTML(url: string): Promise<HTMLFetchResult>;
12
+ /**
13
+ * Fetches raw content from a URL (mimics standard fetch API)
14
+ * @param url The URL to fetch
15
+ * @param options Optional fetch options
16
+ * @returns A promise that resolves to a ContentFetchResult
17
+ */
18
+ fetchContent(url: string, options?: ContentFetchOptions): Promise<ContentFetchResult>;
12
19
  /**
13
20
  * Cleans up resources used by the engine
14
21
  */
@@ -1 +1 @@
1
- {"version":3,"file":"IEngine.d.ts","sourceRoot":"","sources":["../src/IEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAElE;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAEjD;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzB;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE,CAAC;CAChC"}
1
+ {"version":3,"file":"IEngine.d.ts","sourceRoot":"","sources":["../src/IEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAE3G;;GAEG;AACH,MAAM,WAAW,OAAO;IACtB;;;;OAIG;IACH,SAAS,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;IAEjD;;;;;OAKG;IACH,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,mBAAmB,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAAC;IAEtF;;OAEG;IACH,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;IAEzB;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE,CAAC;CAChC"}
@@ -1,4 +1,4 @@
1
- import type { HTMLFetchResult, BrowserMetrics, PlaywrightEngineConfig, FetchOptions } from "./types.js";
1
+ import type { HTMLFetchResult, ContentFetchResult, ContentFetchOptions, BrowserMetrics, PlaywrightEngineConfig, FetchOptions } from "./types.js";
2
2
  import type { IEngine } from "./IEngine.js";
3
3
  /**
4
4
  * PlaywrightEngine - Fetches HTML using a managed pool of headless Playwright browser instances.
@@ -115,5 +115,35 @@ export declare class PlaywrightEngine implements IEngine {
115
115
  */
116
116
  getMetrics(): BrowserMetrics[];
117
117
  private shouldUseHeadedMode;
118
+ /**
119
+ * Fetches raw content from the specified URL using Playwright with HTTP fallback.
120
+ * Mimics standard fetch API behavior.
121
+ *
122
+ * @param url The URL to fetch content from.
123
+ * @param options Optional fetch options.
124
+ * @returns A Promise resolving to a ContentFetchResult object.
125
+ * @throws {FetchError} If the fetch operation fails after all retries.
126
+ */
127
+ fetchContent(url: string, options?: ContentFetchOptions): Promise<ContentFetchResult>;
128
+ /**
129
+ * Check cache for content fetch results.
130
+ */
131
+ private checkContentCache;
132
+ /**
133
+ * Add content fetch result to cache.
134
+ */
135
+ private addContentToCache;
136
+ /**
137
+ * Recursive fetch implementation with retry logic for content fetching.
138
+ */
139
+ private _fetchContentRecursive;
140
+ /**
141
+ * HTTP fallback for content fetching.
142
+ */
143
+ private _attemptContentHttpFallback;
144
+ /**
145
+ * Fetch content using Playwright browser.
146
+ */
147
+ private fetchContentWithPlaywright;
118
148
  }
119
149
  //# sourceMappingURL=PlaywrightEngine.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,cAAc,EAAE,sBAAsB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AACxG,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAyC5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IAGxD,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAsBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAwCnC;;;OAGG;YACW,yBAAyB;IAiEvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAwCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;;OASG;IACG,SAAS,CACb,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO,GACrE,OAAO,CAAC,eAAe,CAAC;IAc3B;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAmDzB;;;;;;;OAOG;YACW,oBAAoB;IAiClC;;;;;;;;OAQG;YACW,6BAA6B;IAmC3C;;;;;;;OAOG;YACW,eAAe;IAgH7B;;;OAGG;YACW,mBAAmB;YAqKnB,kBAAkB;IAyChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;CAS5B"}
1
+ {"version":3,"file":"PlaywrightEngine.d.ts","sourceRoot":"","sources":["../src/PlaywrightEngine.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAClB,mBAAmB,EACnB,cAAc,EACd,sBAAsB,EACtB,YAAY,EACb,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAyC5C;;;;;;GAMG;AACH,qBAAa,gBAAiB,YAAW,OAAO;IAC9C,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsC;IAC5D,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAiC;IAGxD,OAAO,CAAC,uBAAuB,CAAkB;IACjD,OAAO,CAAC,iBAAiB,CAAkB;IAC3C,OAAO,CAAC,mBAAmB,CAA0B;IAGrD,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,cAAc,CAuBpC;IAEF;;;;;OAKG;gBACS,MAAM,GAAE,sBAA2B;IAM/C;;OAEG;YACW,qBAAqB;IAwCnC;;;OAGG;YACW,yBAAyB;IAiEvC,OAAO,CAAC,UAAU;IAalB;;OAEG;YACW,WAAW;IAazB;;OAEG;YACW,qBAAqB;IAwCnC;;OAEG;IACH,OAAO,CAAC,UAAU;IAUlB;;;;;;;;;OASG;IACG,SAAS,CACb,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,YAAY,GAAG;QAAE,QAAQ,CAAC,EAAE,OAAO,CAAC;QAAC,OAAO,CAAC,EAAE,OAAO,CAAA;KAAO,GACrE,OAAO,CAAC,eAAe,CAAC;IAoB3B;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAoDzB;;;;;;;OAOG;YACW,oBAAoB;IAkClC;;;;;;;;OAQG;YACW,6BAA6B;IAmC3C;;;;;;;OAOG;YACW,eAAe;IAkH7B;;;OAGG;YACW,mBAAmB;YA2KnB,kBAAkB;IAyChC;;;;;OAKG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;IAoB9B;;;OAGG;IACH,UAAU,IAAI,cAAc,EAAE;IAQ9B,OAAO,CAAC,mBAAmB;IAU3B;;;;;;;;OAQG;IACG,YAAY,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,GAAE,mBAAwB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IA0C/F;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAkBzB;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAoBzB;;OAEG;YACW,sBAAsB;IAwDpC;;OAEG;YACW,2BAA2B;IAuDzC;;OAEG;YACW,0BAA0B;CAmFzC"}