@purepageio/fetch-engines 0.2.5 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/dist/index.cjs +3274 -0
  2. package/dist/index.cjs.map +1 -0
  3. package/dist/index.d.cts +323 -0
  4. package/dist/index.d.ts +323 -8
  5. package/dist/index.js +3270 -4
  6. package/dist/index.js.map +1 -1
  7. package/package.json +14 -6
  8. package/dist/FetchEngine.d.ts +0 -47
  9. package/dist/FetchEngine.d.ts.map +0 -1
  10. package/dist/FetchEngine.js +0 -114
  11. package/dist/FetchEngine.js.map +0 -1
  12. package/dist/FetchEngine.test.d.ts +0 -2
  13. package/dist/FetchEngine.test.d.ts.map +0 -1
  14. package/dist/FetchEngine.test.js +0 -44
  15. package/dist/FetchEngine.test.js.map +0 -1
  16. package/dist/HybridEngine.d.ts +0 -21
  17. package/dist/HybridEngine.d.ts.map +0 -1
  18. package/dist/HybridEngine.js +0 -62
  19. package/dist/HybridEngine.js.map +0 -1
  20. package/dist/IEngine.d.ts +0 -22
  21. package/dist/IEngine.d.ts.map +0 -1
  22. package/dist/IEngine.js +0 -2
  23. package/dist/IEngine.js.map +0 -1
  24. package/dist/PlaywrightEngine.d.ts +0 -90
  25. package/dist/PlaywrightEngine.d.ts.map +0 -1
  26. package/dist/PlaywrightEngine.js +0 -505
  27. package/dist/PlaywrightEngine.js.map +0 -1
  28. package/dist/PlaywrightEngine.test.d.ts +0 -2
  29. package/dist/PlaywrightEngine.test.d.ts.map +0 -1
  30. package/dist/PlaywrightEngine.test.js +0 -207
  31. package/dist/PlaywrightEngine.test.js.map +0 -1
  32. package/dist/PuppeteerEngine.d.ts +0 -21
  33. package/dist/PuppeteerEngine.d.ts.map +0 -1
  34. package/dist/PuppeteerEngine.js +0 -412
  35. package/dist/PuppeteerEngine.js.map +0 -1
  36. package/dist/browser/BrowserPool.d.ts +0 -29
  37. package/dist/browser/BrowserPool.d.ts.map +0 -1
  38. package/dist/browser/BrowserPool.js +0 -378
  39. package/dist/browser/BrowserPool.js.map +0 -1
  40. package/dist/browser/PlaywrightBrowserPool.d.ts +0 -48
  41. package/dist/browser/PlaywrightBrowserPool.d.ts.map +0 -1
  42. package/dist/browser/PlaywrightBrowserPool.js +0 -378
  43. package/dist/browser/PlaywrightBrowserPool.js.map +0 -1
  44. package/dist/browser/PlaywrightBrowserPool.test.d.ts +0 -2
  45. package/dist/browser/PlaywrightBrowserPool.test.d.ts.map +0 -1
  46. package/dist/browser/PlaywrightBrowserPool.test.js +0 -422
  47. package/dist/browser/PlaywrightBrowserPool.test.js.map +0 -1
  48. package/dist/errors.d.ts +0 -20
  49. package/dist/errors.d.ts.map +0 -1
  50. package/dist/errors.js +0 -30
  51. package/dist/errors.js.map +0 -1
  52. package/dist/index.d.ts.map +0 -1
  53. package/dist/types.d.ts +0 -167
  54. package/dist/types.d.ts.map +0 -1
  55. package/dist/types.js +0 -2
  56. package/dist/types.js.map +0 -1
  57. package/dist/utils/markdown-converter.d.ts +0 -31
  58. package/dist/utils/markdown-converter.d.ts.map +0 -1
  59. package/dist/utils/markdown-converter.js +0 -796
  60. package/dist/utils/markdown-converter.js.map +0 -1
package/dist/index.cjs ADDED
@@ -0,0 +1,3274 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
11
+ var __commonJS = (cb, mod) => function __require() {
12
+ return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
13
+ };
14
+ var __export = (target, all) => {
15
+ for (var name in all)
16
+ __defProp(target, name, { get: all[name], enumerable: true });
17
+ };
18
+ var __copyProps = (to, from, except, desc) => {
19
+ if (from && typeof from === "object" || typeof from === "function") {
20
+ for (let key of __getOwnPropNames(from))
21
+ if (!__hasOwnProp.call(to, key) && key !== except)
22
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
23
+ }
24
+ return to;
25
+ };
26
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
27
+ // If the importer is in node compatibility mode or this is not an ESM
28
+ // file that has been converted to a CommonJS file using a Babel-
29
+ // compatible transform (i.e. "__esModule" has not been set), then set
30
+ // "default" to the CommonJS "module.exports" for node compatibility.
31
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
32
+ mod
33
+ ));
34
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
35
+
36
+ // node_modules/.pnpm/tsup@8.4.0_postcss@8.5.3_typescript@5.8.2/node_modules/tsup/assets/cjs_shims.js
37
+ var init_cjs_shims = __esm({
38
+ "node_modules/.pnpm/tsup@8.4.0_postcss@8.5.3_typescript@5.8.2/node_modules/tsup/assets/cjs_shims.js"() {
39
+ "use strict";
40
+ }
41
+ });
42
+
43
+ // ../node_modules/ms/index.js
44
+ var require_ms = __commonJS({
45
+ "../node_modules/ms/index.js"(exports2, module2) {
46
+ "use strict";
47
+ init_cjs_shims();
48
+ var s = 1e3;
49
+ var m = s * 60;
50
+ var h = m * 60;
51
+ var d = h * 24;
52
+ var w = d * 7;
53
+ var y = d * 365.25;
54
+ module2.exports = function(val, options) {
55
+ options = options || {};
56
+ var type = typeof val;
57
+ if (type === "string" && val.length > 0) {
58
+ return parse2(val);
59
+ } else if (type === "number" && isFinite(val)) {
60
+ return options.long ? fmtLong(val) : fmtShort(val);
61
+ }
62
+ throw new Error(
63
+ "val is not a non-empty string or a valid number. val=" + JSON.stringify(val)
64
+ );
65
+ };
66
+ function parse2(str) {
67
+ str = String(str);
68
+ if (str.length > 100) {
69
+ return;
70
+ }
71
+ var match = /^(-?(?:\d+)?\.?\d+) *(milliseconds?|msecs?|ms|seconds?|secs?|s|minutes?|mins?|m|hours?|hrs?|h|days?|d|weeks?|w|years?|yrs?|y)?$/i.exec(
72
+ str
73
+ );
74
+ if (!match) {
75
+ return;
76
+ }
77
+ var n = parseFloat(match[1]);
78
+ var type = (match[2] || "ms").toLowerCase();
79
+ switch (type) {
80
+ case "years":
81
+ case "year":
82
+ case "yrs":
83
+ case "yr":
84
+ case "y":
85
+ return n * y;
86
+ case "weeks":
87
+ case "week":
88
+ case "w":
89
+ return n * w;
90
+ case "days":
91
+ case "day":
92
+ case "d":
93
+ return n * d;
94
+ case "hours":
95
+ case "hour":
96
+ case "hrs":
97
+ case "hr":
98
+ case "h":
99
+ return n * h;
100
+ case "minutes":
101
+ case "minute":
102
+ case "mins":
103
+ case "min":
104
+ case "m":
105
+ return n * m;
106
+ case "seconds":
107
+ case "second":
108
+ case "secs":
109
+ case "sec":
110
+ case "s":
111
+ return n * s;
112
+ case "milliseconds":
113
+ case "millisecond":
114
+ case "msecs":
115
+ case "msec":
116
+ case "ms":
117
+ return n;
118
+ default:
119
+ return void 0;
120
+ }
121
+ }
122
+ function fmtShort(ms) {
123
+ var msAbs = Math.abs(ms);
124
+ if (msAbs >= d) {
125
+ return Math.round(ms / d) + "d";
126
+ }
127
+ if (msAbs >= h) {
128
+ return Math.round(ms / h) + "h";
129
+ }
130
+ if (msAbs >= m) {
131
+ return Math.round(ms / m) + "m";
132
+ }
133
+ if (msAbs >= s) {
134
+ return Math.round(ms / s) + "s";
135
+ }
136
+ return ms + "ms";
137
+ }
138
+ function fmtLong(ms) {
139
+ var msAbs = Math.abs(ms);
140
+ if (msAbs >= d) {
141
+ return plural(ms, msAbs, d, "day");
142
+ }
143
+ if (msAbs >= h) {
144
+ return plural(ms, msAbs, h, "hour");
145
+ }
146
+ if (msAbs >= m) {
147
+ return plural(ms, msAbs, m, "minute");
148
+ }
149
+ if (msAbs >= s) {
150
+ return plural(ms, msAbs, s, "second");
151
+ }
152
+ return ms + " ms";
153
+ }
154
+ function plural(ms, msAbs, n, name) {
155
+ var isPlural = msAbs >= n * 1.5;
156
+ return Math.round(ms / n) + " " + name + (isPlural ? "s" : "");
157
+ }
158
+ }
159
+ });
160
+
161
+ // ../node_modules/debug/src/common.js
162
+ var require_common = __commonJS({
163
+ "../node_modules/debug/src/common.js"(exports2, module2) {
164
+ "use strict";
165
+ init_cjs_shims();
166
+ function setup(env) {
167
+ createDebug.debug = createDebug;
168
+ createDebug.default = createDebug;
169
+ createDebug.coerce = coerce;
170
+ createDebug.disable = disable;
171
+ createDebug.enable = enable;
172
+ createDebug.enabled = enabled;
173
+ createDebug.humanize = require_ms();
174
+ createDebug.destroy = destroy;
175
+ Object.keys(env).forEach((key) => {
176
+ createDebug[key] = env[key];
177
+ });
178
+ createDebug.names = [];
179
+ createDebug.skips = [];
180
+ createDebug.formatters = {};
181
+ function selectColor(namespace) {
182
+ let hash = 0;
183
+ for (let i = 0; i < namespace.length; i++) {
184
+ hash = (hash << 5) - hash + namespace.charCodeAt(i);
185
+ hash |= 0;
186
+ }
187
+ return createDebug.colors[Math.abs(hash) % createDebug.colors.length];
188
+ }
189
+ createDebug.selectColor = selectColor;
190
+ function createDebug(namespace) {
191
+ let prevTime;
192
+ let enableOverride = null;
193
+ let namespacesCache;
194
+ let enabledCache;
195
+ function debug2(...args) {
196
+ if (!debug2.enabled) {
197
+ return;
198
+ }
199
+ const self = debug2;
200
+ const curr = Number(/* @__PURE__ */ new Date());
201
+ const ms = curr - (prevTime || curr);
202
+ self.diff = ms;
203
+ self.prev = prevTime;
204
+ self.curr = curr;
205
+ prevTime = curr;
206
+ args[0] = createDebug.coerce(args[0]);
207
+ if (typeof args[0] !== "string") {
208
+ args.unshift("%O");
209
+ }
210
+ let index = 0;
211
+ args[0] = args[0].replace(/%([a-zA-Z%])/g, (match, format) => {
212
+ if (match === "%%") {
213
+ return "%";
214
+ }
215
+ index++;
216
+ const formatter = createDebug.formatters[format];
217
+ if (typeof formatter === "function") {
218
+ const val = args[index];
219
+ match = formatter.call(self, val);
220
+ args.splice(index, 1);
221
+ index--;
222
+ }
223
+ return match;
224
+ });
225
+ createDebug.formatArgs.call(self, args);
226
+ const logFn = self.log || createDebug.log;
227
+ logFn.apply(self, args);
228
+ }
229
+ debug2.namespace = namespace;
230
+ debug2.useColors = createDebug.useColors();
231
+ debug2.color = createDebug.selectColor(namespace);
232
+ debug2.extend = extend;
233
+ debug2.destroy = createDebug.destroy;
234
+ Object.defineProperty(debug2, "enabled", {
235
+ enumerable: true,
236
+ configurable: false,
237
+ get: () => {
238
+ if (enableOverride !== null) {
239
+ return enableOverride;
240
+ }
241
+ if (namespacesCache !== createDebug.namespaces) {
242
+ namespacesCache = createDebug.namespaces;
243
+ enabledCache = createDebug.enabled(namespace);
244
+ }
245
+ return enabledCache;
246
+ },
247
+ set: (v) => {
248
+ enableOverride = v;
249
+ }
250
+ });
251
+ if (typeof createDebug.init === "function") {
252
+ createDebug.init(debug2);
253
+ }
254
+ return debug2;
255
+ }
256
+ function extend(namespace, delimiter) {
257
+ const newDebug = createDebug(this.namespace + (typeof delimiter === "undefined" ? ":" : delimiter) + namespace);
258
+ newDebug.log = this.log;
259
+ return newDebug;
260
+ }
261
+ function enable(namespaces) {
262
+ createDebug.save(namespaces);
263
+ createDebug.namespaces = namespaces;
264
+ createDebug.names = [];
265
+ createDebug.skips = [];
266
+ const split = (typeof namespaces === "string" ? namespaces : "").trim().replace(" ", ",").split(",").filter(Boolean);
267
+ for (const ns of split) {
268
+ if (ns[0] === "-") {
269
+ createDebug.skips.push(ns.slice(1));
270
+ } else {
271
+ createDebug.names.push(ns);
272
+ }
273
+ }
274
+ }
275
+ function matchesTemplate(search, template) {
276
+ let searchIndex = 0;
277
+ let templateIndex = 0;
278
+ let starIndex = -1;
279
+ let matchIndex = 0;
280
+ while (searchIndex < search.length) {
281
+ if (templateIndex < template.length && (template[templateIndex] === search[searchIndex] || template[templateIndex] === "*")) {
282
+ if (template[templateIndex] === "*") {
283
+ starIndex = templateIndex;
284
+ matchIndex = searchIndex;
285
+ templateIndex++;
286
+ } else {
287
+ searchIndex++;
288
+ templateIndex++;
289
+ }
290
+ } else if (starIndex !== -1) {
291
+ templateIndex = starIndex + 1;
292
+ matchIndex++;
293
+ searchIndex = matchIndex;
294
+ } else {
295
+ return false;
296
+ }
297
+ }
298
+ while (templateIndex < template.length && template[templateIndex] === "*") {
299
+ templateIndex++;
300
+ }
301
+ return templateIndex === template.length;
302
+ }
303
+ function disable() {
304
+ const namespaces = [
305
+ ...createDebug.names,
306
+ ...createDebug.skips.map((namespace) => "-" + namespace)
307
+ ].join(",");
308
+ createDebug.enable("");
309
+ return namespaces;
310
+ }
311
+ function enabled(name) {
312
+ for (const skip of createDebug.skips) {
313
+ if (matchesTemplate(name, skip)) {
314
+ return false;
315
+ }
316
+ }
317
+ for (const ns of createDebug.names) {
318
+ if (matchesTemplate(name, ns)) {
319
+ return true;
320
+ }
321
+ }
322
+ return false;
323
+ }
324
+ function coerce(val) {
325
+ if (val instanceof Error) {
326
+ return val.stack || val.message;
327
+ }
328
+ return val;
329
+ }
330
+ function destroy() {
331
+ console.warn("Instance method `debug.destroy()` is deprecated and no longer does anything. It will be removed in the next major version of `debug`.");
332
+ }
333
+ createDebug.enable(createDebug.load());
334
+ return createDebug;
335
+ }
336
+ module2.exports = setup;
337
+ }
338
+ });
339
+
340
+ // ../node_modules/debug/src/browser.js
341
+ var require_browser = __commonJS({
342
+ "../node_modules/debug/src/browser.js"(exports2, module2) {
343
+ "use strict";
344
+ init_cjs_shims();
345
+ exports2.formatArgs = formatArgs;
346
+ exports2.save = save;
347
+ exports2.load = load;
348
+ exports2.useColors = useColors;
349
+ exports2.storage = localstorage();
350
+ exports2.destroy = /* @__PURE__ */ (() => {
351
+ let warned = false;
352
+ return () => {
353
+ if (!warned) {
354
+ warned = true;
355
+ console.warn("Instance method `debug.destroy()` is deprecated and no longer does anything. It will be removed in the next major version of `debug`.");
356
+ }
357
+ };
358
+ })();
359
+ exports2.colors = [
360
+ "#0000CC",
361
+ "#0000FF",
362
+ "#0033CC",
363
+ "#0033FF",
364
+ "#0066CC",
365
+ "#0066FF",
366
+ "#0099CC",
367
+ "#0099FF",
368
+ "#00CC00",
369
+ "#00CC33",
370
+ "#00CC66",
371
+ "#00CC99",
372
+ "#00CCCC",
373
+ "#00CCFF",
374
+ "#3300CC",
375
+ "#3300FF",
376
+ "#3333CC",
377
+ "#3333FF",
378
+ "#3366CC",
379
+ "#3366FF",
380
+ "#3399CC",
381
+ "#3399FF",
382
+ "#33CC00",
383
+ "#33CC33",
384
+ "#33CC66",
385
+ "#33CC99",
386
+ "#33CCCC",
387
+ "#33CCFF",
388
+ "#6600CC",
389
+ "#6600FF",
390
+ "#6633CC",
391
+ "#6633FF",
392
+ "#66CC00",
393
+ "#66CC33",
394
+ "#9900CC",
395
+ "#9900FF",
396
+ "#9933CC",
397
+ "#9933FF",
398
+ "#99CC00",
399
+ "#99CC33",
400
+ "#CC0000",
401
+ "#CC0033",
402
+ "#CC0066",
403
+ "#CC0099",
404
+ "#CC00CC",
405
+ "#CC00FF",
406
+ "#CC3300",
407
+ "#CC3333",
408
+ "#CC3366",
409
+ "#CC3399",
410
+ "#CC33CC",
411
+ "#CC33FF",
412
+ "#CC6600",
413
+ "#CC6633",
414
+ "#CC9900",
415
+ "#CC9933",
416
+ "#CCCC00",
417
+ "#CCCC33",
418
+ "#FF0000",
419
+ "#FF0033",
420
+ "#FF0066",
421
+ "#FF0099",
422
+ "#FF00CC",
423
+ "#FF00FF",
424
+ "#FF3300",
425
+ "#FF3333",
426
+ "#FF3366",
427
+ "#FF3399",
428
+ "#FF33CC",
429
+ "#FF33FF",
430
+ "#FF6600",
431
+ "#FF6633",
432
+ "#FF9900",
433
+ "#FF9933",
434
+ "#FFCC00",
435
+ "#FFCC33"
436
+ ];
437
+ function useColors() {
438
+ if (typeof window !== "undefined" && window.process && (window.process.type === "renderer" || window.process.__nwjs)) {
439
+ return true;
440
+ }
441
+ if (typeof navigator !== "undefined" && navigator.userAgent && navigator.userAgent.toLowerCase().match(/(edge|trident)\/(\d+)/)) {
442
+ return false;
443
+ }
444
+ let m;
445
+ return typeof document !== "undefined" && document.documentElement && document.documentElement.style && document.documentElement.style.WebkitAppearance || // Is firebug? http://stackoverflow.com/a/398120/376773
446
+ typeof window !== "undefined" && window.console && (window.console.firebug || window.console.exception && window.console.table) || // Is firefox >= v31?
447
+ // https://developer.mozilla.org/en-US/docs/Tools/Web_Console#Styling_messages
448
+ typeof navigator !== "undefined" && navigator.userAgent && (m = navigator.userAgent.toLowerCase().match(/firefox\/(\d+)/)) && parseInt(m[1], 10) >= 31 || // Double check webkit in userAgent just in case we are in a worker
449
+ typeof navigator !== "undefined" && navigator.userAgent && navigator.userAgent.toLowerCase().match(/applewebkit\/(\d+)/);
450
+ }
451
+ function formatArgs(args) {
452
+ args[0] = (this.useColors ? "%c" : "") + this.namespace + (this.useColors ? " %c" : " ") + args[0] + (this.useColors ? "%c " : " ") + "+" + module2.exports.humanize(this.diff);
453
+ if (!this.useColors) {
454
+ return;
455
+ }
456
+ const c = "color: " + this.color;
457
+ args.splice(1, 0, c, "color: inherit");
458
+ let index = 0;
459
+ let lastC = 0;
460
+ args[0].replace(/%[a-zA-Z%]/g, (match) => {
461
+ if (match === "%%") {
462
+ return;
463
+ }
464
+ index++;
465
+ if (match === "%c") {
466
+ lastC = index;
467
+ }
468
+ });
469
+ args.splice(lastC, 0, c);
470
+ }
471
+ exports2.log = console.debug || console.log || (() => {
472
+ });
473
+ function save(namespaces) {
474
+ try {
475
+ if (namespaces) {
476
+ exports2.storage.setItem("debug", namespaces);
477
+ } else {
478
+ exports2.storage.removeItem("debug");
479
+ }
480
+ } catch (error) {
481
+ }
482
+ }
483
+ function load() {
484
+ let r;
485
+ try {
486
+ r = exports2.storage.getItem("debug");
487
+ } catch (error) {
488
+ }
489
+ if (!r && typeof process !== "undefined" && "env" in process) {
490
+ r = process.env.DEBUG;
491
+ }
492
+ return r;
493
+ }
494
+ function localstorage() {
495
+ try {
496
+ return localStorage;
497
+ } catch (error) {
498
+ }
499
+ }
500
+ module2.exports = require_common()(exports2);
501
+ var { formatters } = module2.exports;
502
+ formatters.j = function(v) {
503
+ try {
504
+ return JSON.stringify(v);
505
+ } catch (error) {
506
+ return "[UnexpectedJSONParseError]: " + error.message;
507
+ }
508
+ };
509
+ }
510
+ });
511
+
512
+ // ../node_modules/debug/src/node.js
513
+ var require_node = __commonJS({
514
+ "../node_modules/debug/src/node.js"(exports2, module2) {
515
+ "use strict";
516
+ init_cjs_shims();
517
+ var tty = require("tty");
518
+ var util = require("util");
519
+ exports2.init = init;
520
+ exports2.log = log;
521
+ exports2.formatArgs = formatArgs;
522
+ exports2.save = save;
523
+ exports2.load = load;
524
+ exports2.useColors = useColors;
525
+ exports2.destroy = util.deprecate(
526
+ () => {
527
+ },
528
+ "Instance method `debug.destroy()` is deprecated and no longer does anything. It will be removed in the next major version of `debug`."
529
+ );
530
+ exports2.colors = [6, 2, 3, 4, 5, 1];
531
+ try {
532
+ const supportsColor = require("supports-color");
533
+ if (supportsColor && (supportsColor.stderr || supportsColor).level >= 2) {
534
+ exports2.colors = [
535
+ 20,
536
+ 21,
537
+ 26,
538
+ 27,
539
+ 32,
540
+ 33,
541
+ 38,
542
+ 39,
543
+ 40,
544
+ 41,
545
+ 42,
546
+ 43,
547
+ 44,
548
+ 45,
549
+ 56,
550
+ 57,
551
+ 62,
552
+ 63,
553
+ 68,
554
+ 69,
555
+ 74,
556
+ 75,
557
+ 76,
558
+ 77,
559
+ 78,
560
+ 79,
561
+ 80,
562
+ 81,
563
+ 92,
564
+ 93,
565
+ 98,
566
+ 99,
567
+ 112,
568
+ 113,
569
+ 128,
570
+ 129,
571
+ 134,
572
+ 135,
573
+ 148,
574
+ 149,
575
+ 160,
576
+ 161,
577
+ 162,
578
+ 163,
579
+ 164,
580
+ 165,
581
+ 166,
582
+ 167,
583
+ 168,
584
+ 169,
585
+ 170,
586
+ 171,
587
+ 172,
588
+ 173,
589
+ 178,
590
+ 179,
591
+ 184,
592
+ 185,
593
+ 196,
594
+ 197,
595
+ 198,
596
+ 199,
597
+ 200,
598
+ 201,
599
+ 202,
600
+ 203,
601
+ 204,
602
+ 205,
603
+ 206,
604
+ 207,
605
+ 208,
606
+ 209,
607
+ 214,
608
+ 215,
609
+ 220,
610
+ 221
611
+ ];
612
+ }
613
+ } catch (error) {
614
+ }
615
+ exports2.inspectOpts = Object.keys(process.env).filter((key) => {
616
+ return /^debug_/i.test(key);
617
+ }).reduce((obj, key) => {
618
+ const prop = key.substring(6).toLowerCase().replace(/_([a-z])/g, (_, k) => {
619
+ return k.toUpperCase();
620
+ });
621
+ let val = process.env[key];
622
+ if (/^(yes|on|true|enabled)$/i.test(val)) {
623
+ val = true;
624
+ } else if (/^(no|off|false|disabled)$/i.test(val)) {
625
+ val = false;
626
+ } else if (val === "null") {
627
+ val = null;
628
+ } else {
629
+ val = Number(val);
630
+ }
631
+ obj[prop] = val;
632
+ return obj;
633
+ }, {});
634
+ function useColors() {
635
+ return "colors" in exports2.inspectOpts ? Boolean(exports2.inspectOpts.colors) : tty.isatty(process.stderr.fd);
636
+ }
637
+ function formatArgs(args) {
638
+ const { namespace: name, useColors: useColors2 } = this;
639
+ if (useColors2) {
640
+ const c = this.color;
641
+ const colorCode = "\x1B[3" + (c < 8 ? c : "8;5;" + c);
642
+ const prefix = ` ${colorCode};1m${name} \x1B[0m`;
643
+ args[0] = prefix + args[0].split("\n").join("\n" + prefix);
644
+ args.push(colorCode + "m+" + module2.exports.humanize(this.diff) + "\x1B[0m");
645
+ } else {
646
+ args[0] = getDate() + name + " " + args[0];
647
+ }
648
+ }
649
+ function getDate() {
650
+ if (exports2.inspectOpts.hideDate) {
651
+ return "";
652
+ }
653
+ return (/* @__PURE__ */ new Date()).toISOString() + " ";
654
+ }
655
+ function log(...args) {
656
+ return process.stderr.write(util.formatWithOptions(exports2.inspectOpts, ...args) + "\n");
657
+ }
658
+ function save(namespaces) {
659
+ if (namespaces) {
660
+ process.env.DEBUG = namespaces;
661
+ } else {
662
+ delete process.env.DEBUG;
663
+ }
664
+ }
665
+ function load() {
666
+ return process.env.DEBUG;
667
+ }
668
+ function init(debug2) {
669
+ debug2.inspectOpts = {};
670
+ const keys = Object.keys(exports2.inspectOpts);
671
+ for (let i = 0; i < keys.length; i++) {
672
+ debug2.inspectOpts[keys[i]] = exports2.inspectOpts[keys[i]];
673
+ }
674
+ }
675
+ module2.exports = require_common()(exports2);
676
+ var { formatters } = module2.exports;
677
+ formatters.o = function(v) {
678
+ this.inspectOpts.colors = this.useColors;
679
+ return util.inspect(v, this.inspectOpts).split("\n").map((str) => str.trim()).join(" ");
680
+ };
681
+ formatters.O = function(v) {
682
+ this.inspectOpts.colors = this.useColors;
683
+ return util.inspect(v, this.inspectOpts);
684
+ };
685
+ }
686
+ });
687
+
688
+ // ../node_modules/debug/src/index.js
689
+ var require_src = __commonJS({
690
+ "../node_modules/debug/src/index.js"(exports2, module2) {
691
+ "use strict";
692
+ init_cjs_shims();
693
+ if (typeof process === "undefined" || process.type === "renderer" || process.browser === true || process.__nwjs) {
694
+ module2.exports = require_browser();
695
+ } else {
696
+ module2.exports = require_node();
697
+ }
698
+ }
699
+ });
700
+
701
+ // ../node_modules/arr-union/index.js
702
+ var require_arr_union = __commonJS({
703
+ "../node_modules/arr-union/index.js"(exports2, module2) {
704
+ "use strict";
705
+ init_cjs_shims();
706
+ module2.exports = function union(init) {
707
+ if (!Array.isArray(init)) {
708
+ throw new TypeError("arr-union expects the first argument to be an array.");
709
+ }
710
+ var len = arguments.length;
711
+ var i = 0;
712
+ while (++i < len) {
713
+ var arg = arguments[i];
714
+ if (!arg) continue;
715
+ if (!Array.isArray(arg)) {
716
+ arg = [arg];
717
+ }
718
+ for (var j = 0; j < arg.length; j++) {
719
+ var ele = arg[j];
720
+ if (init.indexOf(ele) >= 0) {
721
+ continue;
722
+ }
723
+ init.push(ele);
724
+ }
725
+ }
726
+ return init;
727
+ };
728
+ }
729
+ });
730
+
731
+ // ../node_modules/lazy-cache/index.js
732
+ var require_lazy_cache = __commonJS({
733
+ "../node_modules/lazy-cache/index.js"(exports2, module2) {
734
+ "use strict";
735
+ init_cjs_shims();
736
+ function lazyCache(fn) {
737
+ var cache = {};
738
+ var proxy = function(mod, name) {
739
+ name = name || camelcase(mod);
740
+ if (process.env.UNLAZY === "true" || process.env.UNLAZY === true || process.env.TRAVIS) {
741
+ cache[name] = fn(mod);
742
+ }
743
+ Object.defineProperty(proxy, name, {
744
+ enumerable: true,
745
+ configurable: true,
746
+ get: getter
747
+ });
748
+ function getter() {
749
+ if (cache.hasOwnProperty(name)) {
750
+ return cache[name];
751
+ }
752
+ return cache[name] = fn(mod);
753
+ }
754
+ return getter;
755
+ };
756
+ return proxy;
757
+ }
758
+ function camelcase(str) {
759
+ if (str.length === 1) {
760
+ return str.toLowerCase();
761
+ }
762
+ str = str.replace(/^[\W_]+|[\W_]+$/g, "").toLowerCase();
763
+ return str.replace(/[\W_]+(\w|$)/g, function(_, ch) {
764
+ return ch.toUpperCase();
765
+ });
766
+ }
767
+ module2.exports = lazyCache;
768
+ }
769
+ });
770
+
771
+ // ../node_modules/for-in/index.js
772
+ var require_for_in = __commonJS({
773
+ "../node_modules/for-in/index.js"(exports2, module2) {
774
+ "use strict";
775
+ init_cjs_shims();
776
+ module2.exports = function forIn(obj, fn, thisArg) {
777
+ for (var key in obj) {
778
+ if (fn.call(thisArg, obj[key], key, obj) === false) {
779
+ break;
780
+ }
781
+ }
782
+ };
783
+ }
784
+ });
785
+
786
+ // ../node_modules/for-own/index.js
787
+ var require_for_own = __commonJS({
788
+ "../node_modules/for-own/index.js"(exports2, module2) {
789
+ "use strict";
790
+ init_cjs_shims();
791
+ var forIn = require_for_in();
792
+ var hasOwn = Object.prototype.hasOwnProperty;
793
+ module2.exports = function forOwn(obj, fn, thisArg) {
794
+ forIn(obj, function(val, key) {
795
+ if (hasOwn.call(obj, key)) {
796
+ return fn.call(thisArg, obj[key], key, obj);
797
+ }
798
+ });
799
+ };
800
+ }
801
+ });
802
+
803
+ // ../node_modules/clone-deep/utils.js
804
+ var require_utils = __commonJS({
805
+ "../node_modules/clone-deep/utils.js"(exports2, module2) {
806
+ "use strict";
807
+ init_cjs_shims();
808
+ var utils = require_lazy_cache()(require);
809
+ var fn = require;
810
+ require = utils;
811
+ require("is-plain-object", "isObject");
812
+ require("shallow-clone", "clone");
813
+ require("kind-of", "typeOf");
814
+ require_for_own();
815
+ require = fn;
816
+ module2.exports = utils;
817
+ }
818
+ });
819
+
820
+ // ../node_modules/clone-deep/index.js
821
+ var require_clone_deep = __commonJS({
822
+ "../node_modules/clone-deep/index.js"(exports2, module2) {
823
+ "use strict";
824
+ init_cjs_shims();
825
+ var utils = require_utils();
826
+ function cloneDeep(val, instanceClone) {
827
+ switch (utils.typeOf(val)) {
828
+ case "object":
829
+ return cloneObjectDeep(val, instanceClone);
830
+ case "array":
831
+ return cloneArrayDeep(val, instanceClone);
832
+ default:
833
+ return utils.clone(val);
834
+ }
835
+ }
836
+ function cloneObjectDeep(obj, instanceClone) {
837
+ if (utils.isObject(obj)) {
838
+ var res = {};
839
+ utils.forOwn(obj, function(obj2, key) {
840
+ this[key] = cloneDeep(obj2, instanceClone);
841
+ }, res);
842
+ return res;
843
+ } else if (instanceClone) {
844
+ return instanceClone(obj);
845
+ } else {
846
+ return obj;
847
+ }
848
+ }
849
+ function cloneArrayDeep(arr, instanceClone) {
850
+ var len = arr.length, res = [];
851
+ var i = -1;
852
+ while (++i < len) {
853
+ res[i] = cloneDeep(arr[i], instanceClone);
854
+ }
855
+ return res;
856
+ }
857
+ module2.exports = cloneDeep;
858
+ }
859
+ });
860
+
861
+ // ../node_modules/is-buffer/index.js
862
+ var require_is_buffer = __commonJS({
863
+ "../node_modules/is-buffer/index.js"(exports2, module2) {
864
+ "use strict";
865
+ init_cjs_shims();
866
+ module2.exports = function(obj) {
867
+ return obj != null && (isBuffer(obj) || isSlowBuffer(obj) || !!obj._isBuffer);
868
+ };
869
+ function isBuffer(obj) {
870
+ return !!obj.constructor && typeof obj.constructor.isBuffer === "function" && obj.constructor.isBuffer(obj);
871
+ }
872
+ function isSlowBuffer(obj) {
873
+ return typeof obj.readFloatLE === "function" && typeof obj.slice === "function" && isBuffer(obj.slice(0, 0));
874
+ }
875
+ }
876
+ });
877
+
878
+ // ../node_modules/kind-of/index.js
879
+ var require_kind_of = __commonJS({
880
+ "../node_modules/kind-of/index.js"(exports2, module2) {
881
+ "use strict";
882
+ init_cjs_shims();
883
+ var isBuffer = require_is_buffer();
884
+ var toString = Object.prototype.toString;
885
+ module2.exports = function kindOf(val) {
886
+ if (typeof val === "undefined") {
887
+ return "undefined";
888
+ }
889
+ if (val === null) {
890
+ return "null";
891
+ }
892
+ if (val === true || val === false || val instanceof Boolean) {
893
+ return "boolean";
894
+ }
895
+ if (typeof val === "string" || val instanceof String) {
896
+ return "string";
897
+ }
898
+ if (typeof val === "number" || val instanceof Number) {
899
+ return "number";
900
+ }
901
+ if (typeof val === "function" || val instanceof Function) {
902
+ return "function";
903
+ }
904
+ if (typeof Array.isArray !== "undefined" && Array.isArray(val)) {
905
+ return "array";
906
+ }
907
+ if (val instanceof RegExp) {
908
+ return "regexp";
909
+ }
910
+ if (val instanceof Date) {
911
+ return "date";
912
+ }
913
+ var type = toString.call(val);
914
+ if (type === "[object RegExp]") {
915
+ return "regexp";
916
+ }
917
+ if (type === "[object Date]") {
918
+ return "date";
919
+ }
920
+ if (type === "[object Arguments]") {
921
+ return "arguments";
922
+ }
923
+ if (type === "[object Error]") {
924
+ return "error";
925
+ }
926
+ if (isBuffer(val)) {
927
+ return "buffer";
928
+ }
929
+ if (type === "[object Set]") {
930
+ return "set";
931
+ }
932
+ if (type === "[object WeakSet]") {
933
+ return "weakset";
934
+ }
935
+ if (type === "[object Map]") {
936
+ return "map";
937
+ }
938
+ if (type === "[object WeakMap]") {
939
+ return "weakmap";
940
+ }
941
+ if (type === "[object Symbol]") {
942
+ return "symbol";
943
+ }
944
+ if (type === "[object Int8Array]") {
945
+ return "int8array";
946
+ }
947
+ if (type === "[object Uint8Array]") {
948
+ return "uint8array";
949
+ }
950
+ if (type === "[object Uint8ClampedArray]") {
951
+ return "uint8clampedarray";
952
+ }
953
+ if (type === "[object Int16Array]") {
954
+ return "int16array";
955
+ }
956
+ if (type === "[object Uint16Array]") {
957
+ return "uint16array";
958
+ }
959
+ if (type === "[object Int32Array]") {
960
+ return "int32array";
961
+ }
962
+ if (type === "[object Uint32Array]") {
963
+ return "uint32array";
964
+ }
965
+ if (type === "[object Float32Array]") {
966
+ return "float32array";
967
+ }
968
+ if (type === "[object Float64Array]") {
969
+ return "float64array";
970
+ }
971
+ return "object";
972
+ };
973
+ }
974
+ });
975
+
976
+ // ../node_modules/merge-deep/index.js
977
+ var require_merge_deep = __commonJS({
978
+ "../node_modules/merge-deep/index.js"(exports2, module2) {
979
+ "use strict";
980
+ init_cjs_shims();
981
+ var union = require_arr_union();
982
+ var clone = require_clone_deep();
983
+ var typeOf = require_kind_of();
984
+ module2.exports = function mergeDeep(orig, objects) {
985
+ if (!isObject(orig) && !Array.isArray(orig)) {
986
+ orig = {};
987
+ }
988
+ var target = clone(orig);
989
+ var len = arguments.length;
990
+ var idx = 0;
991
+ while (++idx < len) {
992
+ var val = arguments[idx];
993
+ if (isObject(val) || Array.isArray(val)) {
994
+ merge2(target, val);
995
+ }
996
+ }
997
+ return target;
998
+ };
999
/**
 * Merges the own enumerable properties of `obj` into `target`, in place.
 *
 * - Keys rejected by `isValidKey` and inherited keys are skipped.
 * - When both sides hold objects, they are merged recursively.
 * - When the existing value is an array, it is unioned with the incoming value.
 * - Otherwise the incoming value is deep-cloned over the existing one.
 *
 * @param {Object|Array} target - Destination, mutated and returned.
 * @param {Object|Array} obj - Source of the properties to merge.
 * @returns {Object|Array} `target`.
 */
function merge2(target, obj) {
  for (var key in obj) {
    if (isValidKey(key) && hasOwn(obj, key)) {
      var incoming = obj[key];
      var existing = target[key];
      if (isObject(existing) && isObject(incoming)) {
        target[key] = merge2(existing, incoming);
      } else if (Array.isArray(existing)) {
        target[key] = union([], existing, incoming);
      } else {
        target[key] = clone(incoming);
      }
    }
  }
  return target;
}
1016
/**
 * Reports whether `key` is an own (non-inherited) property of `obj`.
 * Goes through `Object.prototype` so objects without a prototype also work.
 *
 * @param {Object} obj - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean}
 */
function hasOwn(obj, key) {
  return Reflect.apply(Object.prototype.hasOwnProperty, obj, [key]);
}
1019
/**
 * True when `typeOf` classifies `val` as an "object" or a "function".
 *
 * @param {*} val - Value to classify.
 * @returns {boolean}
 */
function isObject(val) {
  var kind = typeOf(val);
  return kind === "object" || kind === "function";
}
1022
/**
 * Rejects the property names that could be used to reach or overwrite an
 * object's prototype while merging.
 *
 * @param {string} key - Property name about to be merged.
 * @returns {boolean} `false` for `__proto__`, `constructor` and `prototype`.
 */
function isValidKey(key) {
  switch (key) {
    case "__proto__":
    case "constructor":
    case "prototype":
      return false;
    default:
      return true;
  }
}
1025
+ }
1026
+ });
1027
+
1028
+ // ../node_modules/puppeteer-extra-plugin/dist/index.esm.js
1029
+ var index_esm_exports = {};
1030
+ __export(index_esm_exports, {
1031
+ PuppeteerExtraPlugin: () => PuppeteerExtraPlugin
1032
+ });
1033
+ var import_debug, merge, PuppeteerExtraPlugin;
1034
+ var init_index_esm = __esm({
1035
+ "../node_modules/puppeteer-extra-plugin/dist/index.esm.js"() {
1036
+ "use strict";
1037
+ init_cjs_shims();
1038
+ import_debug = __toESM(require_src());
1039
+ merge = require_merge_deep();
1040
/**
 * Base class for `puppeteer-extra` plugins.
 *
 * Subclasses must override `name`. Every other getter and lifecycle hook is
 * optional and defaults to an empty value or a no-op.
 */
PuppeteerExtraPlugin = class {
  /**
   * @param {Object} [opts] - User supplied options, merged over `defaults`
   *   with the bundled merge-deep helper.
   */
  constructor(opts) {
    this._debugBase = (0, import_debug.default)(`puppeteer-extra-plugin:base:${this.name}`);
    this._childClassMembers = [];
    this._opts = merge(this.defaults, opts || {});
    this._debugBase("Initialized.");
  }
  /**
   * Plugin name (required).
   *
   * Convention: package `puppeteer-extra-plugin-anonymize-ua` uses the name
   * `anonymize-ua`.
   *
   * @example
   * get name () { return 'anonymize-ua' }
   *
   * @throws {Error} Always, unless overridden by the subclass.
   */
  get name() {
    throw new Error('Plugin must override "name"');
  }
  /**
   * Plugin defaults (optional).
   *
   * Deep-merged with the options supplied at instantiation; the result is
   * available through `this.opts`.
   *
   * @example
   * get defaults () {
   *   return { stripHeadless: true, makeWindows: true, customFn: null }
   * }
   * // Users can overwrite plugin defaults during instantiation:
   * puppeteer.use(require('puppeteer-extra-plugin-foobar')({ makeWindows: false }))
   */
  get defaults() {
    return {};
  }
  /**
   * Plugin requirements (optional), signalled to the base class and the user.
   *
   * Currently supported:
   * - `launch`: the plugin only supports locally created browser instances
   *   (no `puppeteer.connect()`); a warning is shown to the user.
   * - `headful`: the plugin doesn't work in `headless: true` mode; a warning
   *   is shown to the user.
   * - `dataFromPlugins`: the plugin requires data from other plugins; enables
   *   `this.getDataFromPlugins()`.
   * - `runLast`: the plugin prefers to run after the others.
   *
   * @example
   * get requirements () {
   *   return new Set(['runLast', 'dataFromPlugins'])
   * }
   */
  get requirements() {
    return /* @__PURE__ */ new Set([]);
  }
  /**
   * Plugin dependencies (optional).
   *
   * Missing plugins will be required() by puppeteer-extra.
   *
   * @example
   * get dependencies () {
   *   return new Set(['user-preferences'])
   * }
   * // Will ensure the 'puppeteer-extra-plugin-user-preferences' plugin is loaded.
   */
  get dependencies() {
    return /* @__PURE__ */ new Set([]);
  }
  /**
   * Plugin data (optional).
   *
   * An array of objects that other plugins listing the `dataFromPlugins`
   * requirement can read through `this.getDataFromPlugins()`.
   *
   * Convention: `[ { name: 'Any name', value: 'Any value' } ]`
   *
   * @example
   * get data () {
   *   return [
   *     { name: 'userPreferences', value: { foo: 'bar' } },
   *     { name: 'userPreferences', value: { hello: 'world' } }
   *   ]
   * }
   */
  get data() {
    return [];
  }
  /**
   * The plugin options (usually `defaults` merged with user defined options).
   *
   * To skip the auto-merging, don't define `defaults` and set `this._opts`
   * directly in the plugin constructor.
   *
   * @example
   * get defaults () { return { foo: "bar" } }
   *
   * async onPageCreated (page) {
   *   this.debug(this.opts.foo) // => bar
   * }
   */
  get opts() {
    return this._opts;
  }
  /**
   * Convenience logger based on the `debug` module, namespaced as
   * `puppeteer-extra-plugin:<plugin_name>`. A new logger is created on every
   * access.
   *
   * ```bash
   * # toggle output using environment variables
   * DEBUG=puppeteer-extra-plugin:<plugin_name> node foo.js
   * # to debug all the things:
   * DEBUG=puppeteer-extra,puppeteer-extra-plugin:* node foo.js
   * ```
   *
   * @example
   * this.debug('hello world')
   */
  get debug() {
    return (0, import_debug.default)(`puppeteer-extra-plugin:${this.name}`);
  }
  /**
   * Hook: before a new browser instance is created/launched.
   *
   * Can be used to modify the puppeteer launch options by modifying or
   * returning them. Plugins are called in sequence so each can update them.
   *
   * @example
   * async beforeLaunch (options) {
   *   if (this.opts.flashPluginPath) {
   *     options.args.push(`--ppapi-flash-path=${this.opts.flashPluginPath}`)
   *   }
   * }
   *
   * @param options - Puppeteer launch options
   */
  async beforeLaunch(options) {
  }
  /**
   * Hook: after the browser has launched.
   *
   * Don't assume a single browser instance per plugin lifetime:
   * `puppeteer.launch` may be called several times. Prefer stateless plugins,
   * e.g. obtain the browser through `page.browser()` in `onPageCreated`.
   *
   * @param browser - The `puppeteer` browser instance.
   * @param opts.options - Puppeteer launch options used.
   *
   * @example
   * async afterLaunch (browser, opts) {
   *   this.debug('browser has been launched', opts.options)
   * }
   */
  async afterLaunch(browser, opts = { options: {} }) {
  }
  /**
   * Hook: before connecting to an existing browser instance.
   *
   * Can be used to modify the puppeteer connect options by modifying or
   * returning them. Plugins are called in sequence.
   *
   * @param {Object} options - Puppeteer connect options
   * @return {Object=}
   */
  async beforeConnect(options) {
  }
  /**
   * Hook: after connecting to an existing browser instance.
   *
   * @param browser - The `puppeteer` browser instance.
   * @param {Object} opts
   * @param {Object} opts.options - Puppeteer connect options used.
   */
  async afterConnect(browser, opts = {}) {
  }
  /**
   * Hook: called when a browser instance is available, for both
   * `puppeteer.launch()` and `puppeteer.connect()`.
   *
   * @param browser - The `puppeteer` browser instance.
   */
  async onBrowser(browser, opts) {
  }
  /**
   * Hook: called when a target is created, for example when a new page is
   * opened by window.open or browser.newPage. Includes incognito contexts and
   * browsers obtained through `.launch()` as well as `.connect()`.
   *
   * @param {Puppeteer.Target} target
   */
  async onTargetCreated(target) {
  }
  /**
   * Hook: same as `onTargetCreated` but prefiltered to pages.
   *
   * @param {Puppeteer.Page} page
   *
   * @example
   * async onPageCreated (page) {
   *   let ua = await page.browser().userAgent()
   *   if (this.opts.stripHeadless) {
   *     ua = ua.replace('HeadlessChrome/', 'Chrome/')
   *   }
   *   this.debug('new ua', ua)
   *   await page.setUserAgent(ua)
   * }
   */
  async onPageCreated(page) {
  }
  /**
   * Hook: called when the url of a target changes.
   *
   * @param {Puppeteer.Target} target
   */
  async onTargetChanged(target) {
  }
  /**
   * Hook: called when a target is destroyed, for example when a page is closed.
   *
   * @param {Puppeteer.Target} target
   */
  async onTargetDestroyed(target) {
  }
  /**
   * Hook: called when Puppeteer gets disconnected from the Chromium instance,
   * i.e. Chromium closed or crashed, or `browser.disconnect` was called.
   */
  async onDisconnected() {
  }
  /**
   * **Deprecated:** since puppeteer v1.6.0 `onDisconnected` should be used
   * instead of `onClose`.
   *
   * Might be called multiple times on exit. Only bound for browser instances
   * created through `.launch()`.
   */
  async onClose() {
  }
  /**
   * Hook: after the plugin has been registered in `puppeteer-extra`,
   * normally right after `puppeteer.use(plugin)` is called.
   */
  async onPluginRegistered() {
  }
  /**
   * Helper to retrieve `data` objects from other plugins.
   *
   * A plugin needs to state the `dataFromPlugins` requirement in order to use
   * this method; it will be mapped to `puppeteer.getPluginData`. The base
   * implementation returns an empty array.
   *
   * @param name - Filter data by `name` property
   */
  getDataFromPlugins(name) {
    return [];
  }
  /**
   * Matches this plugin's dependencies against the registered plugins.
   * Called by `puppeteer-extra` to require missing dependencies.
   *
   * @param {Array<Object>} plugins - Registered plugins (read by `name`).
   * @return {Set} Names of the dependencies that are not registered yet.
   *
   * @private
   */
  _getMissingDependencies(plugins) {
    const registered = new Set(plugins.map((plugin) => plugin.name));
    return new Set([...this.dependencies].filter((dependency) => !registered.has(dependency)));
  }
  /**
   * Conditionally binds browser/process events to class members, then runs
   * `afterLaunch`/`afterConnect` (depending on the context) and `onBrowser`.
   *
   * A listener is only registered when the child class actually defines the
   * respective member (see `_registerChildClassMembers`).
   *
   * @param {<Puppeteer.Browser>} browser
   * @param {Object} opts - Options
   * @param {string} opts.context - Puppeteer context (launch/connect)
   * @param {Object} [opts.options] - Puppeteer launch or connect options
   * @param {Array<string>} [opts.defaultArgs] - The default flags that Chromium will be launched with
   *
   * @private
   */
  async _bindBrowserEvents(browser, opts = {}) {
    const isLaunch = opts.context === "launch";
    if (this._hasChildClassMember("onTargetCreated") || this._hasChildClassMember("onPageCreated")) {
      browser.on("targetcreated", this._onTargetCreated.bind(this));
    }
    if (this._hasChildClassMember("onTargetChanged") && this.onTargetChanged) {
      browser.on("targetchanged", this.onTargetChanged.bind(this));
    }
    if (this._hasChildClassMember("onTargetDestroyed") && this.onTargetDestroyed) {
      browser.on("targetdestroyed", this.onTargetDestroyed.bind(this));
    }
    if (this._hasChildClassMember("onDisconnected") && this.onDisconnected) {
      browser.on("disconnected", this.onDisconnected.bind(this));
    }
    if (isLaunch && this._hasChildClassMember("onClose") && this.onClose) {
      // `opts.options` is optional: without this fallback a launch context that
      // omits it would throw a TypeError on the handleSIG* lookups below.
      const launchOptions = opts.options || {};
      process.on("exit", this.onClose.bind(this));
      browser.on("disconnected", this.onClose.bind(this));
      // Signal listeners are bound unless explicitly disabled with `false`.
      if (launchOptions.handleSIGINT !== false) {
        process.on("SIGINT", this.onClose.bind(this));
      }
      if (launchOptions.handleSIGTERM !== false) {
        process.on("SIGTERM", this.onClose.bind(this));
      }
      if (launchOptions.handleSIGHUP !== false) {
        process.on("SIGHUP", this.onClose.bind(this));
      }
    }
    if (isLaunch && this.afterLaunch) {
      await this.afterLaunch(browser, opts);
    }
    if (opts.context === "connect" && this.afterConnect) {
      await this.afterConnect(browser, opts);
    }
    if (this.onBrowser) {
      await this.onBrowser(browser, opts);
    }
  }
  /**
   * Dispatches a created target to `onTargetCreated` and, for page targets
   * that resolve to an open page, to `onPageCreated`. Errors raised while
   * handling the page are logged, not rethrown.
   *
   * @private
   */
  async _onTargetCreated(target) {
    if (this.onTargetCreated) {
      await this.onTargetCreated(target);
    }
    if (target.type() !== "page") {
      return;
    }
    try {
      const page = await target.page();
      if (!page) {
        return;
      }
      const validPage = "isClosed" in page && !page.isClosed();
      if (this.onPageCreated && validPage) {
        await this.onPageCreated(page);
      }
    } catch (err) {
      console.error(err);
    }
  }
  /**
   * Records the child class members and fires `onPluginRegistered`
   * (its returned promise is not awaited).
   *
   * @private
   */
  _register(prototype) {
    this._registerChildClassMembers(prototype);
    if (this.onPluginRegistered) {
      this.onPluginRegistered();
    }
  }
  /**
   * Stores the own property names of `prototype` for `_hasChildClassMember`.
   *
   * @private
   */
  _registerChildClassMembers(prototype) {
    this._childClassMembers = Object.getOwnPropertyNames(prototype);
  }
  /**
   * @param {string} name - Member name to look up.
   * @return {boolean} Whether the registered child prototype defines `name`.
   *
   * @private
   */
  _hasChildClassMember(name) {
    return this._childClassMembers.includes(name);
  }
  /**
   * Marker property identifying instances of this base class.
   *
   * @private
   */
  get _isPuppeteerExtraPlugin() {
    return true;
  }
};
1496
+ }
1497
+ });
1498
+
1499
+ // ../node_modules/puppeteer-extra-plugin-stealth/index.js
1500
+ var require_puppeteer_extra_plugin_stealth = __commonJS({
1501
+ "../node_modules/puppeteer-extra-plugin-stealth/index.js"(exports2, module2) {
1502
+ "use strict";
1503
+ init_cjs_shims();
1504
+ var { PuppeteerExtraPlugin: PuppeteerExtraPlugin2 } = (init_index_esm(), __toCommonJS(index_esm_exports));
1505
/**
 * Stealth plugin: declares one plugin dependency per enabled evasion so that
 * the evasion modules get loaded alongside it.
 */
var StealthPlugin = class extends PuppeteerExtraPlugin2 {
  constructor(opts = {}) {
    super(opts);
  }
  get name() {
    return "stealth";
  }
  /**
   * Default options: the full set of known evasions plus a separate copy of
   * it as the enabled set (all evasions are enabled by default).
   */
  get defaults() {
    const evasionNames = [
      "chrome.app",
      "chrome.csi",
      "chrome.loadTimes",
      "chrome.runtime",
      "defaultArgs",
      "iframe.contentWindow",
      "media.codecs",
      "navigator.hardwareConcurrency",
      "navigator.languages",
      "navigator.permissions",
      "navigator.plugins",
      "navigator.webdriver",
      "sourceurl",
      "user-agent-override",
      "webgl.vendor",
      "window.outerdimensions"
    ];
    const availableEvasions = /* @__PURE__ */ new Set(evasionNames);
    const enabledEvasions = /* @__PURE__ */ new Set([...availableEvasions]);
    return { availableEvasions, enabledEvasions };
  }
  /**
   * Requires evasion techniques dynamically based on configuration: one
   * `<name>/evasions/<evasion>` entry per enabled evasion.
   *
   * @private
   */
  get dependencies() {
    const prefix = `${this.name}/evasions/`;
    return new Set(Array.from(this.opts.enabledEvasions, (evasion) => `${prefix}${evasion}`));
  }
  /**
   * Get all available evasions (taken from `defaults`).
   *
   * @type {Set<string>} - A Set of all available evasions.
   *
   * @example
   * const pluginStealth = require('puppeteer-extra-plugin-stealth')()
   * console.log(pluginStealth.availableEvasions)
   * puppeteer.use(pluginStealth)
   */
  get availableEvasions() {
    const { availableEvasions } = this.defaults;
    return availableEvasions;
  }
  /**
   * Get all enabled evasions.
   *
   * Enabled evasions can be configured either through `opts` or by modifying
   * this property.
   *
   * @type {Set<string>} - A Set of all enabled evasions.
   *
   * @example
   * // Remove specific evasion from enabled ones dynamically
   * const pluginStealth = require('puppeteer-extra-plugin-stealth')()
   * pluginStealth.enabledEvasions.delete('console.debug')
   * puppeteer.use(pluginStealth)
   */
  get enabledEvasions() {
    return this.opts.enabledEvasions;
  }
  /**
   * @private
   */
  set enabledEvasions(evasions) {
    this.opts.enabledEvasions = evasions;
  }
  /**
   * Raises the browser's listener limit to 30 when the browser object
   * exposes `setMaxListeners`.
   */
  async onBrowser(browser) {
    if (!browser || !browser.setMaxListeners) {
      return;
    }
    browser.setMaxListeners(30);
  }
};
1590
+ var defaultExport = (opts) => new StealthPlugin(opts);
1591
+ module2.exports = defaultExport;
1592
+ }
1593
+ });
1594
+
1595
+ // src/index.ts
1596
+ var index_exports = {};
1597
+ __export(index_exports, {
1598
+ FetchEngine: () => FetchEngine,
1599
+ HybridEngine: () => HybridEngine,
1600
+ PlaywrightEngine: () => PlaywrightEngine
1601
+ });
1602
+ module.exports = __toCommonJS(index_exports);
1603
+ init_cjs_shims();
1604
+
1605
+ // src/FetchEngine.ts
1606
+ init_cjs_shims();
1607
+
1608
+ // src/utils/markdown-converter.ts
1609
+ init_cjs_shims();
1610
+ var import_turndown = __toESM(require("turndown"), 1);
1611
+ var import_turndown_plugin_gfm = require("turndown-plugin-gfm");
1612
+ var import_node_html_parser = require("node-html-parser");
1613
// Elements removed from the parsed document before conversion.
var PREPROCESSING_REMOVE_SELECTORS = [
  // Keep JSON-LD
  "script:not([type='application/ld+json'])",
  "style",
  "noscript",
  // Keep iframes with titles (potential embeds)
  "iframe:not([title])"
];

// Selectors identifying main content; the final "body" entry is the fallback.
var MAIN_CONTENT_SELECTORS = [
  // By semantics
  "article", "main", "[role='main']", "[role='article']",
  // By common class/id names (more robust patterns)
  "[class*='article-body']", "[class*='post-content']",
  "[class*='main-content']", "[class*='entry-content']",
  "[id*='article-body']", "[id*='main-content']",
  // Common CMS patterns
  ".article", ".post", ".content", ".entry", ".blog-post",
  // Fallback
  "body"
];

// Forum-related selector groups (their consumers are outside this excerpt).
var FORUM_COMMENT_SELECTORS = [".comment", ".comments", ".comtr", '[id^="comment-"]', 'div[id^="c_"]'];
var FORUM_THREAD_SELECTORS = [
  ".thread",
  ".post",
  '[id^="thread-"]'
];
var FORUM_VOTE_SELECTORS = [
  ".vote",
  ".score",
  ".upvote",
  ".downvote",
  ".votelinks"
];
var FORUM_MAIN_POST_SELECTORS = [
  ".fatitem",
  ".submission",
  ".op",
  ".original-post"
];
var FORUM_COMMENTS_CONTAINER_SELECTORS = [
  ".comment-tree",
  ".comments",
  "#comments"
];
var FORUM_OBVIOUS_NON_CONTENT_SELECTORS = [
  "header",
  "footer",
  ".nav",
  ".sidebar"
];

// Numeric tuning values for link-density filtering and forum detection.
var MIN_LINK_DENSITY_TEXT_LENGTH = 50;
var DEFAULT_LINK_DENSITY_THRESHOLD = 0.4;
var MIN_FORUM_INDICATOR_COUNT = 3;

// Class-name prefixes that carry a code block's language, e.g. "language-js".
var CODE_BLOCK_LANG_PREFIXES = ["language-", "lang-"];

var POSTPROCESSING_MAX_CONSECUTIVE_NEWLINES = 2;
1660
+ var MarkdownConverter = class {
1661
+ turndownService;
1662
+ constructor() {
1663
+ this.turndownService = new import_turndown.default({
1664
+ headingStyle: "atx",
1665
+ codeBlockStyle: "fenced",
1666
+ bulletListMarker: "-",
1667
+ strongDelimiter: "**",
1668
+ emDelimiter: "*",
1669
+ hr: "---",
1670
+ // Use nodeType check instead of window.HTMLElement
1671
+ keepReplacement: (_content, node) => {
1672
+ if (node.nodeType === 1) {
1673
+ const htmlElement = node;
1674
+ if (htmlElement.getAttribute("role") === "presentation" || htmlElement.classList?.contains("preserve")) {
1675
+ return htmlElement.outerHTML;
1676
+ }
1677
+ }
1678
+ return "";
1679
+ }
1680
+ });
1681
+ this.turndownService.use(import_turndown_plugin_gfm.gfm);
1682
+ this.setupPrioritizedRules();
1683
+ }
1684
+ // --- Public Method ---
1685
+ /**
1686
+ * Converts HTML string to Markdown.
1687
+ * @param html The HTML string to convert.
1688
+ * @param options Conversion options.
1689
+ * @returns The converted Markdown string.
1690
+ */
1691
+ convert(html, options = {}) {
1692
+ const preprocessedHtml = this.preprocessHTML(html);
1693
+ let markdown = this.turndownService.turndown(preprocessedHtml);
1694
+ markdown = this.postprocessMarkdown(markdown, options);
1695
+ return markdown;
1696
+ }
1697
+ // --- Turndown Rule Setup ---
1698
+ setupPrioritizedRules() {
1699
+ this.addContentExtractionRules();
1700
+ this.addStructureRules();
1701
+ this.addBlockRules();
1702
+ this.addInlineRules();
1703
+ }
1704
+ // We rely on preprocessing to remove nav/menus/high-link-density areas.
1705
+ // These rules primarily help Turndown understand the *structure* of the *intended* content.
1706
+ addContentExtractionRules() {
1707
+ this.turndownService.addRule("main-content-marker", {
1708
+ filter: (node) => {
1709
+ if (node.nodeType !== 1) return false;
1710
+ const el = node;
1711
+ const element = node;
1712
+ return el.tagName.toLowerCase() === "main" || ["main", "article"].includes(el.getAttribute("role") || "") || MAIN_CONTENT_SELECTORS.some((selector) => {
1713
+ try {
1714
+ return element.matches(selector) && selector !== "body";
1715
+ } catch {
1716
+ return false;
1717
+ }
1718
+ });
1719
+ },
1720
+ // Just pass content through, this rule is mainly for filter priority/debugging
1721
+ replacement: (content) => content
1722
+ });
1723
+ const unwantedTags = [
1724
+ "script",
1725
+ "style",
1726
+ "noscript",
1727
+ "iframe",
1728
+ "button",
1729
+ "input",
1730
+ "select",
1731
+ "textarea",
1732
+ "form",
1733
+ "canvas",
1734
+ /*'svg' removed */
1735
+ "audio",
1736
+ "video"
1737
+ ];
1738
+ this.turndownService.addRule("remove-unwanted", {
1739
+ filter: unwantedTags,
1740
+ replacement: () => ""
1741
+ });
1742
+ }
1743
+ addStructureRules() {
1744
+ this.turndownService.addRule("article", {
1745
+ filter: "article",
1746
+ replacement: (content) => `
1747
+
1748
+ ${content}
1749
+
1750
+ `
1751
+ // Add separation
1752
+ });
1753
+ this.turndownService.addRule("section", {
1754
+ filter: "section",
1755
+ replacement: (content) => `
1756
+
1757
+ ${content}
1758
+
1759
+ `
1760
+ // Add separation
1761
+ });
1762
+ }
1763
+ addBlockRules() {
1764
+ this.turndownService.addRule("list", {
1765
+ filter: ["ul", "ol"],
1766
+ replacement: (content, node) => {
1767
+ if (node.nodeType !== 1) return content;
1768
+ const parent = node.parentNode;
1769
+ const indent = parent && parent.nodeName.toLowerCase() === "li" ? " " : "";
1770
+ return "\n" + content.split("\n").map((line) => indent + line.trimEnd()).join("\n").trim() + "\n";
1771
+ }
1772
+ });
1773
+ this.turndownService.addRule("listItem", {
1774
+ filter: "li",
1775
+ // Use standard function for `this` context if needed, or ensure types match
1776
+ replacement: function(content, node, options) {
1777
+ content = content.replace(/^\s+/gm, "").replace(/\n(?!\s*$)/gm, "\n ");
1778
+ let prefix = options.bulletListMarker + " ";
1779
+ const parentNode = node.parentNode;
1780
+ if (parentNode && parentNode.nodeName === "OL") {
1781
+ try {
1782
+ const start = parentNode.getAttribute("start");
1783
+ const elementNode = node;
1784
+ const parentElement = parentNode;
1785
+ const index = Array.prototype.indexOf.call(parentElement.children, elementNode);
1786
+ prefix = (start ? Number(start) + index : index + 1) + ". ";
1787
+ } catch (e) {
1788
+ console.warn("Could not determine ordered list index:", e);
1789
+ prefix = "1. ";
1790
+ }
1791
+ }
1792
+ const trimmedContent = content.trim();
1793
+ return prefix + trimmedContent + (node.nextSibling && !/\n$/.test(trimmedContent) ? "\n" : "");
1794
+ }
1795
+ });
1796
+ this.turndownService.addRule("blockquote", {
1797
+ filter: "blockquote",
1798
+ replacement: (content) => {
1799
+ const trimmedContent = content.trim();
1800
+ return "\n\n> " + trimmedContent.replace(/\n/g, "\n> ") + "\n\n";
1801
+ }
1802
+ });
1803
+ }
1804
+ addInlineRules() {
1805
+ this.turndownService.addRule("link", {
1806
+ filter: (node, _options) => {
1807
+ return node.nodeType === 1 && node.nodeName === "A" && !!node.getAttribute("href");
1808
+ },
1809
+ replacement: (content, node) => {
1810
+ const element = node;
1811
+ const href = element.getAttribute("href") || "";
1812
+ const title = element.getAttribute("title");
1813
+ const text = content.trim() ? content.trim() : href;
1814
+ let decodedHref = href;
1815
+ try {
1816
+ if (href.includes("%")) {
1817
+ decodedHref = decodeURI(href);
1818
+ }
1819
+ } catch (e) {
1820
+ console.warn(`Failed to decode URI, keeping original: ${href}`, e);
1821
+ }
1822
+ return title ? `[${text}](${decodedHref} "${title}")` : `[${text}](${decodedHref})`;
1823
+ }
1824
+ });
1825
+ this.turndownService.addRule("figure", {
1826
+ filter: "figure",
1827
+ replacement: (content, node) => {
1828
+ if (node.nodeType !== 1) return content;
1829
+ const element = node;
1830
+ const img = element.querySelector("img");
1831
+ const figcaption = element.querySelector("figcaption");
1832
+ let markdown = "";
1833
+ let mainImgMd = "";
1834
+ if (img) {
1835
+ const src = img.getAttribute("src") || "";
1836
+ const alt = img.getAttribute("alt") || "";
1837
+ const title = img.getAttribute("title");
1838
+ mainImgMd = title ? `![${alt}](${src} "${title}")` : `![${alt}](${src})`;
1839
+ }
1840
+ let processedContent = content.trim();
1841
+ if (mainImgMd) {
1842
+ markdown = mainImgMd;
1843
+ const imgPlaceholder = `![${img?.getAttribute("alt") || ""}](${img?.getAttribute("src") || ""})`;
1844
+ processedContent = processedContent.replace(imgPlaceholder, "").trim();
1845
+ }
1846
+ if (figcaption) {
1847
+ const captionText = figcaption.textContent?.trim();
1848
+ if (captionText) {
1849
+ markdown += `
1850
+
1851
+ _${captionText}_`;
1852
+ processedContent = processedContent.replace(captionText, "").trim();
1853
+ processedContent = processedContent.replace(/^_+|_+$/g, "").trim();
1854
+ }
1855
+ }
1856
+ if (processedContent) {
1857
+ if (processedContent.length > 10 || /[a-zA-Z0-9]/.test(processedContent)) {
1858
+ markdown += `
1859
+
1860
+ ${processedContent}`;
1861
+ }
1862
+ }
1863
+ return "\n\n" + markdown.trim() + "\n\n";
1864
+ }
1865
+ });
1866
+ this.turndownService.addRule("image", {
1867
+ filter: (node) => {
1868
+ return node.nodeType === 1 && node.nodeName === "IMG" && !!node.getAttribute("src");
1869
+ },
1870
+ replacement: (_content, node) => {
1871
+ const element = node;
1872
+ const src = element.getAttribute("src") || "";
1873
+ const alt = element.getAttribute("alt") || "";
1874
+ const title = element.getAttribute("title");
1875
+ return title ? `
1876
+
1877
+ ![${alt}](${src} "${title}")
1878
+
1879
+ ` : `
1880
+
1881
+ ![${alt}](${src})
1882
+
1883
+ `;
1884
+ }
1885
+ });
1886
+ this.turndownService.addRule("code-block", {
1887
+ filter: (node) => {
1888
+ if (node.nodeType !== 1) return false;
1889
+ const element = node;
1890
+ const isPre = element.tagName.toLowerCase() === "pre";
1891
+ if (!isPre) return false;
1892
+ const hasCodeChild = element.querySelector("code") !== null;
1893
+ const hasCodeClass = /highlight|syntax|code|listing|source/i.test(element.className);
1894
+ const hasLangAttribute = !!element.getAttribute("lang") || !!element.getAttribute("language");
1895
+ return hasCodeChild || hasCodeClass || hasLangAttribute;
1896
+ },
1897
+ replacement: (content, node) => {
1898
+ if (node.nodeType !== 1) return content.trim();
1899
+ const element = node;
1900
+ let language = "";
1901
+ const codeElement = element.querySelector("code");
1902
+ language = element.getAttribute("lang") || element.getAttribute("language") || (codeElement ? codeElement.getAttribute("lang") || codeElement.getAttribute("language") : "") || "";
1903
+ if (!language) {
1904
+ const classes = (element.className + " " + (codeElement?.className || "")).split(" ").filter(Boolean);
1905
+ for (const cls of classes) {
1906
+ for (const prefix of CODE_BLOCK_LANG_PREFIXES) {
1907
+ if (cls.startsWith(prefix)) {
1908
+ language = cls.substring(prefix.length);
1909
+ break;
1910
+ }
1911
+ }
1912
+ if (language) break;
1913
+ }
1914
+ }
1915
+ const cleanedContent = content.trim();
1916
+ return `
1917
+
1918
+ \`\`\`${language}
1919
+ ${cleanedContent}
1920
+ \`\`\`
1921
+
1922
+ `;
1923
+ }
1924
+ });
1925
+ this.turndownService.addRule("inlineCode", {
1926
+ filter: (node) => node.nodeName === "CODE" && node.parentNode?.nodeName !== "PRE",
1927
+ replacement: (content) => {
1928
+ const trimmed = content.trim();
1929
+ if (!trimmed) return "";
1930
+ let delimiter = "`";
1931
+ if (trimmed.includes("`")) {
1932
+ delimiter = "``";
1933
+ if (trimmed.startsWith("`") || trimmed.endsWith("`")) {
1934
+ return `${delimiter} ${trimmed} ${delimiter}`;
1935
+ }
1936
+ }
1937
+ return delimiter + trimmed + delimiter;
1938
+ }
1939
+ });
1940
+ }
1941
+ // --- HTML Preprocessing ---
1942
+ preprocessHTML(html) {
1943
+ try {
1944
+ html = this.cleanupHtml(html);
1945
+ const root = (0, import_node_html_parser.parse)(html, {
1946
+ comment: false,
1947
+ blockTextElements: { script: true, style: true, noscript: true }
1948
+ });
1949
+ if (root.nodeType === 3) {
1950
+ return root.textContent ?? "";
1951
+ } else if (root.nodeType !== 1) {
1952
+ console.warn("Unexpected root node type after parsing:", root.nodeType);
1953
+ return root.toString();
1954
+ }
1955
+ const rootElement = root;
1956
+ PREPROCESSING_REMOVE_SELECTORS.forEach((selector) => {
1957
+ try {
1958
+ rootElement.querySelectorAll(selector).forEach((el) => el.remove());
1959
+ } catch (e) {
1960
+ console.warn(`Skipping invalid selector during preprocessing: ${selector}`, e);
1961
+ }
1962
+ });
1963
+ this.removeHighLinkDensityElements(rootElement, DEFAULT_LINK_DENSITY_THRESHOLD);
1964
+ const metadata = this.extractDocumentMetadata(rootElement);
1965
+ const isForum = this.detectForumPage(rootElement);
1966
+ let contentElement = rootElement;
1967
+ if (isForum) {
1968
+ contentElement = this.extractForumContentElement(rootElement);
1969
+ } else {
1970
+ contentElement = this.extractArticleContentElement(rootElement);
1971
+ }
1972
+ let contentHtml = contentElement instanceof import_node_html_parser.HTMLElement ? contentElement.outerHTML : contentElement.textContent;
1973
+ contentHtml = this.cleanupContentHtml(contentHtml || "");
1974
+ const metadataString = metadata.length > 0 ? metadata.join("\n\n") + "\n\n---\n\n" : "";
1975
+ return metadataString + contentHtml;
1976
+ } catch (error) {
1977
+ console.error("HTML preprocessing failed:", error);
1978
+ return this.cleanupHtml(html);
1979
+ }
1980
+ }
1981
+ cleanupHtml(html) {
1982
+ return html.replace(/AMIL:\[=-,amilft[^\s]*/g, "").replace(/\{\{\s*[^}\s]+\s*}}/g, "").replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
1983
+ }
1984
+ cleanupContentHtml(content) {
1985
+ return content.replace(/\s*data-(?:reactid|reactroot|react-|testid|v-|js-|qa-|cy-)[^=\s]*\s*=\s*(?:"[^"]*"|'[^']*'|\S+)/g, "").replace(/\s*ng-[^=\s]*\s*=\s*(?:"[^"]*"|'[^']*'|\S+)/g, "").replace(/\s*_ngcontent-[^\s]*\s*=""/g, "").replace(/\s*class\s*=\s*"(ng-|mat-)[^"]*"/g, "").replace(/<!--[\s\S]*?-->/g, "").replace(/([ \t])+/g, " ").replace(/\s*\n\s*/g, "\n").trim();
1986
+ }
1987
+ removeHighLinkDensityElements(element, threshold) {
1988
+ const potentialBoilerplate = element.querySelectorAll(
1989
+ "div, nav, ul, aside, section, .sidebar, .widget, .menu, [role='navigation'], [role='menubar']"
1990
+ );
1991
+ for (const el of Array.from(potentialBoilerplate)) {
1992
+ if (!(el instanceof import_node_html_parser.HTMLElement)) continue;
1993
+ const textContent = el.textContent || "";
1994
+ if (textContent.length < MIN_LINK_DENSITY_TEXT_LENGTH) continue;
1995
+ const links = el.querySelectorAll("a");
1996
+ if (links.length < 3) continue;
1997
+ const textLength = textContent.length;
1998
+ let linkTextLength = 0;
1999
+ el.querySelectorAll("a").forEach((link) => {
2000
+ if (link.closest("a") === link) {
2001
+ linkTextLength += link.textContent?.length || 0;
2002
+ }
2003
+ });
2004
+ if (textLength === 0) continue;
2005
+ const density = linkTextLength / textLength;
2006
+ if (density > threshold) {
2007
+ const containsMainContent = el.querySelector('main, article, [role="main"], [role="article"]') !== null;
2008
+ const isMainContent = MAIN_CONTENT_SELECTORS.some((selector) => {
2009
+ try {
2010
+ return el.matches(selector);
2011
+ } catch {
2012
+ return false;
2013
+ }
2014
+ });
2015
+ if (!containsMainContent && !isMainContent) {
2016
+ el.remove();
2017
+ }
2018
+ }
2019
+ }
2020
+ }
2021
+ extractDocumentMetadata(root) {
2022
+ const metadata = [];
2023
+ const addedMeta = /* @__PURE__ */ new Set();
2024
+ const addMeta = (key, value, isTitle = false) => {
2025
+ const cleanedValue = value?.trim();
2026
+ if (cleanedValue && !addedMeta.has(key.toLowerCase())) {
2027
+ if (isTitle) {
2028
+ metadata.unshift(`# ${cleanedValue}`);
2029
+ } else {
2030
+ metadata.push(`**${key}:** ${cleanedValue}`);
2031
+ }
2032
+ addedMeta.add(key.toLowerCase());
2033
+ }
2034
+ };
2035
+ addMeta("Title", root.querySelector("meta[property='og:title']")?.getAttribute("content"), true);
2036
+ addMeta("Title", root.querySelector("meta[name='twitter:title']")?.getAttribute("content"), true);
2037
+ addMeta("Title", root.querySelector("meta[name='DC.title']")?.getAttribute("content"), true);
2038
+ addMeta("Title", root.querySelector("title")?.textContent, true);
2039
+ addMeta("Description", root.querySelector("meta[property='og:description']")?.getAttribute("content"));
2040
+ addMeta("Description", root.querySelector("meta[name='twitter:description']")?.getAttribute("content"));
2041
+ addMeta("Description", root.querySelector("meta[name='description']")?.getAttribute("content"));
2042
+ addMeta("Description", root.querySelector("meta[name='DC.description']")?.getAttribute("content"));
2043
+ addMeta("Author", root.querySelector("meta[name='author']")?.getAttribute("content"));
2044
+ addMeta("Author", root.querySelector("meta[property='article:author']")?.getAttribute("content"));
2045
+ addMeta("Author", root.querySelector("[rel='author']")?.textContent);
2046
+ addMeta("Published", root.querySelector("meta[property='article:published_time']")?.getAttribute("content"));
2047
+ addMeta("Published", root.querySelector("meta[name='publish-date']")?.getAttribute("content"));
2048
+ addMeta("Published", root.querySelector("time[itemprop='datePublished']")?.getAttribute("datetime"));
2049
+ addMeta("Published", root.querySelector("time")?.getAttribute("datetime"));
2050
+ addMeta("URL", root.querySelector("link[rel='canonical']")?.getAttribute("href"));
2051
+ addMeta("URL", root.querySelector("meta[property='og:url']")?.getAttribute("content"));
2052
+ const jsonLdScripts = root.querySelectorAll("script[type='application/ld+json']");
2053
+ if (jsonLdScripts.length > 0) {
2054
+ const jsonLdData = Array.from(jsonLdScripts).map((script) => {
2055
+ try {
2056
+ const textContent = script.textContent;
2057
+ return textContent ? JSON.parse(textContent) : null;
2058
+ } catch (e) {
2059
+ return null;
2060
+ }
2061
+ }).filter((item) => item !== null);
2062
+ if (jsonLdData.length > 0 && !addedMeta.has("json-ld")) {
2063
+ metadata.push("<details><summary>JSON-LD Metadata</summary>\n");
2064
+ metadata.push("```json", JSON.stringify(jsonLdData, null, 2), "```");
2065
+ metadata.push("</details>");
2066
+ addedMeta.add("json-ld");
2067
+ }
2068
+ }
2069
+ return metadata;
2070
+ }
2071
+ detectForumPage(root) {
2072
+ const countMatches = (selectors) => {
2073
+ return selectors.reduce((count, selector) => {
2074
+ try {
2075
+ if (root) {
2076
+ return count + root.querySelectorAll(selector).length;
2077
+ }
2078
+ return count;
2079
+ } catch {
2080
+ return count;
2081
+ }
2082
+ }, 0);
2083
+ };
2084
+ const commentCount = countMatches(FORUM_COMMENT_SELECTORS);
2085
+ const threadCount = countMatches(FORUM_THREAD_SELECTORS);
2086
+ const voteCount = countMatches(FORUM_VOTE_SELECTORS);
2087
+ let isKnownForumHost = false;
2088
+ try {
2089
+ const canonicalUrl = root.querySelector('link[rel="canonical"]')?.getAttribute("href") || root.querySelector('meta[property="og:url"]')?.getAttribute("content");
2090
+ if (canonicalUrl) {
2091
+ const absoluteUrl = new URL(canonicalUrl, "http://example.com").toString();
2092
+ const hostname = new URL(absoluteUrl).hostname.toLowerCase();
2093
+ isKnownForumHost = hostname.includes("reddit.com") || hostname.includes("news.ycombinator.com") || hostname.includes("forum") || hostname.includes("discuss") || hostname.includes("community");
2094
+ }
2095
+ } catch (e) {
2096
+ console.warn("Could not parse URL for forum detection:", e);
2097
+ }
2098
+ return commentCount >= MIN_FORUM_INDICATOR_COUNT || threadCount > 1 || // More than one thread item is stronger indicator
2099
+ voteCount >= MIN_FORUM_INDICATOR_COUNT || isKnownForumHost;
2100
+ }
2101
+ // Tries to find the main content element for an article-like page
2102
+ extractArticleContentElement(root) {
2103
+ let bestCandidate = null;
2104
+ let maxScore = -1;
2105
+ for (const selector of MAIN_CONTENT_SELECTORS) {
2106
+ try {
2107
+ const elements = root.querySelectorAll(selector);
2108
+ for (const element of Array.from(elements)) {
2109
+ if (!(element instanceof import_node_html_parser.HTMLElement)) continue;
2110
+ const textLength = (element.textContent || "").trim().length;
2111
+ if (textLength < 100 && !element.querySelector("img, video, iframe, figure")) continue;
2112
+ let score = textLength;
2113
+ if (["ARTICLE", "MAIN"].includes(element.tagName)) score *= 1.5;
2114
+ if (["main", "article"].includes(element.getAttribute("role") || "")) score *= 1.5;
2115
+ if (["HEADER", "FOOTER", "NAV", "ASIDE"].includes(element.tagName)) score *= 0.3;
2116
+ try {
2117
+ if (
2118
+ /* @ts-expect-error TODO: fix this */
2119
+ element.matches(
2120
+ '.sidebar, .widget, .menu, .nav, .header, .footer, [role="navigation"], [role="complementary"], [role="banner"]'
2121
+ )
2122
+ )
2123
+ score *= 0.2;
2124
+ } catch {
2125
+ }
2126
+ if (this.hasHighLinkDensity(element, 0.6)) {
2127
+ score *= 0.5;
2128
+ }
2129
+ if (element.querySelectorAll("p").length > 2) score *= 1.2;
2130
+ if (element.tagName === "BODY" && maxScore > 200) continue;
2131
+ if (score > maxScore) {
2132
+ maxScore = score;
2133
+ bestCandidate = element;
2134
+ }
2135
+ }
2136
+ } catch (e) {
2137
+ }
2138
+ }
2139
+ return bestCandidate || root;
2140
+ }
2141
+ // Tries to find the main content element(s) for a forum-like page
2142
+ extractForumContentElement(root) {
2143
+ const tempContainer = (0, import_node_html_parser.parse)("<div></div>").firstChild;
2144
+ try {
2145
+ const mainPost = FORUM_MAIN_POST_SELECTORS.map((s) => root.querySelector(s)).find(
2146
+ (el) => el instanceof import_node_html_parser.HTMLElement
2147
+ );
2148
+ if (mainPost) {
2149
+ tempContainer.appendChild(mainPost.clone());
2150
+ }
2151
+ } catch (e) {
2152
+ console.warn("Error finding forum main post:", e);
2153
+ }
2154
+ try {
2155
+ const commentsContainer = FORUM_COMMENTS_CONTAINER_SELECTORS.map((s) => root.querySelector(s)).find(
2156
+ (el) => el instanceof import_node_html_parser.HTMLElement
2157
+ );
2158
+ if (commentsContainer) {
2159
+ const clonedComments = commentsContainer.clone();
2160
+ if (clonedComments instanceof import_node_html_parser.HTMLElement) {
2161
+ FORUM_OBVIOUS_NON_CONTENT_SELECTORS.forEach((selector) => {
2162
+ try {
2163
+ clonedComments.querySelectorAll(selector).forEach((el) => el.remove());
2164
+ } catch {
2165
+ }
2166
+ });
2167
+ tempContainer.appendChild(clonedComments);
2168
+ }
2169
+ }
2170
+ } catch (e) {
2171
+ console.warn("Error finding forum comments container:", e);
2172
+ }
2173
+ if (tempContainer.childNodes.length > 0) {
2174
+ return tempContainer;
2175
+ }
2176
+ const body = root.querySelector("body");
2177
+ if (body) {
2178
+ const clonedBody = body.clone();
2179
+ if (clonedBody instanceof import_node_html_parser.HTMLElement) {
2180
+ FORUM_OBVIOUS_NON_CONTENT_SELECTORS.forEach((selector) => {
2181
+ try {
2182
+ clonedBody.querySelectorAll(selector).forEach((el) => el.remove());
2183
+ } catch {
2184
+ }
2185
+ });
2186
+ this.removeHighLinkDensityElements(clonedBody, DEFAULT_LINK_DENSITY_THRESHOLD);
2187
+ return clonedBody;
2188
+ }
2189
+ }
2190
+ return root;
2191
+ }
2192
+ // Helper function to check link density within an element
2193
+ hasHighLinkDensity(element, threshold) {
2194
+ const textContent = element.textContent || "";
2195
+ if (textContent.length < MIN_LINK_DENSITY_TEXT_LENGTH) return false;
2196
+ const links = element.querySelectorAll("a");
2197
+ if (links.length < 3) return false;
2198
+ const textLength = textContent.length;
2199
+ let linkTextLength = 0;
2200
+ element.querySelectorAll("a").forEach((link) => {
2201
+ if (link.closest("a") === link) {
2202
+ linkTextLength += link.textContent?.length || 0;
2203
+ }
2204
+ });
2205
+ if (textLength === 0) return false;
2206
+ return linkTextLength / textLength > threshold;
2207
+ }
2208
+ // --- Markdown Postprocessing ---
2209
+ postprocessMarkdown(markdown, options) {
2210
+ let processed = markdown;
2211
+ processed = processed.replace(/^(\s*\n)?(#{1,6}\s.*)$/gm, "\n\n$2\n\n");
2212
+ processed = processed.replace(/^(\s*\n)?(([\*\-+>]|\d+\.)\s)/gm, (_match, _p1, p2) => `
2213
+
2214
+ ${p2}`);
2215
+ processed = processed.replace(
2216
+ /(\n([\*\-+]|\d+\.)\s(?:(?!\n\n|\n {2,}|\n\t)[\s\S])*?)\n(?=([\*\-+]|\d+\.)\s)/g,
2217
+ "$1"
2218
+ );
2219
+ processed = processed.replace(/\[\]\([^)]*\)/g, "");
2220
+ processed = processed.replace(/!\[\]\([^)]*\)/g, "");
2221
+ processed = processed.replace(/(!?\[[^\]]*\]\()(\/\/)/g, "$1https://");
2222
+ const maxNewlines = "\n".repeat(POSTPROCESSING_MAX_CONSECUTIVE_NEWLINES + 1);
2223
+ const newlineRegex = new RegExp(`${maxNewlines}+`, "g");
2224
+ processed = processed.replace(newlineRegex, "\n".repeat(POSTPROCESSING_MAX_CONSECUTIVE_NEWLINES));
2225
+ processed = processed.replace(/^[ \t]+|[ \t]+$/gm, "");
2226
+ processed = processed.replace(/^(\s*\n)?(```(.*)\n[\s\S]*?\n```)(\s*\n)?/gm, "\n\n$2\n\n");
2227
+ processed = processed.replace(/^(.{30,})$(\n\1)+/gm, "$1");
2228
+ processed = processed.replace(/(\n---\n)(\S)/g, "$1\n$2");
2229
+ if (options.maxContentLength && processed.length > options.maxContentLength) {
2230
+ const truncatedPoint = processed.lastIndexOf(".", options.maxContentLength - 15);
2231
+ const sliceEnd = truncatedPoint > options.maxContentLength / 2 ? truncatedPoint + 1 : options.maxContentLength;
2232
+ processed = processed.slice(0, sliceEnd) + "... (truncated)";
2233
+ }
2234
+ return processed.trim();
2235
+ }
2236
+ };
2237
+
2238
+ // src/errors.ts
2239
+ init_cjs_shims();
2240
/**
 * Base error type for fetch failures, carrying an optional error code, the
 * underlying error and an HTTP status code.
 */
var FetchError = class _FetchError extends Error {
  /** A specific error code (e.g., ERR_NAVIGATION_TIMEOUT, ERR_HTTP_ERROR). */
  code;
  /** The original error object, if available. */
  originalError;
  /** HTTP status code, if relevant. */
  statusCode;
  /**
   * @param message The error message.
   * @param code Optional error code string.
   * @param originalError Optional underlying error.
   * @param statusCode Optional HTTP status code.
   */
  constructor(message, code, originalError, statusCode) {
    super(message);
    Object.assign(this, { name: "FetchError", code, originalError, statusCode });
    // Not every runtime provides captureStackTrace, hence the optional call.
    Error.captureStackTrace?.(this, _FetchError);
  }
};
2265
+
2266
+ // src/FetchEngine.ts
2267
/**
 * FetchError raised for a non-OK HTTP response; always carries the code
 * "ERR_HTTP_ERROR" and the response status.
 */
var FetchEngineHttpError = class extends FetchError {
  /**
   * @param message The error message.
   * @param statusCode HTTP status code of the failed response.
   */
  constructor(message, statusCode) {
    super(message, "ERR_HTTP_ERROR", undefined, statusCode);
    Object.assign(this, { statusCode, name: "FetchEngineHttpError" });
  }
};
2274
/**
 * Engine that retrieves pages with the global `fetch`, optionally converting
 * the HTML to Markdown.
 */
var FetchEngine = class _FetchEngine {
  options;
  static DEFAULT_OPTIONS = {
    markdown: false
  };
  /**
   * Creates an instance of FetchEngine.
   * @param options Configuration options; merged over DEFAULT_OPTIONS.
   */
  constructor(options = {}) {
    this.options = { ..._FetchEngine.DEFAULT_OPTIONS, ...options };
  }
  /**
   * Fetches HTML from the given URL, or Markdown when the `markdown` option is set.
   *
   * @param url The URL to fetch.
   * @param options Per-call overrides merged over the instance options.
   * @returns A Promise resolving to a result object with content, contentType
   *   ("html" or "markdown"), title, final URL, isFromCache, statusCode and error.
   * @throws {FetchEngineHttpError} If the HTTP response status is not ok (e.g., 404, 500).
   * @throws {FetchError} With code ERR_NON_HTML_CONTENT if the content type is
   *   not text/html, or ERR_FETCH_FAILED for any other failure.
   */
  async fetchHTML(url, options) {
    const effectiveOptions = { ...this.options, ...options };
    let response;
    try {
      response = await fetch(url, {
        redirect: "follow",
        headers: {
          // Standard browser-like headers
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.9"
        }
      });
      if (!response.ok) {
        throw new FetchEngineHttpError(`HTTP error! status: ${response.status}`, response.status);
      }
      // Media types are case-insensitive, so compare in lower case;
      // "Text/HTML; charset=UTF-8" is valid HTML content.
      const contentTypeHeader = response.headers.get("content-type");
      if (!contentTypeHeader || !contentTypeHeader.toLowerCase().includes("text/html")) {
        throw new FetchError("Content-Type is not text/html", "ERR_NON_HTML_CONTENT");
      }
      const html = await response.text();
      const titleMatch = html.match(/<title[^>]*>([^<]+)<\/title>/i);
      const title = titleMatch ? titleMatch[1].trim() : null;
      let finalContent = html;
      let finalContentType = "html";
      if (effectiveOptions.markdown) {
        try {
          const converter = new MarkdownConverter();
          finalContent = converter.convert(html);
          finalContentType = "markdown";
        } catch (conversionError) {
          // Best effort: on conversion failure the raw HTML is returned as "html".
          console.error(`Markdown conversion failed for ${url} (FetchEngine):`, conversionError);
        }
      }
      return {
        content: finalContent,
        contentType: finalContentType,
        title,
        url: response.url,
        // Use the final URL after redirects
        isFromCache: false,
        statusCode: response.status,
        error: void 0
      };
    } catch (error) {
      // Errors raised deliberately above pass through unchanged; everything else is wrapped.
      if (error instanceof FetchEngineHttpError || error instanceof FetchError && error.code === "ERR_NON_HTML_CONTENT") {
        throw error;
      }
      const message = error instanceof Error ? error.message : "Unknown fetch error";
      throw new FetchError(`Fetch failed: ${message}`, "ERR_FETCH_FAILED", error instanceof Error ? error : void 0);
    }
  }
  /**
   * Cleans up resources used by the engine.
   * For FetchEngine, this is a no-op as it doesn't manage persistent resources.
   * @returns A Promise that resolves when cleanup is complete.
   */
  async cleanup() {
    return Promise.resolve();
  }
  /**
   * Retrieves metrics for the engine.
   * FetchEngine does not manage browsers, so it returns an empty array.
   * @returns An empty array.
   */
  getMetrics() {
    return [];
  }
};
2363
+
2364
+ // src/PlaywrightEngine.ts
2365
+ init_cjs_shims();
2366
+
2367
+ // src/browser/PlaywrightBrowserPool.ts
2368
+ init_cjs_shims();
2369
+ var import_playwright = require("playwright");
2370
+ var import_user_agents = __toESM(require("user-agents"), 1);
2371
+ var import_uuid = require("uuid");
2372
+ var import_p_queue = __toESM(require("p-queue"), 1);
2373
+ var import_playwright_extra = require("playwright-extra");
2374
// Chromium launcher with the stealth plugin attached; set only once loading has fully succeeded.
var chromiumWithExtras;
// The stealth plugin instance registered on chromiumWithExtras.
var StealthPluginInstance;
// In-flight load shared by concurrent callers; cleared when it settles.
var pendingDependencyLoad;
/**
 * Lazily wraps Playwright's chromium with playwright-extra and registers the
 * stealth plugin on it.
 *
 * The wrapped launcher is published to `chromiumWithExtras` only after the
 * plugin has been attached. Previously it was assigned first, so a failure
 * while loading the plugin (or a concurrent caller arriving mid-load) left
 * later calls using a launcher without stealth and with no retry. A failed
 * load now leaves the state unset so the next call tries again.
 *
 * @returns A Promise that resolves once the launcher is ready.
 * @throws {Error} If the stealth plugin module does not export a factory function.
 */
async function loadDependencies() {
  if (chromiumWithExtras) return;
  if (!pendingDependencyLoad) {
    pendingDependencyLoad = (async () => {
      const extended = (0, import_playwright_extra.addExtra)(import_playwright.chromium);
      const StealthPluginModule = await Promise.resolve().then(() => __toESM(require_puppeteer_extra_plugin_stealth(), 1));
      // The factory may be the default export or the module itself.
      const stealthPluginFactory = typeof StealthPluginModule.default === "function" ? StealthPluginModule.default : StealthPluginModule;
      if (typeof stealthPluginFactory !== "function") {
        throw new Error("puppeteer-extra-plugin-stealth export is not a function or module structure is unexpected.");
      }
      const plugin = stealthPluginFactory();
      extended.use(plugin);
      StealthPluginInstance = plugin;
      chromiumWithExtras = extended;
    })().finally(() => {
      pendingDependencyLoad = void 0;
    });
  }
  await pendingDependencyLoad;
}
2388
/**
 * Pool of Playwright Chromium browsers, each with a single context, handing
 * out pages on request.
 *
 * Fixes relative to the previous implementation:
 * - `healthCheck` now closes and removes instances that were already marked
 *   unhealthy (page crash, disconnect, failed page creation). Previously they
 *   were skipped and stayed in the pool forever, blocking replacements.
 * - `createBrowserInstance` closes the launched browser if creating or
 *   configuring its context fails, instead of leaking the process.
 */
var PlaywrightBrowserPool = class _PlaywrightBrowserPool {
  pool = /* @__PURE__ */ new Set();
  maxBrowsers;
  maxPagesPerContext;
  maxBrowserAge;
  healthCheckInterval;
  healthCheckTimer = null;
  maxIdleTime;
  isCleaningUp = false;
  useHeadedMode;
  blockedDomains;
  blockedResourceTypes;
  proxyConfig;
  static DEFAULT_BLOCKED_DOMAINS = [
    "doubleclick.net",
    "google-analytics.com",
    "googletagmanager.com",
    "googlesyndication.com",
    "googleadservices.com",
    "adservice.google.com",
    "facebook.net",
    "fbcdn.net",
    "connect.facebook.net",
    "ads-twitter.com",
    "platform.twitter.com",
    "analytics.tiktok.com",
    "ads.tiktok.com",
    "amazon-adsystem.com",
    "adnxs.com",
    "criteo.com",
    "scorecardresearch.com",
    "quantserve.com",
    "rubiconproject.com",
    "pubmatic.com",
    "taboola.com",
    "outbrain.com"
  ];
  static DEFAULT_BLOCKED_RESOURCE_TYPES = ["image", "font", "media", "websocket"];
  // Page acquisition is serialised (concurrency 1).
  acquireQueue = new import_p_queue.default({ concurrency: 1 });
  /**
   * @param config Optional pool settings. Missing values fall back to:
   *   maxBrowsers 2, maxPagesPerContext 6, maxBrowserAge 20 min,
   *   healthCheckInterval 60 s, maxIdleTime 5 min, useHeadedMode false.
   *   Empty or missing block lists fall back to the static defaults.
   */
  constructor(config = {}) {
    this.maxBrowsers = config.maxBrowsers ?? 2;
    this.maxPagesPerContext = config.maxPagesPerContext ?? 6;
    this.maxBrowserAge = config.maxBrowserAge ?? 20 * 60 * 1e3;
    this.healthCheckInterval = config.healthCheckInterval ?? 60 * 1e3;
    this.useHeadedMode = config.useHeadedMode ?? false;
    this.maxIdleTime = config.maxIdleTime ?? 5 * 60 * 1e3;
    this.blockedDomains = config.blockedDomains && config.blockedDomains.length > 0 ? config.blockedDomains : _PlaywrightBrowserPool.DEFAULT_BLOCKED_DOMAINS;
    this.blockedResourceTypes = config.blockedResourceTypes && config.blockedResourceTypes.length > 0 ? config.blockedResourceTypes : _PlaywrightBrowserPool.DEFAULT_BLOCKED_RESOURCE_TYPES;
    this.proxyConfig = config.proxy;
  }
  /** Loads dependencies, fills the pool and schedules the first health check. */
  async initialize() {
    await loadDependencies();
    if (this.isCleaningUp) return;
    await this.ensureMinimumInstances();
    this.scheduleHealthCheck();
  }
  /** (Re)arms the one-shot health-check timer; does nothing when the interval is 0 or less. */
  scheduleHealthCheck() {
    if (this.isCleaningUp) return;
    if (this.healthCheckTimer) {
      clearTimeout(this.healthCheckTimer);
    }
    if (this.healthCheckInterval > 0) {
      this.healthCheckTimer = setTimeout(() => {
        this.healthCheck().catch((_err) => {
        });
      }, this.healthCheckInterval);
    }
  }
  /** Creates browsers until the pool holds `maxBrowsers`; stops at the first creation failure. */
  async ensureMinimumInstances() {
    if (this.isCleaningUp) return;
    while (this.pool.size < this.maxBrowsers) {
      try {
        await this.createBrowserInstance();
      } catch (error) {
        break;
      }
    }
  }
  /**
   * Launches a browser, creates its context with request blocking, and adds
   * the resulting instance to the pool.
   *
   * @returns The new pool instance.
   * @throws Whatever the launch or context setup throws; in the latter case the
   *   launched browser is closed first.
   */
  async createBrowserInstance() {
    await loadDependencies();
    const id = (0, import_uuid.v4)();
    const launchOptions = {
      headless: !this.useHeadedMode,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-accelerated-2d-canvas",
        "--no-first-run",
        "--no-zygote",
        "--disable-gpu",
        "--mute-audio",
        "--disable-background-networking"
      ],
      proxy: this.proxyConfig
    };
    const browser = await chromiumWithExtras.launch(launchOptions);
    let context;
    try {
      context = await browser.newContext({
        userAgent: new import_user_agents.default().toString(),
        viewport: {
          width: 1280 + Math.floor(Math.random() * 120),
          height: 720 + Math.floor(Math.random() * 80)
        },
        javaScriptEnabled: true,
        ignoreHTTPSErrors: true
      });
      await context.route("**/*", async (route) => {
        const request = route.request();
        const url = request.url();
        const resourceType = request.resourceType();
        try {
          const hostname = new URL(url).hostname.toLowerCase();
          if (this.blockedDomains.some((domain) => hostname.includes(domain)) || this.blockedResourceTypes.includes(resourceType)) {
            await route.abort("aborted");
          } else {
            await route.continue();
          }
        } catch (_e) {
          await route.continue();
        }
      });
    } catch (error) {
      // The browser is not in the pool yet, so nothing else would ever close it.
      try {
        await browser.close();
      } catch (_closeError) {
      }
      throw error;
    }
    const now = /* @__PURE__ */ new Date();
    const metrics = {
      id,
      pagesCreated: 0,
      activePages: 0,
      lastUsed: now,
      errors: 0,
      createdAt: now,
      isHealthy: true
    };
    const instance = {
      id,
      browser,
      context,
      pages: /* @__PURE__ */ new Set(),
      metrics,
      isHealthy: true,
      disconnectedHandler: () => {
      }
    };
    // Stored on the instance so closeAndRemoveInstance can unregister it.
    instance.disconnectedHandler = () => {
      if (instance.isHealthy) {
        instance.isHealthy = false;
        instance.metrics.isHealthy = false;
        this.healthCheck().catch((_err) => {
        });
      }
    };
    browser.on("disconnected", instance.disconnectedHandler);
    this.pool.add(instance);
    return instance;
  }
  /**
   * Opens a new page on the healthy instance with the fewest open pages,
   * creating a browser if none has capacity and the pool is not full.
   *
   * @returns A Promise (via the acquire queue) resolving to the new page.
   * @throws {Error} If the pool is shutting down, no instance is available or
   *   creatable, or the page cannot be created.
   */
  acquirePage() {
    return this.acquireQueue.add(async () => {
      if (this.isCleaningUp) {
        throw new Error("Pool is shutting down.");
      }
      const pickLeastLoaded = () => {
        let best = null;
        for (const instance of this.pool) {
          if (instance.isHealthy && instance.pages.size < this.maxPagesPerContext) {
            if (!best || instance.pages.size < best.pages.size) {
              best = instance;
            }
          }
        }
        return best;
      };
      let bestInstance = pickLeastLoaded();
      if (!bestInstance && this.pool.size < this.maxBrowsers) {
        try {
          bestInstance = await this.createBrowserInstance();
        } catch (error) {
          throw new Error(`Failed to create new browser instance for acquisition: ${error.message}`);
        }
      }
      if (!bestInstance) {
        await this.ensureMinimumInstances();
        bestInstance = pickLeastLoaded();
        if (!bestInstance) {
          throw new Error("Failed to acquire Playwright page: No available or creatable browser instance.");
        }
      }
      try {
        const page = await bestInstance.context.newPage();
        bestInstance.pages.add(page);
        bestInstance.metrics.pagesCreated++;
        bestInstance.metrics.activePages = bestInstance.pages.size;
        bestInstance.metrics.lastUsed = /* @__PURE__ */ new Date();
        page.on("close", () => {
          bestInstance.pages.delete(page);
          bestInstance.metrics.activePages = bestInstance.pages.size;
          bestInstance.metrics.lastUsed = /* @__PURE__ */ new Date();
        });
        page.on("crash", () => {
          bestInstance.metrics.errors++;
          bestInstance.pages.delete(page);
          bestInstance.isHealthy = false;
          bestInstance.metrics.isHealthy = false;
          this.healthCheck().catch((_err) => {
          });
        });
        return page;
      } catch (error) {
        bestInstance.metrics.errors++;
        bestInstance.isHealthy = false;
        bestInstance.metrics.isHealthy = false;
        this.healthCheck().catch((_err) => {
        });
        throw new Error(`Failed to create new page: ${error.message}`);
      }
    });
  }
  /**
   * Closes and removes instances that are marked unhealthy, disconnected, past
   * `maxBrowserAge`, or idle beyond `maxIdleTime` (idle removal only while the
   * pool holds more than one instance). Then refills the pool and re-arms the timer.
   */
  async healthCheck() {
    if (this.isCleaningUp) return;
    const now = /* @__PURE__ */ new Date();
    const checks = [];
    for (const instance of this.pool) {
      checks.push(
        (async () => {
          let shouldRemove = false;
          let reason = "unknown";
          if (!instance.isHealthy) {
            // Flagged by a crash, disconnect or failed page operation: evict it
            // so ensureMinimumInstances can create a replacement.
            shouldRemove = true;
            reason = "marked unhealthy";
          }
          if (!shouldRemove && !instance.browser.isConnected()) {
            shouldRemove = true;
            reason = "browser disconnected";
          }
          if (!shouldRemove && this.maxBrowserAge > 0 && now.getTime() - instance.metrics.createdAt.getTime() > this.maxBrowserAge) {
            shouldRemove = true;
            reason = "max age reached";
          }
          if (!shouldRemove && this.pool.size > 1 && // Only remove idle if pool has more than 1
          instance.pages.size === 0 && this.maxIdleTime > 0 && now.getTime() - instance.metrics.lastUsed.getTime() > this.maxIdleTime) {
            shouldRemove = true;
            reason = "idle timeout";
          }
          if (shouldRemove) {
            instance.isHealthy = false;
            instance.metrics.isHealthy = false;
            await this.closeAndRemoveInstance(instance, reason);
          } else {
            instance.isHealthy = true;
            instance.metrics.isHealthy = true;
          }
        })().catch((_err) => {
        })
      );
    }
    try {
      await Promise.allSettled(checks);
    } finally {
      await this.ensureMinimumInstances();
      this.scheduleHealthCheck();
    }
  }
  /**
   * Removes an instance from the pool and closes its context and browser,
   * ignoring close errors. Does nothing if the instance is not in the pool.
   *
   * @param instance Pool instance to close.
   * @param _reason Why it is being removed (currently unused).
   */
  async closeAndRemoveInstance(instance, _reason) {
    const removed = this.pool.delete(instance);
    if (!removed) return;
    instance.browser.off("disconnected", instance.disconnectedHandler);
    try {
      await instance.context.close();
    } catch (_error) {
    }
    try {
      await instance.browser.close();
    } catch (_error) {
    }
  }
  /**
   * Closes a page and updates its owner's bookkeeping. If closing fails, the
   * owning instance is marked unhealthy.
   *
   * @param page Page previously returned by acquirePage.
   */
  async releasePage(page) {
    if (!page || page.isClosed()) return;
    let ownerInstance;
    for (const instance of this.pool) {
      if (instance.pages.has(page)) {
        ownerInstance = instance;
        break;
      }
    }
    try {
      await page.close();
      if (ownerInstance) {
        ownerInstance.pages.delete(page);
        ownerInstance.metrics.activePages = ownerInstance.pages.size;
        ownerInstance.metrics.lastUsed = /* @__PURE__ */ new Date();
      }
    } catch (error) {
      if (ownerInstance) {
        ownerInstance.isHealthy = false;
        ownerInstance.metrics.isHealthy = false;
        ownerInstance.metrics.errors++;
        ownerInstance.pages.delete(page);
        ownerInstance.metrics.activePages = ownerInstance.pages.size;
      }
    }
  }
  /** Stops the health-check timer, drains the acquire queue and closes every browser. */
  async cleanup() {
    if (this.isCleaningUp) return;
    this.isCleaningUp = true;
    if (this.healthCheckTimer) {
      clearTimeout(this.healthCheckTimer);
      this.healthCheckTimer = null;
    }
    this.acquireQueue.clear();
    await this.acquireQueue.onIdle();
    const closePromises = [...this.pool].map((instance) => this.closeAndRemoveInstance(instance, "cleanup"));
    this.pool.clear();
    await Promise.allSettled(closePromises);
    this.isCleaningUp = false;
  }
  /** @returns A snapshot of each instance's metrics with live page count and health flag. */
  getMetrics() {
    return [...this.pool].map((instance) => ({
      ...instance.metrics,
      activePages: instance.pages.size,
      isHealthy: instance.isHealthy
    }));
  }
};
2709
+
2710
+ // src/PlaywrightEngine.ts
2711
+ var import_p_queue2 = __toESM(require("p-queue"), 1);
2712
+ var import_axios = __toESM(require("axios"), 1);
2713
/**
 * Returns a promise that resolves after a timer of `time` milliseconds fires.
 *
 * @param time Delay in milliseconds, passed directly to setTimeout.
 * @returns A Promise resolving with no value.
 */
function delay(time) {
  return new Promise((done) => {
    setTimeout(done, time);
  });
}
2716
/**
 * Fetch engine that tries a plain HTTP request first (optional) and then
 * falls back to a pooled Playwright browser, with caching and retries.
 */
var PlaywrightEngine = class _PlaywrightEngine {
  // Browser pool, created lazily by initializeBrowserPool(); null before the
  // first browser fetch and again after cleanup().
  browserPool = null;
  // p-queue instance capping concurrent Playwright fetches at config.concurrentPages.
  queue;
  // url -> { result, timestamp } entries written by addToCache().
  cache = /* @__PURE__ */ new Map();
  // DEFAULT_CONFIG merged with the constructor argument.
  config;
  // True while initializeBrowserPool() is running; concurrent callers poll until it clears.
  initializingBrowserPool = false;
  // Mode (headed vs. headless) the current pool was created with.
  isUsingHeadedMode = false;
  // Hostnames for which shouldUseHeadedMode() answers true.
  headedFallbackSites = /* @__PURE__ */ new Set();
  // Default configuration - every option the engine reads has a value here.
  static DEFAULT_CONFIG = {
    concurrentPages: 3,
    maxRetries: 3,
    retryDelay: 5e3,
    cacheTTL: 15 * 60 * 1e3,
    useHttpFallback: true,
    useHeadedModeFallback: false,
    defaultFastMode: true,
    simulateHumanBehavior: true,
    maxBrowsers: 2,
    maxPagesPerContext: 6,
    maxBrowserAge: 20 * 60 * 1e3,
    healthCheckInterval: 60 * 1e3,
    poolBlockedDomains: [],
    poolBlockedResourceTypes: [],
    proxy: void 0,
    useHeadedMode: false,
    markdown: true
  };
  /**
   * Creates an instance of PlaywrightEngine.
   *
   * @param config Configuration options for the engine and its browser pool.
   *               See `PlaywrightEngineConfig` for details. Missing keys fall
   *               back to `DEFAULT_CONFIG`.
   */
  constructor(config = {}) {
    this.config = { ..._PlaywrightEngine.DEFAULT_CONFIG, ...config };
    this.queue = new import_p_queue2.default({ concurrency: this.config.concurrentPages });
  }
  /**
   * Ensures a browser pool exists in the requested mode, replacing a pool that
   * was created in the other mode.
   *
   * Concurrent callers wait (polling every 100 ms) for an in-flight
   * initialization instead of starting a second one.
   *
   * @param useHeadedMode Whether the pool should launch headed browsers.
   * @throws Re-throws whatever the pool construction/initialization threw.
   */
  async initializeBrowserPool(useHeadedMode = false) {
    const poolMatches = () => Boolean(this.browserPool) && this.isUsingHeadedMode === useHeadedMode;
    if (poolMatches()) {
      return;
    }
    while (this.initializingBrowserPool) {
      await delay(100);
    }
    // Another caller may have built exactly the pool we need while we waited.
    if (poolMatches()) {
      return;
    }
    this.initializingBrowserPool = true;
    try {
      if (this.browserPool && this.isUsingHeadedMode !== useHeadedMode) {
        await this.browserPool.cleanup();
        this.browserPool = null;
      }
      this.isUsingHeadedMode = useHeadedMode;
      this.browserPool = new PlaywrightBrowserPool({
        maxBrowsers: this.config.maxBrowsers,
        maxPagesPerContext: this.config.maxPagesPerContext,
        maxBrowserAge: this.config.maxBrowserAge,
        healthCheckInterval: this.config.healthCheckInterval,
        useHeadedMode,
        blockedDomains: this.config.poolBlockedDomains,
        blockedResourceTypes: this.config.poolBlockedResourceTypes,
        proxy: this.config.proxy
      });
      await this.browserPool.initialize();
    } catch (error) {
      const failedPool = this.browserPool;
      this.browserPool = null;
      this.isUsingHeadedMode = false;
      if (failedPool) {
        // Best effort: close whatever the half-initialized pool managed to
        // start so it is not leaked; the original error is what matters.
        try {
          await failedPool.cleanup();
        } catch {
        }
      }
      throw error;
    } finally {
      this.initializingBrowserPool = false;
    }
  }
  /**
   * Fallback method using a simple HTTP request via Axios.
   *
   * @param url The URL to fetch.
   * @param convertToMarkdown Whether to convert the HTML to Markdown. Defaults
   *        to the engine-level `markdown` setting; `_fetchRecursive` passes
   *        the per-request value so request overrides are honoured here too.
   * @returns An HTMLFetchResult-shaped object.
   * @throws {FetchError} `ERR_CHALLENGE_PAGE` when the body looks like a bot
   *         challenge, `ERR_HTTP_FALLBACK_FAILED` for any other failure.
   */
  async fetchHTMLWithHttpFallback(url, convertToMarkdown = this.config.markdown) {
    try {
      const response = await import_axios.default.get(url, {
        headers: {
          // Browser-like headers
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
          Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
          "Accept-Language": "en-US,en;q=0.9",
          "Accept-Encoding": "gzip, deflate, br",
          Referer: "https://www.google.com/",
          "Upgrade-Insecure-Requests": "1",
          "Sec-Ch-Ua": '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
          "Sec-Ch-Ua-Mobile": "?0",
          "Sec-Ch-Ua-Platform": '"Windows"',
          "Sec-Fetch-Dest": "document",
          "Sec-Fetch-Mode": "navigate",
          "Sec-Fetch-Site": "cross-site",
          "Sec-Fetch-User": "?1",
          Connection: "keep-alive"
        },
        maxRedirects: 5,
        timeout: 3e4,
        responseType: "text",
        decompress: true
      });
      const originalHtml = response.data;
      const titleMatch = originalHtml.match(/<title[^>]*>([^<]+)<\/title>/i);
      let title = titleMatch ? titleMatch[1].trim() : "";
      // Bare `<html>text</html>` documents: use the text itself as the title.
      if (!title && /<html>([^<]+)<\/html>/.test(originalHtml)) {
        title = originalHtml.replace(/<\/?html>/g, "").trim();
      }
      // The pattern is case-insensitive, so no lower-cased copy is needed.
      const isChallengeOrBot = /cloudflare|checking your browser|please wait|verification|captcha|attention required/i.test(originalHtml);
      if (isChallengeOrBot) {
        throw new FetchError("Received challenge page via HTTP fallback", "ERR_CHALLENGE_PAGE");
      }
      let finalContent = originalHtml;
      let finalContentType = "html";
      if (convertToMarkdown) {
        try {
          const converter = new MarkdownConverter();
          finalContent = converter.convert(originalHtml);
          finalContentType = "markdown";
        } catch (conversionError) {
          // Conversion is optional: fall back to returning the raw HTML.
          console.error(`Markdown conversion failed for ${url} (HTTP fallback):`, conversionError);
        }
      }
      return {
        content: finalContent,
        contentType: finalContentType,
        // title is extracted from the original HTML
        title,
        url: response.request?.res?.responseUrl || response.config.url || url,
        isFromCache: false,
        statusCode: response.status,
        error: void 0
      };
    } catch (error) {
      if (!(error instanceof FetchError)) {
        throw new FetchError(`HTTP fallback failed: ${error.message}`, "ERR_HTTP_FALLBACK_FAILED", error);
      }
      throw error;
    }
  }
  /**
   * Looks up a non-expired cache entry for `url`; expired entries are evicted.
   *
   * @returns The cached result, or null when there is no fresh entry.
   */
  checkCache(url) {
    const cached = this.cache.get(url);
    if (cached && Date.now() - cached.timestamp < this.config.cacheTTL) {
      return cached.result;
    }
    if (cached) {
      this.cache.delete(url);
    }
    return null;
  }
  /**
   * Safely check if a page is still usable and connected.
   *
   * The probe evaluation is raced against a one-second timer. (Playwright's
   * `page.evaluate` takes `(pageFunction, arg)`, so an options object passed
   * as the second argument does not act as a timeout.)
   */
  async isPageValid(page) {
    if (!page || page.isClosed()) return false;
    let timer;
    try {
      if (!page.context().browser()?.isConnected()) return false;
      const timedOut = new Promise((_resolve, reject) => {
        timer = setTimeout(() => reject(new Error("Page validity probe timed out")), 1e3);
      });
      await Promise.race([page.evaluate("1 + 1"), timedOut]);
      return true;
    } catch (_error) {
      return false;
    } finally {
      clearTimeout(timer);
    }
  }
  /**
   * Simulate human-like interactions on the page: two mouse moves and two
   * smooth scrolls separated by randomized pauses. Purely best effort.
   */
  async simulateHumanBehavior(page) {
    if (!await this.isPageValid(page)) return;
    try {
      const viewport = page.viewportSize();
      if (!viewport) return;
      await page.mouse.move(Math.random() * viewport.width, Math.random() * viewport.height / 3, { steps: 5 });
      await delay(150 + Math.random() * 200);
      await page.mouse.move(
        Math.random() * viewport.width,
        viewport.height / 2 + Math.random() * viewport.height / 2,
        { steps: 10 }
      );
      await delay(200 + Math.random() * 300);
      await page.evaluate(() => {
        window.scrollBy({
          top: window.innerHeight * (0.3 + Math.random() * 0.4),
          behavior: "smooth"
        });
      });
      await delay(400 + Math.random() * 600);
      await page.evaluate(() => {
        window.scrollBy({
          top: window.innerHeight * (0.2 + Math.random() * 0.3),
          behavior: "smooth"
        });
      });
      await delay(300 + Math.random() * 400);
    } catch (_error) {
      // Deliberately ignored: a failed simulation must not fail the fetch.
    }
  }
  /**
   * Adds a result to the in-memory cache (no-op when `cacheTTL` <= 0).
   * The stored copy is flagged `isFromCache: true`.
   */
  addToCache(url, result) {
    if (this.config.cacheTTL <= 0) return;
    const entry = {
      result: { ...result, isFromCache: true },
      timestamp: Date.now()
    };
    this.cache.set(url, entry);
  }
  /**
   * Public method to fetch HTML. Delegates to the internal recursive fetch method.
   *
   * @param url The URL to fetch.
   * @param options Optional settings for this specific fetch operation.
   * @param options.fastMode Overrides the engine's `defaultFastMode` configuration for this request.
   * @param options.markdown Overrides the engine's `markdown` configuration for this request.
   * @returns A Promise resolving to an HTMLFetchResult object.
   * @throws {FetchError} If the fetch fails after all retries or encounters critical errors.
   */
  async fetchHTML(url, options = {}) {
    const fetchConfig = {
      ...this.config,
      markdown: options.markdown === void 0 ? this.config.markdown : options.markdown,
      fastMode: options.fastMode === void 0 ? this.config.defaultFastMode : options.fastMode
    };
    return this._fetchRecursive(url, fetchConfig, 0, 0);
  }
  /**
   * Internal recursive method to handle fetching with retries.
   *
   * Order of attempts on the first pass: cache, HTTP fallback (if enabled),
   * then Playwright. A failed fast-mode first pass is repeated once with fast
   * mode off before the regular retry budget is spent.
   *
   * @param url URL to fetch
   * @param currentConfig The merged configuration including markdown option
   * @param retryAttempt Current retry attempt number (starts at 0)
   * @param parentRetryCount Tracks retries related to pool initialization errors (starts at 0)
   * @returns Promise resolving to HTMLFetchResult
   * @throws {FetchError} When every attempt has failed.
   */
  async _fetchRecursive(url, currentConfig, retryAttempt, parentRetryCount) {
    const useFastMode = currentConfig.fastMode;
    const isFirstPass = retryAttempt === 0 && parentRetryCount === 0;
    if (isFirstPass) {
      const cachedResult = this.checkCache(url);
      if (cachedResult) {
        // Trust the recorded content type rather than sniffing the text:
        // Markdown does not have to start with "#" or contain a "---" rule.
        const cachedIsMarkdown = cachedResult.contentType === "markdown";
        if (Boolean(currentConfig.markdown) === cachedIsMarkdown) {
          return cachedResult;
        }
        if (currentConfig.markdown) {
          try {
            const converter = new MarkdownConverter();
            // Return a converted copy; the cached HTML entry stays intact so a
            // later HTML request can still be served from the cache.
            return {
              ...cachedResult,
              content: converter.convert(cachedResult.content),
              contentType: "markdown"
            };
          } catch (e) {
            console.error("Failed to convert cached result to markdown", e);
            return cachedResult;
          }
        }
        // Markdown cannot be turned back into HTML: drop the entry and fetch.
        console.warn("Cached result is Markdown, but HTML was requested. Re-fetching.");
        this.cache.delete(url);
      }
    }
    try {
      if (currentConfig.useHttpFallback && isFirstPass) {
        try {
          // Pass the per-request markdown flag so request overrides apply.
          const httpResult = await this.fetchHTMLWithHttpFallback(url, currentConfig.markdown);
          if (this.config.cacheTTL > 0) {
            this.addToCache(url, httpResult);
          }
          return httpResult;
        } catch (_httpError) {
          // Deliberately ignored (challenge page or plain HTTP failure):
          // either way the request continues with the Playwright path below.
        }
      }
      const useHeadedMode = currentConfig.useHeadedModeFallback && (retryAttempt >= 2 || this.shouldUseHeadedMode(url)) || currentConfig.useHeadedMode;
      try {
        if (!this.browserPool || this.isUsingHeadedMode !== useHeadedMode) {
          await this.initializeBrowserPool(useHeadedMode);
        }
      } catch (initError) {
        // Pool start-up gets exactly one extra attempt of its own.
        if (parentRetryCount < 1) {
          await delay(currentConfig.retryDelay);
          return this._fetchRecursive(url, currentConfig, retryAttempt, parentRetryCount + 1);
        }
        throw new FetchError(
          `Pool init failed: ${initError.message}`,
          "ERR_POOL_INIT_FAILED",
          initError
        );
      }
      if (!this.browserPool) {
        throw new FetchError("Browser pool unavailable.", "ERR_POOL_UNAVAILABLE");
      }
      const result = await this.queue.add(
        () => this.fetchWithPlaywright(url, this.browserPool, useFastMode, currentConfig.markdown)
      );
      if (result && this.config.cacheTTL > 0) {
        this.addToCache(url, result);
      }
      if (!result) {
        throw new FetchError("Playwright fetch queued but no result.", "ERR_QUEUE_NO_RESULT");
      }
      return result;
    } catch (error) {
      if (useFastMode && isFirstPass) {
        return this._fetchRecursive(url, { ...currentConfig, fastMode: false }, 0, parentRetryCount);
      }
      if (retryAttempt < currentConfig.maxRetries) {
        await delay(currentConfig.retryDelay);
        return this._fetchRecursive(url, currentConfig, retryAttempt + 1, parentRetryCount);
      }
      const finalError = error instanceof FetchError ? error : new FetchError(`Fetch failed: ${error.message}`, "ERR_FETCH_FAILED", error);
      throw new FetchError(
        `Fetch failed after ${currentConfig.maxRetries} retries: ${finalError.message}`,
        finalError.code,
        finalError.originalError || error
      );
    }
  }
  /**
   * Performs the actual page fetch using a Playwright page from the pool.
   * The page is always released back to the pool, even on failure.
   *
   * @param url URL to navigate to.
   * @param pool Browser pool to acquire the page from.
   * @param fastMode When true, extra resource types are blocked and human
   *        behaviour simulation is skipped.
   * @param convertToMarkdown Whether to convert the page HTML to Markdown.
   * @returns An HTMLFetchResult-shaped object.
   * @throws {FetchError} `ERR_NAVIGATION`, `ERR_NO_RESPONSE`, `ERR_HTTP_ERROR`
   *         or `ERR_NON_HTML_CONTENT`.
   */
  async fetchWithPlaywright(url, pool, fastMode, convertToMarkdown) {
    let page = null;
    try {
      page = await pool.acquirePage();
      await this.applyBlockingRules(page, fastMode);
      let response = null;
      try {
        response = await page.goto(url, {
          waitUntil: "domcontentloaded",
          timeout: 6e4
        });
      } catch (navigationError) {
        throw new FetchError(
          `Playwright navigation failed: ${navigationError.message}`,
          "ERR_NAVIGATION",
          navigationError
        );
      }
      if (!response) {
        throw new FetchError("Playwright navigation did not return a response.", "ERR_NO_RESPONSE");
      }
      if (!response.ok()) {
        throw new FetchError(
          `HTTP error status received: ${response.status()}`,
          "ERR_HTTP_ERROR",
          void 0,
          response.status()
        );
      }
      const contentType = response.headers()["content-type"] || "";
      if (!contentType.includes("html")) {
        throw new FetchError(`Invalid content type received: ${contentType}`, "ERR_NON_HTML_CONTENT");
      }
      if (!fastMode && this.config.simulateHumanBehavior) {
        await this.simulateHumanBehavior(page);
      }
      const html = await page.content();
      const title = await page.title();
      const finalUrl = page.url();
      const status = response.status();
      let finalContent = html;
      let finalContentType = "html";
      if (convertToMarkdown) {
        try {
          const converter = new MarkdownConverter();
          finalContent = converter.convert(html);
          finalContentType = "markdown";
        } catch (conversionError) {
          // Conversion is optional: fall back to returning the raw HTML.
          console.error(`Markdown conversion failed for ${url} (Playwright):`, conversionError);
        }
      }
      return {
        content: finalContent,
        contentType: finalContentType,
        title: title || null,
        url: finalUrl,
        isFromCache: false,
        statusCode: status,
        error: void 0
      };
    } finally {
      if (page) {
        await pool.releasePage(page);
      }
    }
  }
  /**
   * Installs a request interceptor on `page` that aborts blocked resource
   * types and requests whose URL matches a blocked-domain pattern.
   *
   * In fast mode, images, fonts, stylesheets and media are blocked in addition
   * to the configured resource types. Domain patterns treat `.` literally and
   * `*` as a wildcard. Failures are logged and otherwise ignored.
   */
  async applyBlockingRules(page, fastMode) {
    // Tolerate a config that explicitly set these options to undefined.
    const configuredTypes = this.config.poolBlockedResourceTypes ?? [];
    const blockedDomains = this.config.poolBlockedDomains ?? [];
    const blockedResources = fastMode ? configuredTypes.concat(["image", "font", "stylesheet", "media"]) : configuredTypes;
    if (blockedResources.length === 0 && blockedDomains.length === 0) {
      return;
    }
    try {
      // Compile the domain patterns once instead of on every intercepted request.
      const domainMatchers = blockedDomains.map(
        (pattern) => new RegExp(pattern.replace(/\./g, "\\.").replace(/\*/g, ".*"))
      );
      await page.route("**/*", (route) => {
        const request = route.request();
        if (blockedResources.includes(request.resourceType())) {
          return route.abort();
        }
        const requestUrl = request.url();
        if (domainMatchers.some((matcher) => matcher.test(requestUrl))) {
          return route.abort();
        }
        return route.continue();
      });
    } catch (error) {
      // Blocking is an optimisation; the fetch proceeds without it.
      console.warn(`Failed to apply request blocking rules: ${error?.message}`);
    }
  }
  /**
   * Cleans up resources used by the engine, primarily closing browser instances in the pool.
   *
   * It is crucial to call this method when finished with the engine instance to release resources.
   * Never rejects: failures are logged, and the pool reference is dropped
   * regardless so a later fetch starts from a fresh pool.
   * @returns A Promise that resolves when cleanup is complete.
   */
  async cleanup() {
    try {
      await this.queue.onIdle();
      this.queue.clear();
      if (this.browserPool) {
        await this.browserPool.cleanup();
      }
    } catch (error) {
      console.error("PlaywrightEngine cleanup failed:", error);
    } finally {
      this.browserPool = null;
      this.isUsingHeadedMode = false;
    }
  }
  /**
   * Retrieves metrics from the underlying browser pool.
   * @returns An array of BrowserMetrics objects, one for each active browser instance, or an empty array if the pool is not initialized.
   */
  getMetrics() {
    if (this.browserPool) {
      return this.browserPool.getMetrics();
    }
    return [];
  }
  /**
   * Helper to check if a specific domain is marked for headed mode.
   *
   * @returns true only when `useHeadedModeFallback` is enabled and the URL's
   *          hostname is in `headedFallbackSites`; false for unparsable URLs.
   */
  shouldUseHeadedMode(url) {
    if (!this.config.useHeadedModeFallback) return false;
    try {
      const domain = new URL(url).hostname;
      return this.headedFallbackSites.has(domain);
    } catch {
      return false;
    }
  }
};
3176
+
3177
+ // src/HybridEngine.ts
3178
+ init_cjs_shims();
3179
/**
 * Engine that tries the lightweight FetchEngine first and only falls back to
 * the PlaywrightEngine when that attempt throws.
 */
var HybridEngine = class {
  fetchEngine;
  playwrightEngine;
  // Kept so the Playwright fallback can start from the engine-level settings.
  config;
  /**
   * @param config Passed whole to PlaywrightEngine; only `markdown` is
   *               forwarded to FetchEngine.
   */
  constructor(config = {}) {
    const { markdown } = config;
    this.fetchEngine = new FetchEngine({ markdown });
    this.playwrightEngine = new PlaywrightEngine(config);
    this.config = config;
  }
  /**
   * Fetches `url`, preferring FetchEngine and falling back to PlaywrightEngine.
   *
   * NOTE(review): per-request `options` only reach the Playwright fallback;
   * the FetchEngine call receives the URL alone - confirm this is intended.
   *
   * @param url The URL to fetch.
   * @param options Per-request options; merged over the engine config for the fallback.
   * @returns The result of whichever engine succeeded.
   * @throws Whatever the PlaywrightEngine fallback throws when both engines fail.
   */
  async fetchHTML(url, options = {}) {
    let primaryFailure;
    try {
      return await this.fetchEngine.fetchHTML(url);
    } catch (fetchError) {
      primaryFailure = fetchError;
    }
    console.warn(`FetchEngine failed for ${url}: ${primaryFailure.message}. Falling back to PlaywrightEngine.`);
    // Engine-level config first, then per-request overrides on top.
    const playwrightOptions = { ...this.config, ...options };
    try {
      return await this.playwrightEngine.fetchHTML(url, playwrightOptions);
    } catch (playwrightError) {
      console.error(`PlaywrightEngine fallback failed for ${url}: ${playwrightError.message}`);
      throw playwrightError;
    }
  }
  /**
   * Delegates getMetrics to the PlaywrightEngine.
   */
  getMetrics() {
    const { playwrightEngine } = this;
    return playwrightEngine.getMetrics();
  }
  /**
   * Calls cleanup on both underlying engines; a failure in one does not
   * prevent the other from being cleaned up.
   */
  async cleanup() {
    const pending = [
      this.fetchEngine.cleanup(),
      this.playwrightEngine.cleanup()
    ];
    await Promise.allSettled(pending);
  }
};
3227
+ // Annotate the CommonJS export names for ESM import in node:
3228
+ 0 && (module.exports = {
3229
+ FetchEngine,
3230
+ HybridEngine,
3231
+ PlaywrightEngine
3232
+ });
3233
+ /*! Bundled license information:
3234
+
3235
+ for-in/index.js:
3236
+ (*!
3237
+ * for-in <https://github.com/jonschlinkert/for-in>
3238
+ *
3239
+ * Copyright (c) 2014-2017, Jon Schlinkert.
3240
+ * Released under the MIT License.
3241
+ *)
3242
+
3243
+ for-own/index.js:
3244
+ (*!
3245
+ * for-own <https://github.com/jonschlinkert/for-own>
3246
+ *
3247
+ * Copyright (c) 2014-2017, Jon Schlinkert.
3248
+ * Released under the MIT License.
3249
+ *)
3250
+
3251
+ is-buffer/index.js:
3252
+ (*!
3253
+ * Determine if an object is a Buffer
3254
+ *
3255
+ * @author Feross Aboukhadijeh <https://feross.org>
3256
+ * @license MIT
3257
+ *)
3258
+
3259
+ merge-deep/index.js:
3260
+ (*!
3261
+ * merge-deep <https://github.com/jonschlinkert/merge-deep>
3262
+ *
3263
+ * Copyright (c) 2014-2015, Jon Schlinkert.
3264
+ * Licensed under the MIT License.
3265
+ *)
3266
+
3267
+ puppeteer-extra-plugin/dist/index.esm.js:
3268
+ (*!
3269
+ * puppeteer-extra-plugin v3.2.2 by berstend
3270
+ * https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin
3271
+ * @license MIT
3272
+ *)
3273
+ */
3274
+ //# sourceMappingURL=index.cjs.map