@firekid/scraper 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2251 @@
1
+ #!/usr/bin/env node
2
// Bundler-generated runtime helpers.
// NOTE: the hashbang that used to be repeated here was removed. `#!` is only
// valid as the very first line of a module; a second one is a SyntaxError.
const __defProp = Object.defineProperty;
const __getOwnPropNames = Object.getOwnPropertyNames;

/**
 * Wraps a one-entry module table in a run-once initializer.
 *
 * @param {Object<string, Function>|0} fn - Object whose first own property is
 *   the module body; replaced by 0 once the body has been started.
 * @param {*} [res] - Slot that caches the body's return value.
 * @returns {Function} `__init`, which runs the body on first call and returns
 *   the cached result on every later call.
 */
const __esm = (fn, res) =>
  function __init() {
    if (fn) {
      const moduleBody = fn[__getOwnPropNames(fn)[0]];
      // Clear `fn` before running the body so a re-entrant call (circular
      // initialization) does not start the body a second time.
      fn = 0;
      res = moduleBody(0);
    }
    return res;
  };

/**
 * Defines an enumerable getter on `target` for every enumerable key of `all`.
 *
 * @param {object} target - Object receiving the accessors.
 * @param {Object<string, Function>} all - Map of export name to getter.
 */
const __export = (target, all) => {
  for (const name in all) {
    __defProp(target, name, { get: all[name], enumerable: true });
  }
};
13
+
14
+ // node_modules/tsup/assets/esm_shims.js
15
+ import path from "path";
16
+ import { fileURLToPath } from "url";
17
// ESM stand-ins for CommonJS path globals. They stay `var` (hoisted, no TDZ)
// because they are assigned lazily, the first time init_esm_shims() runs.
var getFilename, getDirname, __dirname;
var init_esm_shims = __esm({
  "node_modules/tsup/assets/esm_shims.js"() {
    "use strict";
    // Absolute file path of this module, derived from its file: URL.
    getFilename = () => fileURLToPath(import.meta.url);
    // Directory that contains this module.
    getDirname = () => {
      const filename = getFilename();
      return path.dirname(filename);
    };
    __dirname = /* @__PURE__ */ getDirname();
  }
});
26
+
27
+ // src/ghost/seed.ts
28
+ import crypto from "crypto";
29
/**
 * Returns one element of `array`, chosen with Math.random().
 * An empty array yields `undefined`.
 */
function random(array) {
  const index = Math.floor(Math.random() * array.length);
  return array[index];
}
32
/**
 * Returns a Math.random()-based float in the half-open interval [min, max).
 */
function randomRange(min, max) {
  const span = max - min;
  return Math.random() * span + min;
}
35
/**
 * Builds a fresh identity seed: a random UUID plus one randomly picked entry
 * from each module-level lookup table and two small noise amplitudes.
 *
 * @returns {object} Seed with id, chromeVersion, screenWidth, screenHeight,
 *   language, timezone, canvasNoise, webglVendor, webglRenderer, audioNoise
 *   and fonts.
 */
function getNewSeed() {
  const { width: screenWidth, height: screenHeight } = random(resolutions);
  return {
    id: crypto.randomUUID(),
    chromeVersion: random(chromeVersions),
    screenWidth,
    screenHeight,
    language: random(languages),
    timezone: random(timezones),
    canvasNoise: randomRange(1e-4, 1e-3),
    webglVendor: random(webglVendors),
    webglRenderer: random(webglRenderers),
    audioNoise: randomRange(1e-5, 1e-4),
    fonts: random(fontSets)
  };
}
51
/**
 * Returns the seed cached for `siteHost`, creating and caching a new one the
 * first time a host is seen.
 */
function getSeedForSite(siteHost) {
  if (!seedCache.has(siteHost)) {
    seedCache.set(siteHost, getNewSeed());
  }
  return seedCache.get(siteHost);
}
59
// Lookup tables used by getNewSeed()/getSeedForSite(); populated by init_seed().
var seedCache, chromeVersions, resolutions, languages, timezones, webglVendors, webglRenderers, fontSets;
var init_seed = __esm({
  "src/ghost/seed.ts"() {
    "use strict";
    init_esm_shims();
    // host -> seed; lets getSeedForSite() hand the same seed back per host.
    seedCache = /* @__PURE__ */ new Map();
    chromeVersions = ["131.0.6778.85", "131.0.6778.86", "130.0.6723.116", "129.0.6668.100"];
    // [width, height] pairs expanded into { width, height } records.
    resolutions = [
      [1920, 1080],
      [1366, 768],
      [1536, 864],
      [1440, 900],
      [2560, 1440]
    ].map(([width, height]) => ({ width, height }));
    languages = ["en-US", "en-GB", "en", "es-ES", "fr-FR", "de-DE"];
    timezones = ["America/New_York", "America/Los_Angeles", "Europe/London", "Europe/Paris"];
    webglVendors = ["Google Inc. (NVIDIA)", "Google Inc. (Intel)", "Google Inc. (AMD)", "Google Inc. (Apple)"];
    webglRenderers = [
      "ANGLE (NVIDIA, NVIDIA GeForce RTX 3070 Direct3D11 vs_5_0 ps_5_0, D3D11)",
      "ANGLE (Intel, Intel(R) UHD Graphics 630 Direct3D11 vs_5_0 ps_5_0, D3D11)",
      "ANGLE (AMD, AMD Radeon RX 580 Direct3D11 vs_5_0 ps_5_0, D3D11)"
    ];
    fontSets = [
      ["Arial", "Calibri", "Cambria", "Consolas", "Georgia", "Times New Roman", "Verdana"],
      ["Arial", "Helvetica", "Georgia", "Courier New", "Times", "Comic Sans MS"],
      ["Arial", "Tahoma", "Trebuchet MS", "Verdana", "Georgia", "Palatino Linotype"]
    ];
  }
});
98
+
99
+ // src/ghost/canvas.ts
100
/**
 * Registers an init script on `context` that perturbs canvas read-backs.
 *
 * The callback is handed to `addInitScript` together with `seed.canvasNoise`,
 * so everything it needs is defined inside it rather than captured from this
 * module.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {object} seed - Seed whose `canvasNoise` scales the perturbation.
 */
async function applyCanvasSpoof(context, seed) {
  await context.addInitScript((noise) => {
    const nativeGetImageData = CanvasRenderingContext2D.prototype.getImageData;
    const nativeToDataURL = HTMLCanvasElement.prototype.toDataURL;
    const nativeToBlob = HTMLCanvasElement.prototype.toBlob;

    // Integer offset added to one colour channel.
    const jitter = () => Math.floor(noise * 255 * (Math.random() - 0.5));

    // Round-trips a canvas through the patched getImageData so the perturbed
    // pixels are written back before the canvas is exported.
    const rewritePixels = (canvas) => {
      const ctx2d = canvas.getContext("2d");
      if (!ctx2d) {
        return;
      }
      const pixels = ctx2d.getImageData(0, 0, canvas.width, canvas.height);
      ctx2d.putImageData(pixels, 0, 0);
    };

    CanvasRenderingContext2D.prototype.getImageData = function(...args) {
      const imageData = nativeGetImageData.apply(this, args);
      const { data } = imageData;
      // Touch R, G and B of every pixel; alpha (offset 3) is left alone.
      for (let pixel = 0; pixel < data.length; pixel += 4) {
        for (let channel = 0; channel < 3; channel++) {
          data[pixel + channel] += jitter();
        }
      }
      return imageData;
    };

    HTMLCanvasElement.prototype.toDataURL = function(...args) {
      rewritePixels(this);
      return nativeToDataURL.apply(this, args);
    };

    HTMLCanvasElement.prototype.toBlob = function(...args) {
      rewritePixels(this);
      return nativeToBlob.apply(this, args);
    };
  }, seed.canvasNoise);
}
132
// Run-once initializer for src/ghost/canvas.ts. The module keeps no state of
// its own, so the body only initializes the shared ESM shims.
var init_canvas = __esm({
  "src/ghost/canvas.ts"() {
    "use strict";
    init_esm_shims();
  }
});
138
+
139
+ // src/ghost/webgl.ts
140
/**
 * Registers an init script on `context` that overrides the WebGL debug
 * vendor/renderer strings with the values from `seed`.
 *
 * The callback is handed to `addInitScript`, so it must stay self-contained.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {object} seed - Seed providing `webglVendor` and `webglRenderer`.
 */
async function applyWebGLSpoof(context, seed) {
  await context.addInitScript((params) => {
    const { vendor, renderer } = params;
    // Enum values defined by the WEBGL_debug_renderer_info extension.
    const UNMASKED_VENDOR_WEBGL = 37445;
    const UNMASKED_RENDERER_WEBGL = 37446;

    const getParameterProxyHandler = {
      apply(target, thisArg, args) {
        const param = args[0];
        if (param === UNMASKED_VENDOR_WEBGL) {
          return vendor;
        }
        if (param === UNMASKED_RENDERER_WEBGL) {
          return renderer;
        }
        return Reflect.apply(target, thisArg, args);
      }
    };

    const getExtensionProxyHandler = {
      apply(target, thisArg, args) {
        const result = Reflect.apply(target, thisArg, args);
        if (!result) {
          return result;
        }
        // The extension object normally only carries constants. Wrapping a
        // missing `getParameter` in a Proxy throws a TypeError, which used to
        // break every page that requested this extension, so only wrap it
        // when it is actually a function.
        if (args[0] === "WEBGL_debug_renderer_info" && typeof result.getParameter === "function") {
          result.getParameter = new Proxy(result.getParameter, getParameterProxyHandler);
        }
        return result;
      }
    };

    const patchPrototype = (proto) => {
      proto.getParameter = new Proxy(proto.getParameter, getParameterProxyHandler);
      proto.getExtension = new Proxy(proto.getExtension, getExtensionProxyHandler);
    };

    // Either constructor may be absent; skip what is not there instead of
    // aborting the whole script with a ReferenceError.
    if (typeof WebGLRenderingContext !== "undefined") {
      patchPrototype(WebGLRenderingContext.prototype);
    }
    if (typeof WebGL2RenderingContext !== "undefined") {
      patchPrototype(WebGL2RenderingContext.prototype);
    }
  }, { vendor: seed.webglVendor, renderer: seed.webglRenderer });
}
186
// Run-once initializer for src/ghost/webgl.ts. The module keeps no state of
// its own, so the body only initializes the shared ESM shims.
var init_webgl = __esm({
  "src/ghost/webgl.ts"() {
    "use strict";
    init_esm_shims();
  }
});
192
+
193
+ // src/ghost/audio.ts
194
/**
 * Registers an init script on `context` that adds a small random offset to
 * values exposed by AudioContext compressor and oscillator nodes.
 *
 * The callback is handed to `addInitScript`, so it must stay self-contained.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {object} seed - Seed whose `audioNoise` scales the offset.
 */
async function applyAudioSpoof(context, seed) {
  await context.addInitScript((noise) => {
    const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    if (!AudioContextCtor) {
      return;
    }
    const jitter = () => noise * (Math.random() - 0.5);
    const originalCreateDynamicsCompressor = AudioContextCtor.prototype.createDynamicsCompressor;
    const originalCreateOscillator = AudioContextCtor.prototype.createOscillator;

    AudioContextCtor.prototype.createDynamicsCompressor = function(...args) {
      const compressor = originalCreateDynamicsCompressor.apply(this, args);
      const reduction = compressor.reduction;
      // Only an object-valued `reduction` can take an own accessor; a plain
      // number would make defineProperty throw.
      if (reduction !== null && typeof reduction === "object") {
        // Start from the real value. The previous getter read a `_value`
        // field that was never initialised, so every read produced NaN.
        let baseValue = reduction.value;
        Object.defineProperty(reduction, "value", {
          get() {
            return baseValue + jitter();
          },
          set(v) {
            baseValue = v;
          }
        });
      }
      return compressor;
    };

    AudioContextCtor.prototype.createOscillator = function(...args) {
      const oscillator = originalCreateOscillator.apply(this, args);
      const originalStart = oscillator.start;
      oscillator.start = function(...startArgs) {
        if (oscillator.frequency) {
          oscillator.frequency.value += jitter();
        }
        return originalStart.apply(this, startArgs);
      };
      return oscillator;
    };
  }, seed.audioNoise);
}
228
// Run-once initializer for src/ghost/audio.ts. The module keeps no state of
// its own, so the body only initializes the shared ESM shims.
var init_audio = __esm({
  "src/ghost/audio.ts"() {
    "use strict";
    init_esm_shims();
  }
});
234
+
235
+ // src/ghost/fonts.ts
236
/**
 * Registers an init script on `context` that filters the `font-family` value
 * reported through `getComputedStyle(...).getPropertyValue` down to families
 * matching `seed.fonts`.
 *
 * The callback is handed to `addInitScript`, so it must stay self-contained.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {object} seed - Seed whose `fonts` array lists the allowed families.
 */
async function applyFontSpoof(context, seed) {
  await context.addInitScript((fonts) => {
    const nativeGetComputedStyle = window.getComputedStyle;

    // A family is kept when, with quotes stripped, it contains an allowed name.
    const isAllowed = (family) => {
      const bare = family.replace(/['"]/g, "");
      return fonts.some((font) => bare.includes(font));
    };

    window.getComputedStyle = function(element, pseudoElt) {
      const styles = nativeGetComputedStyle.call(this, element, pseudoElt);
      const nativeGetPropertyValue = styles.getPropertyValue;
      styles.getPropertyValue = function(property) {
        const value = nativeGetPropertyValue.call(this, property);
        if (property !== "font-family") {
          return value;
        }
        const kept = value
          .split(",")
          .map((part) => part.trim())
          .filter(isAllowed);
        // Fall back to the untouched value when nothing matches.
        return kept.length > 0 ? kept.join(", ") : value;
      };
      return styles;
    };
  }, seed.fonts);
}
258
// Run-once initializer for src/ghost/fonts.ts. The module keeps no state of
// its own, so the body only initializes the shared ESM shims.
var init_fonts = __esm({
  "src/ghost/fonts.ts"() {
    "use strict";
    init_esm_shims();
  }
});
264
+
265
+ // src/ghost/navigator.ts
266
/**
 * Registers an init script on `context` that overrides selected `navigator`
 * and `screen` properties with fixed values or values from `seed`.
 *
 * The callback is handed to `addInitScript`, so it must stay self-contained.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {object} seed - Seed; `language`, `screenWidth` and `screenHeight`
 *   are read inside the page.
 */
async function applyNavigatorSpoof(context, seed) {
  await context.addInitScript((seedData) => {
    const defineGetter = (target, property, get) => {
      Object.defineProperty(target, property, { get });
    };

    defineGetter(navigator, "webdriver", () => void 0);
    defineGetter(navigator, "plugins", () => [1, 2, 3, 4, 5]);
    defineGetter(navigator, "languages", () => [seedData.language, "en"]);
    defineGetter(navigator, "platform", () => "Win32");
    defineGetter(navigator, "hardwareConcurrency", () => 8);
    defineGetter(navigator, "deviceMemory", () => 8);

    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === "function") {
      // Keep the receiver. Calling the saved method unbound loses `this` and
      // fails for every query that is forwarded to the original.
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters) => {
        const isNotificationQuery = parameters != null && parameters.name === "notifications";
        if (isNotificationQuery && typeof Notification !== "undefined") {
          return Promise.resolve({ state: Notification.permission });
        }
        return originalQuery(parameters);
      };
    }

    if (window.chrome) {
      delete window.chrome.runtime;
    }

    defineGetter(screen, "width", () => seedData.screenWidth);
    defineGetter(screen, "height", () => seedData.screenHeight);
    defineGetter(screen, "availWidth", () => seedData.screenWidth);
    // 40px is subtracted from the full height for the available height.
    defineGetter(screen, "availHeight", () => seedData.screenHeight - 40);
  }, seed);
}
305
// Run-once initializer for src/ghost/navigator.ts. The module keeps no state
// of its own, so the body only initializes the shared ESM shims.
var init_navigator = __esm({
  "src/ghost/navigator.ts"() {
    "use strict";
    init_esm_shims();
  }
});
311
+
312
+ // src/ghost/consistency.ts
313
/**
 * Sanity-checks a seed and throws on the first rule it violates.
 *
 * Rules are evaluated in order: required fields, minimum screen size, canvas
 * noise range, audio noise range, non-empty font list.
 *
 * @param {object} seed - Seed to check.
 * @throws {Error} With an "Invalid seed: ..." message for the first failure.
 */
function validateConsistency(seed) {
  // Each predicate is lazy so later rules never run once one has failed.
  const rules = [
    [() => !seed.id || !seed.chromeVersion, "Invalid seed: missing required fields"],
    [() => seed.screenWidth < 800 || seed.screenHeight < 600, "Invalid seed: screen resolution too small"],
    [() => seed.canvasNoise < 0 || seed.canvasNoise > 1, "Invalid seed: canvas noise out of range"],
    [() => seed.audioNoise < 0 || seed.audioNoise > 1, "Invalid seed: audio noise out of range"],
    [() => !seed.fonts || seed.fonts.length === 0, "Invalid seed: no fonts specified"]
  ];
  for (const [isViolated, message] of rules) {
    if (isViolated()) {
      throw new Error(message);
    }
  }
}
330
// Run-once initializer for src/ghost/consistency.ts. The module keeps no
// state of its own, so the body only initializes the shared ESM shims.
var init_consistency = __esm({
  "src/ghost/consistency.ts"() {
    "use strict";
    init_esm_shims();
  }
});
336
+
337
+ // src/ghost/behavior.ts
338
// Assigned by init_behavior(); kept as `var` so code elsewhere in the file
// can reference the binding before the initializer has run.
var HumanBehavior;
var init_behavior = __esm({
  "src/ghost/behavior.ts"() {
    "use strict";
    init_esm_shims();
    /**
     * Helpers that drive a page with randomized timing and pointer positions.
     * Methods take a `page` object and use its `locator`, `mouse`, `keyboard`,
     * `focus` and `evaluate` members.
     */
    HumanBehavior = class {
      seed;
      profile;
      /**
       * @param {object} seed - Identity seed; `screenWidth`/`screenHeight`
       *   bound random pointer movement when present.
       */
      constructor(seed) {
        this.seed = seed;
        this.profile = this.generateProfile();
      }
      /** Builds the default behavior profile (typing delay bounds in ms). */
      generateProfile() {
        return {
          typingSpeed: { min: 50, max: 150 },
          mouseMovements: [],
          scrollPatterns: [],
          pauseDistribution: [],
          clickTiming: []
        };
      }
      /** Sleeps for a random duration in [min, max) milliseconds. */
      async randomDelay(min = 500, max = 2e3) {
        const delay = Math.random() * (max - min) + min;
        await new Promise((r) => setTimeout(r, delay));
      }
      /**
       * Moves to a random point inside the element's bounding box, pauses,
       * then clicks there. Does nothing when no bounding box is reported.
       */
      async humanClick(page, selector) {
        const element = await page.locator(selector);
        const box = await element.boundingBox();
        if (box) {
          const x = box.x + Math.random() * box.width;
          const y = box.y + Math.random() * box.height;
          await page.mouse.move(x, y, { steps: 10 });
          await this.randomDelay(100, 300);
          await page.mouse.click(x, y);
        }
      }
      /** Focuses `selector` and types `text2` one character at a time. */
      async humanType(page, selector, text2) {
        await page.focus(selector);
        const { min, max } = this.profile.typingSpeed;
        for (const char of text2) {
          await page.keyboard.type(char);
          await this.randomDelay(min, max);
        }
      }
      /** Scrolls down one to three times by up to 500px, pausing between. */
      async randomScroll(page) {
        const scrolls = Math.floor(Math.random() * 3) + 1;
        for (let i = 0; i < scrolls; i++) {
          const scrollY = Math.random() * 500;
          await page.evaluate((y) => window.scrollBy(0, y), scrollY);
          await this.randomDelay(500, 1e3);
        }
      }
      /**
       * Moves the pointer to a random position. The range follows the seed's
       * screen size so targets stay inside the spoofed screen; it falls back
       * to 1920x1080 when the seed has no dimensions.
       */
      async randomMouseMovement(page) {
        const maxX = this.seed?.screenWidth ?? 1920;
        const maxY = this.seed?.screenHeight ?? 1080;
        const x = Math.random() * maxX;
        const y = Math.random() * maxY;
        await page.mouse.move(x, y, { steps: 20 });
        await this.randomDelay(200, 500);
      }
      /** Returns the profile created in the constructor. */
      getProfile() {
        return this.profile;
      }
    };
  }
});
404
+
405
+ // src/config.ts
406
+ import dotenv from "dotenv";
407
// Populated by init_config() from process.env after dotenv has loaded.
var config;
var init_config = __esm({
  "src/config.ts"() {
    "use strict";
    init_esm_shims();
    dotenv.config();
    const env = process.env;
    // Reads an integer setting. Unset, empty, or non-numeric values fall back
    // to `fallback` instead of leaking NaN into timeouts, ports and limits.
    const intFromEnv = (name, fallback) => {
      const parsed = Number.parseInt(env[name] || "", 10);
      return Number.isNaN(parsed) ? fallback : parsed;
    };
    // Opt-in flag: on only when the variable is exactly "true".
    const enabledIfTrue = (name) => env[name] === "true";
    // Opt-out flag: on unless the variable is exactly "false".
    const enabledUnlessFalse = (name) => env[name] !== "false";
    config = {
      browser: {
        headless: enabledIfTrue("HEADLESS"),
        timeout: intFromEnv("BROWSER_TIMEOUT", 30000),
        maxWorkers: intFromEnv("MAX_QUEUE_WORKERS", 5)
      },
      cloudflare: {
        bypass: env.CF_BYPASS || "auto",
        turnstileSolver: env.TURNSTILE_SOLVER || "manual"
      },
      captcha: {
        apiKey: env.CAPTCHA_API_KEY || ""
      },
      server: {
        enabled: enabledIfTrue("API_ENABLED"),
        port: intFromEnv("API_PORT", 3000),
        apiKey: env.API_KEY || ""
      },
      proxy: {
        enabled: enabledIfTrue("PROXY_ENABLED"),
        url: env.PROXY_URL || ""
      },
      storage: {
        dataDir: env.DATA_DIR || "./data",
        patternsDb: env.PATTERNS_DB || "./data/patterns.db",
        sessionsDb: env.SESSIONS_DB || "./data/sessions.db"
      },
      logging: {
        level: env.LOG_LEVEL || "info"
      },
      recording: {
        autoHideAfterSolve: enabledUnlessFalse("AUTO_HIDE_AFTER_SOLVE"),
        recordScreenshots: enabledIfTrue("RECORD_SCREENSHOTS")
      },
      rateLimit: {
        enabled: enabledUnlessFalse("RATE_LIMIT_ENABLED"),
        max: intFromEnv("RATE_LIMIT_MAX", 100),
        window: intFromEnv("RATE_LIMIT_WINDOW", 3600000)
      },
      advanced: {
        enableTelemetry: enabledIfTrue("ENABLE_TELEMETRY"),
        enableAnalytics: enabledIfTrue("ENABLE_ANALYTICS")
      }
    };
  }
});
459
+
460
+ // src/logger/logger.ts
461
+ import winston from "winston";
462
/**
 * Logs one action at info level, prefixed with the URL it belongs to.
 * `meta` is passed to the logger as the second argument.
 */
function step(url, action, meta) {
  const message = `[${url}] ${action}`;
  logger.info(message, meta);
}
465
/**
 * Logs extracted data at info level under an "EXTRACTED:" label for `url`.
 * `data` is passed to the logger as the second argument.
 */
function highlight(url, data) {
  const label = `[${url}] EXTRACTED:`;
  logger.info(label, data);
}
468
// Shared winston logger; created by init_logger().
var logger;
var init_logger = __esm({
  "src/logger/logger.ts"() {
    "use strict";
    init_esm_shims();
    init_config();
    const { format, transports } = winston;

    // Logger-level line: "<timestamp> [LEVEL] <message>", followed by any
    // remaining fields serialized as JSON.
    const detailedLine = format.printf(({ level, message, timestamp, ...meta }) => {
      const base = `${timestamp} [${level.toUpperCase()}] ${message}`;
      return Object.keys(meta).length > 0 ? `${base} ${JSON.stringify(meta)}` : base;
    });

    // Console line: colorized level, no extra fields.
    const consoleLine = format.printf(({ level, message, timestamp }) => `${timestamp} ${level}: ${message}`);

    logger = winston.createLogger({
      level: config.logging.level,
      format: format.combine(
        format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
        format.errors({ stack: true }),
        format.splat(),
        detailedLine
      ),
      transports: [
        new transports.Console({
          format: format.combine(format.colorize(), consoleLine)
        }),
        // Only entries at level "error" go to this file.
        new transports.File({
          filename: "logs/error.log",
          level: "error"
        }),
        new transports.File({
          filename: "logs/combined.log"
        })
      ]
    });
  }
});
508
+
509
+ // src/ghost/index.ts
510
/**
 * Picks a seed, validates it, applies every fingerprint spoof to `context`,
 * sets matching request headers, and returns a HumanBehavior for that seed.
 *
 * @param {object} context - Browser context passed to each spoof; must also
 *   expose `setExtraHTTPHeaders`.
 * @param {object} [options2] - `fresh` forces a new seed; `siteHost` selects
 *   the per-host cached seed. A new seed is used when `siteHost` is missing.
 * @returns {Promise<object>} HumanBehavior instance built from the seed.
 */
async function applyGhost(context, options2 = {}) {
  const wantsFreshSeed = options2.fresh || !options2.siteHost;
  const seed = wantsFreshSeed ? getNewSeed() : getSeedForSite(options2.siteHost);
  validateConsistency(seed);

  const shortId = seed.id.slice(0, 8);
  logger.info(`[ghost] Applying identity seed: ${shortId}... | Chrome ${seed.chromeVersion} | ${seed.screenWidth}x${seed.screenHeight}`);

  // Applied one after another, in this order.
  const spoofs = [applyCanvasSpoof, applyWebGLSpoof, applyAudioSpoof, applyFontSpoof, applyNavigatorSpoof];
  for (const applySpoof of spoofs) {
    await applySpoof(context, seed);
  }

  // Only the major version number goes into the client-hint header.
  const chromeMajor = seed.chromeVersion.split(".")[0];
  await context.setExtraHTTPHeaders({
    "Accept-Language": `${seed.language},en;q=0.9`,
    "sec-ch-ua": `"Chromium";v="${chromeMajor}", "Google Chrome";v="${chromeMajor}", "Not-A.Brand";v="99"`,
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"'
  });
  logger.info("[ghost] All fingerprint spoofs applied");
  return new HumanBehavior(seed);
}
528
// Run-once initializer for src/ghost/index.ts: initializes every module that
// applyGhost() relies on. Each init_* function is itself run-once, so each is
// listed a single time.
var init_ghost = __esm({
  "src/ghost/index.ts"() {
    "use strict";
    init_esm_shims();
    init_seed();
    init_canvas();
    init_webgl();
    init_audio();
    init_fonts();
    init_navigator();
    init_consistency();
    init_behavior();
    init_logger();
  }
});
545
+
546
+ // src/cloudflare/cloudflare.ts
547
// Assigned by init_cloudflare().
var CloudflareManager;
var init_cloudflare = __esm({
  "src/cloudflare/cloudflare.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
    /**
     * Detects Cloudflare challenge pages on a `page` and waits for them to
     * clear, either on their own or after a manual Turnstile solve.
     */
    CloudflareManager = class {
      /**
       * Checks the page title and HTML for Cloudflare markers.
       * @returns {Promise<boolean>} true when any marker is present; false
       *   when none is, or when the page could not be read.
       */
      async detect(page) {
        const url = page.url();
        try {
          const title = await page.title();
          const content = await page.content();
          const cfIndicators = [
            title.includes("Just a moment"),
            title.includes("Attention Required"),
            content.includes("cf-browser-verification"),
            content.includes("cloudflare"),
            content.includes("cf_chl_opt"),
            content.includes("__cf_bm"),
            content.includes("Ray ID")
          ];
          const detected = cfIndicators.some(Boolean);
          if (detected) {
            logger.warn(`[cloudflare] CF protection detected on ${url}`);
          }
          return detected;
        } catch (err) {
          // Best effort: an unreadable page counts as "not detected", but the
          // reason is recorded instead of being dropped.
          const reason = err instanceof Error ? err.message : String(err);
          logger.debug(`[cloudflare] Detection failed on ${url}: ${reason}`);
          return false;
        }
      }
      /**
       * Polls the page title once per second until it no longer looks like a
       * challenge page.
       * @returns {Promise<boolean>} true once cleared, false on timeout.
       */
      async waitForClearance(page, timeoutMs = 3e4) {
        logger.info("[cloudflare] Waiting for CF challenge to resolve...");
        const start = Date.now();
        while (Date.now() - start < timeoutMs) {
          const title = await page.title().catch(() => "");
          const isCFPage = title.includes("Just a moment") || title.includes("Attention Required");
          if (!isCFPage) {
            logger.info("[cloudflare] CF challenge cleared");
            return true;
          }
          await page.waitForTimeout(1e3);
        }
        logger.error("[cloudflare] CF challenge timeout - could not clear in time");
        return false;
      }
      /**
       * Reads the `cf_clearance` and `__cf_bm` cookies from the context.
       * @returns {Promise<{cfClearance?: string, cfBm?: string}>}
       */
      async extractTokens(context) {
        const cookies = await context.cookies();
        const tokens = {};
        for (const cookie of cookies) {
          if (cookie.name === "cf_clearance") tokens.cfClearance = cookie.value;
          if (cookie.name === "__cf_bm") tokens.cfBm = cookie.value;
        }
        if (tokens.cfClearance) {
          logger.info(`[cloudflare] Captured cf_clearance: ${tokens.cfClearance.slice(0, 20)}...`);
        }
        return tokens;
      }
      /**
       * Guesses the WAF vendor from substrings in the page HTML.
       * @returns {Promise<string|null>} Vendor name, or null if none matched.
       */
      async detectWAF(page) {
        const content = await page.content().catch(() => "");
        if (content.includes("cloudflare") || content.includes("cf-ray")) return "Cloudflare";
        if (content.includes("akamai") || content.includes("ak_bmsc")) return "Akamai";
        if (content.includes("sucuri")) return "Sucuri";
        if (content.includes("incapsula")) return "Imperva/Incapsula";
        if (content.includes("distil")) return "Distil Networks";
        return null;
      }
      /**
       * Full flow: detect, wait for automatic clearance, then fall back to a
       * manual Turnstile solve.
       * @returns {Promise<boolean>} true when the page is usable afterwards.
       */
      async handleCloudflare(page, url) {
        const isProtected = await this.detect(page);
        if (!isProtected) return true;
        logger.info("[cloudflare] Cloudflare detected");
        const cleared = await this.waitForClearance(page);
        if (cleared) {
          logger.info("[cloudflare] JS challenge auto-cleared");
          return true;
        }
        const hasTurnstile = await this.detectTurnstile(page);
        if (hasTurnstile) {
          logger.info("[cloudflare] Turnstile CAPTCHA detected - opening browser for manual solve");
          return await this.handleTurnstile(page, url);
        }
        logger.warn("[cloudflare] Unknown Cloudflare challenge");
        return false;
      }
      /** Reports whether a Turnstile iframe or a turnstile-id element exists. */
      async detectTurnstile(page) {
        const turnstileFrame = await page.locator('iframe[src*="challenges.cloudflare.com"]').count();
        const turnstileDiv = await page.locator('[id*="turnstile"]').count();
        return turnstileFrame > 0 || turnstileDiv > 0;
      }
      /**
       * Prompts on the console and waits for a manual solve.
       * @returns {Promise<boolean>} true when solved, false when the wait
       *   timed out.
       */
      async handleTurnstile(page, url) {
        logger.info("[cloudflare] Waiting for manual Turnstile solve...");
        console.log("\n===========================================");
        console.log(" PLEASE SOLVE THE CAPTCHA");
        console.log(" Waiting for you to complete it...");
        console.log("===========================================\n");
        const solved = await this.waitForTurnstileSolved(page);
        if (!solved) {
          logger.error("[cloudflare] Timed out waiting for manual Turnstile solve");
          return false;
        }
        logger.info("[cloudflare] CAPTCHA solved! Continuing...");
        return true;
      }
      /**
       * Heuristic check: solved when the Turnstile iframe is gone, the
       * response input has a value, or the body lacks the "no-scroll" class.
       * Any error while probing counts as "not solved".
       */
      async isTurnstileSolved(page) {
        try {
          const turnstileExists = await page.locator('iframe[src*="challenges.cloudflare.com"]').count();
          if (turnstileExists === 0) return true;
          const hasToken = await page.evaluate(() => {
            const input = document.querySelector('input[name="cf-turnstile-response"]');
            return input && input.value !== "";
          });
          if (hasToken) return true;
          const contentVisible = await page.evaluate(() => {
            const body = document.body;
            return body && !body.classList.contains("no-scroll");
          });
          return contentVisible;
        } catch {
          return false;
        }
      }
      /**
       * Polls once per second until the Turnstile looks solved or the
       * deadline passes. The loop used to be unbounded and could hang the
       * run forever; pass `Infinity` to keep waiting indefinitely.
       *
       * @param {object} page - Page to poll.
       * @param {number} [timeoutMs=300000] - Maximum time to wait.
       * @returns {Promise<boolean>} true when solved, false on timeout.
       */
      async waitForTurnstileSolved(page, timeoutMs = 3e5) {
        const deadline = Date.now() + timeoutMs;
        for (;;) {
          if (await this.isTurnstileSolved(page)) {
            // Short grace period before the caller continues.
            await page.waitForTimeout(2e3);
            return true;
          }
          if (Date.now() >= deadline) {
            return false;
          }
          await page.waitForTimeout(1e3);
        }
      }
    };
  }
});
676
+
677
+ // src/engine/cmd-parser.ts
678
+ import fs from "fs";
679
+ import path2 from "path";
680
// Assigned by init_cmd_parser().
var CommandParser;
var init_cmd_parser = __esm({
  "src/engine/cmd-parser.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
    // Built once instead of on every parseLine() call.
    const VALID_ACTIONS = new Set([
      "GOTO",
      "BACK",
      "FORWARD",
      "REFRESH",
      "CLICK",
      "TYPE",
      "PRESS",
      "SELECT",
      "CHECK",
      "UPLOAD",
      "WAIT",
      "WAITLOAD",
      "SCROLL",
      "SCROLLDOWN",
      "SCAN",
      "EXTRACT",
      "SCREENSHOT",
      "PAGINATE",
      "INFINITESCROLL",
      "FETCH",
      "DOWNLOAD",
      "REFERER",
      "BYPASS_CLOUDFLARE",
      "REPEAT",
      "IF",
      "LOOP"
    ]);
    // Actions whose indented following lines become `children`.
    const BLOCK_ACTIONS = new Set(["REPEAT", "IF", "LOOP"]);
    const indentOf = (text2) => text2.match(/^(\s*)/)?.[1].length ?? 0;
    const isBlankOrComment = (text2) => {
      const trimmed = text2.trim();
      return trimmed === "" || trimmed.startsWith("//");
    };
    /**
     * Parser for `.cmd` scripts: one action per line, `//` comments,
     * `{{name}}` variable placeholders, and indented child lines under
     * REPEAT / IF / LOOP.
     */
    CommandParser = class {
      variables = {};
      /** Stores a value for later `{{key}}` substitution. */
      setVariable(key, value) {
        this.variables[key] = value;
      }
      /**
       * Replaces every `{{name}}` with the stored variable. Unknown names are
       * left as written. Only own properties count, so `{{constructor}}` or
       * `{{toString}}` no longer expand to inherited Object members.
       */
      resolve(text2) {
        return text2.replace(/\{\{(\w+)\}\}/g, (placeholder, key) => {
          if (!Object.hasOwn(this.variables, key)) {
            return placeholder;
          }
          return this.variables[key] ?? placeholder;
        });
      }
      /**
       * Parses one line into `{ action, args, line }`.
       * @returns {object|null} null for comments, blank lines and unknown
       *   actions (unknown actions are logged as a warning).
       */
      parseLine(line, lineNum) {
        const trimmed = line.trim();
        if (isBlankOrComment(trimmed)) return null;
        const parts = trimmed.split(/\s+/);
        const action = parts[0].toUpperCase();
        const args = parts.slice(1).map((a) => this.resolve(a));
        if (!VALID_ACTIONS.has(action)) {
          logger.warn(`Unknown action "${action}" at line ${lineNum} - skipping`);
          return null;
        }
        return { action, args, line: lineNum };
      }
      /**
       * Parses a whole script.
       *
       * Top-level steps start at column 0. After a block action, following
       * indented lines become its children until the next column-0 step.
       * Blank and comment lines inside a block are skipped; previously they
       * ended the block and the remaining children were silently dropped.
       *
       * @param {string} content - Script text.
       * @param {string} [filePath="unknown"] - Used for logging and to derive
       *   `site` (base name without ".cmd").
       * @returns {{site: string, steps: object[], raw: string}}
       */
      parse(content, filePath = "unknown") {
        const lines = content.split("\n");
        const steps = [];
        let i = 0;
        while (i < lines.length) {
          const raw = lines[i];
          const lineNum = i + 1;
          i++;
          if (isBlankOrComment(raw)) continue;
          if (indentOf(raw) !== 0) {
            logger.warn(`Indented line ${lineNum} is not inside a REPEAT/IF/LOOP block - skipping`);
            continue;
          }
          const step2 = this.parseLine(raw.trimEnd(), lineNum);
          if (!step2) continue;
          if (BLOCK_ACTIONS.has(step2.action)) {
            step2.children = [];
            while (i < lines.length) {
              const childRaw = lines[i];
              if (isBlankOrComment(childRaw)) {
                i++;
                continue;
              }
              if (indentOf(childRaw) === 0) break;
              const childStep = this.parseLine(childRaw.trim(), i + 1);
              if (childStep) step2.children.push(childStep);
              i++;
            }
          }
          steps.push(step2);
        }
        const site = path2.basename(filePath, ".cmd");
        logger.info(`Parsed ${steps.length} steps from ${filePath}`);
        return { site, steps, raw: content };
      }
      /**
       * Reads and parses a script file.
       * @throws {Error} "File not found: ..." when the path does not exist.
       */
      load(filePath) {
        if (!fs.existsSync(filePath)) {
          throw new Error(`File not found: ${filePath}`);
        }
        const content = fs.readFileSync(filePath, "utf8");
        return this.parse(content, filePath);
      }
      /** Lists the `.cmd` files directly inside `dir` ([] if it is missing). */
      findAll(dir = "./commands") {
        if (!fs.existsSync(dir)) return [];
        return fs.readdirSync(dir).filter((f) => f.endsWith(".cmd")).map((f) => path2.join(dir, f));
      }
    };
  }
});
795
+
796
+ // src/network/smart-fetch.ts
797
+ import fs2 from "fs";
798
+ import path3 from "path";
799
// Assigned by init_smart_fetch().
var SmartFetch;
var init_smart_fetch = __esm({
  "src/network/smart-fetch.ts"() {
    "use strict";
    init_esm_shims();
    init_logger();
    // Bodies that are already in wire format and must not be JSON-encoded.
    const isRawBody = (body) =>
      typeof body === "string" ||
      body instanceof URLSearchParams ||
      body instanceof ArrayBuffer ||
      ArrayBuffer.isView(body);
    const hasHeader = (headers, name) =>
      Object.keys(headers).some((key) => key.toLowerCase() === name.toLowerCase());
    /**
     * Thin wrapper around the global `fetch` that fills in a Referer from the
     * attached page (or the previously fetched URL) and decodes the response
     * by content type.
     */
    SmartFetch = class {
      pageContext = null;
      lastReferer = "";
      /** Attaches the page whose current URL is used as automatic Referer. */
      setPageContext(page) {
        this.pageContext = page;
      }
      /**
       * Performs one HTTP request.
       *
       * @param {object} options2 - `url` plus optional `referer`,
       *   `autoReferer` (default true), `method` (default "GET"), `headers`,
       *   `cookies`, `body`, `followRedirects` (default true) and `timeout`
       *   in ms (default 30000).
       * @returns {Promise<{status: number, headers: object, data: *}>} `data`
       *   is parsed JSON, text, or an ArrayBuffer depending on content type.
       * @throws Rethrows any error from `fetch` or body decoding after
       *   logging it.
       */
      async fetch(options2) {
        const {
          url,
          referer,
          autoReferer = true,
          method = "GET",
          headers = {},
          cookies = {},
          body,
          followRedirects = true,
          timeout = 3e4
        } = options2;
        const finalReferer = referer || (autoReferer && this.pageContext ? this.pageContext.url() : this.lastReferer);
        const finalHeaders = {
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
          "Accept": "*/*",
          ...headers
        };
        if (finalReferer) {
          finalHeaders["Referer"] = finalReferer;
          logger.info(`[smart-fetch] Auto-Referer: ${finalReferer}`);
        }
        if (Object.keys(cookies).length > 0) {
          const cookieString = Object.entries(cookies).map(([k, v]) => `${k}=${v}`).join("; ");
          finalHeaders["Cookie"] = cookieString;
        }
        let requestBody;
        if (body) {
          if (isRawBody(body)) {
            // Already serialized; encoding it again would wrap a string in
            // an extra pair of quotes.
            requestBody = body;
          } else {
            requestBody = JSON.stringify(body);
            // Label the JSON payload unless the caller chose a type.
            if (!hasHeader(finalHeaders, "content-type")) {
              finalHeaders["Content-Type"] = "application/json";
            }
          }
        }
        logger.info(`[smart-fetch] ${method} ${url}`);
        try {
          const response = await fetch(url, {
            method,
            headers: finalHeaders,
            body: requestBody,
            redirect: followRedirects ? "follow" : "manual",
            signal: AbortSignal.timeout(timeout)
          });
          const contentType = response.headers.get("content-type") || "";
          let data;
          if (contentType.includes("application/json")) {
            data = await response.json();
          } else if (contentType.includes("text")) {
            data = await response.text();
          } else {
            data = await response.arrayBuffer();
          }
          this.lastReferer = url;
          const headersObj = {};
          response.headers.forEach((value, key) => {
            headersObj[key] = value;
          });
          return {
            status: response.status,
            headers: headersObj,
            data
          };
        } catch (err) {
          logger.error(`[smart-fetch] Failed: ${err.message}`);
          throw err;
        }
      }
      /**
       * Fetches `url` and writes the body to `outputPath`, creating parent
       * directories as needed.
       *
       * @throws {Error} When the server answers with status 400 or above;
       *   nothing is written, so an error page is never saved as the file.
       */
      async download(url, outputPath, referer) {
        logger.info(`[smart-fetch] Downloading ${url} to ${outputPath}`);
        const response = await this.fetch({
          url,
          referer,
          autoReferer: true
        });
        if (response.status >= 400) {
          throw new Error(`Download failed with HTTP ${response.status}: ${url}`);
        }
        const dir = path3.dirname(outputPath);
        if (!fs2.existsSync(dir)) {
          fs2.mkdirSync(dir, { recursive: true });
        }
        if (response.data instanceof ArrayBuffer) {
          fs2.writeFileSync(outputPath, Buffer.from(response.data));
        } else if (typeof response.data === "string") {
          fs2.writeFileSync(outputPath, response.data);
        } else {
          fs2.writeFileSync(outputPath, JSON.stringify(response.data));
        }
        logger.info(`[smart-fetch] Downloaded to ${outputPath}`);
      }
      /** Returns the URL of the last successful fetch ("" before the first). */
      getLastReferer() {
        return this.lastReferer;
      }
    };
  }
});
896
+
897
+ // src/engine/cmd-executor.ts
898
+ var CommandExecutor;
899
+ var init_cmd_executor = __esm({
900
+ "src/engine/cmd-executor.ts"() {
901
+ "use strict";
902
+ init_esm_shims();
903
+ init_logger();
904
+ init_config();
905
+ init_smart_fetch();
906
+ init_cloudflare();
907
+ CommandExecutor = class {
908
+ page;
909
+ url;
910
+ result = {
911
+ success: false,
912
+ skipped: [],
913
+ extracted: [],
914
+ errors: []
915
+ };
916
+ smartFetch;
917
+ cfManager;
918
+ variables = {};
919
+ constructor(page, url) {
920
+ this.page = page;
921
+ this.url = url;
922
+ this.smartFetch = new SmartFetch();
923
+ this.smartFetch.setPageContext(page);
924
+ this.cfManager = new CloudflareManager();
925
+ }
926
+ async execute(cmd) {
927
+ logger.info(`Executing ${cmd.site}.cmd - ${cmd.steps.length} steps`);
928
+ for (const step2 of cmd.steps) {
929
+ await this.runStep(step2);
930
+ }
931
+ if (this.result.errors.length > 0) {
932
+ logger.warn(`Completed with ${this.result.errors.length} skipped steps`);
933
+ for (const err of this.result.errors) {
934
+ logger.warn(` Line ${err.line} - ${err.action}: ${err.error}`);
935
+ }
936
+ }
937
+ this.result.success = true;
938
+ return this.result;
939
+ }
940
+ async runStep(cmdStep) {
941
+ const { action, args, line } = cmdStep;
942
+ step(this.url, `${action} ${args.join(" ")}`, { mode: "cmd", step: action });
943
+ try {
944
+ switch (action) {
945
+ case "GOTO":
946
+ await this.goto(args);
947
+ break;
948
+ case "BACK":
949
+ await this.page.goBack();
950
+ break;
951
+ case "FORWARD":
952
+ await this.page.goForward();
953
+ break;
954
+ case "REFRESH":
955
+ await this.page.reload();
956
+ break;
957
+ case "CLICK":
958
+ await this.click(args);
959
+ break;
960
+ case "TYPE":
961
+ await this.type(args);
962
+ break;
963
+ case "PRESS":
964
+ await this.press(args);
965
+ break;
966
+ case "SELECT":
967
+ await this.select(args);
968
+ break;
969
+ case "CHECK":
970
+ await this.check(args);
971
+ break;
972
+ case "UPLOAD":
973
+ await this.upload(args);
974
+ break;
975
+ case "WAIT":
976
+ await this.wait(args);
977
+ break;
978
+ case "WAITLOAD":
979
+ await this.page.waitForLoadState("networkidle");
980
+ break;
981
+ case "SCROLL":
982
+ await this.scroll(args);
983
+ break;
984
+ case "SCROLLDOWN":
985
+ await this.scrollDown(args);
986
+ break;
987
+ case "SCAN":
988
+ await this.scan(args);
989
+ break;
990
+ case "EXTRACT":
991
+ await this.extract(args);
992
+ break;
993
+ case "SCREENSHOT":
994
+ await this.screenshot(args);
995
+ break;
996
+ case "PAGINATE":
997
+ await this.paginate(args);
998
+ break;
999
+ case "INFINITESCROLL":
1000
+ await this.infiniteScroll();
1001
+ break;
1002
+ case "FETCH":
1003
+ await this.fetch(args);
1004
+ break;
1005
+ case "DOWNLOAD":
1006
+ await this.download(args);
1007
+ break;
1008
+ case "REFERER":
1009
+ await this.setReferer(args);
1010
+ break;
1011
+ case "BYPASS_CLOUDFLARE":
1012
+ await this.bypassCloudflare(args);
1013
+ break;
1014
+ case "REPEAT":
1015
+ await this.repeat(cmdStep);
1016
+ break;
1017
+ case "IF":
1018
+ await this.conditional(cmdStep);
1019
+ break;
1020
+ case "LOOP":
1021
+ await this.loop(cmdStep);
1022
+ break;
1023
+ }
1024
+ } catch (err) {
1025
+ const msg = err instanceof Error ? err.message : String(err);
1026
+ logger.warn(`Line ${line} SKIPPED - ${action}: ${msg}`);
1027
+ this.result.errors.push({ line, action, error: msg });
1028
+ this.result.skipped.push(`Line ${line}: ${action} ${args.join(" ")}`);
1029
+ }
1030
+ }
1031
+ async goto(args) {
1032
+ const url = args[0];
1033
+ if (!url) throw new Error("GOTO requires a URL");
1034
+ await this.page.goto(url, { waitUntil: "domcontentloaded", timeout: config.browser.timeout });
1035
+ step(this.url, `GOTO ${url}`, { mode: "cmd", step: "GOTO", url });
1036
+ }
1037
+ async click(args) {
1038
+ const selector = args[0];
1039
+ if (!selector) throw new Error("CLICK requires a selector");
1040
+ await this.page.waitForSelector(selector, { timeout: 1e4 });
1041
+ await this.page.click(selector);
1042
+ }
1043
+ async type(args) {
1044
+ const selector = args[0];
1045
+ const text2 = args.slice(1).join(" ");
1046
+ if (!selector) throw new Error("TYPE requires a selector");
1047
+ if (!text2) throw new Error("TYPE requires text");
1048
+ await this.page.waitForSelector(selector, { timeout: 1e4 });
1049
+ await this.page.fill(selector, text2);
1050
+ }
1051
+ async press(args) {
1052
+ const key = args[0];
1053
+ if (!key) throw new Error("PRESS requires a key");
1054
+ await this.page.keyboard.press(key);
1055
+ }
1056
+ async select(args) {
1057
+ const selector = args[0];
1058
+ const value = args[1];
1059
+ if (!selector) throw new Error("SELECT requires a selector");
1060
+ if (!value) throw new Error("SELECT requires a value");
1061
+ await this.page.selectOption(selector, value);
1062
+ }
1063
+ async check(args) {
1064
+ const selector = args[0];
1065
+ if (!selector) throw new Error("CHECK requires a selector");
1066
+ await this.page.check(selector);
1067
+ }
1068
+ async upload(args) {
1069
+ const selector = args[0];
1070
+ const filePath = args[1];
1071
+ if (!selector) throw new Error("UPLOAD requires a selector");
1072
+ if (!filePath) throw new Error("UPLOAD requires a file path");
1073
+ await this.page.setInputFiles(selector, filePath);
1074
+ }
1075
+ async wait(args) {
1076
+ const target = args[0];
1077
+ if (!target) throw new Error("WAIT requires a selector or ms value");
1078
+ if (/^\d+$/.test(target)) {
1079
+ await this.page.waitForTimeout(parseInt(target, 10));
1080
+ } else {
1081
+ await this.page.waitForSelector(target, { timeout: config.browser.timeout });
1082
+ }
1083
+ }
1084
+ async scroll(args) {
1085
+ const selector = args[0];
1086
+ if (!selector) throw new Error("SCROLL requires a selector");
1087
+ await this.page.locator(selector).scrollIntoViewIfNeeded();
1088
+ }
1089
+ async scrollDown(args) {
1090
+ const pixels = parseInt(args[0] || "500", 10);
1091
+ await this.page.evaluate((px) => window.scrollBy(0, px), pixels);
1092
+ }
1093
+ async scan(args) {
1094
+ const selector = args[0];
1095
+ if (!selector) throw new Error("SCAN requires a selector");
1096
+ const elements = await this.page.$$(selector);
1097
+ const found = [];
1098
+ for (const el of elements) {
1099
+ const tag = await el.evaluate((e) => e.tagName.toLowerCase());
1100
+ const text2 = await el.textContent();
1101
+ const href = await el.getAttribute("href");
1102
+ const src = await el.getAttribute("src");
1103
+ const id = await el.getAttribute("id");
1104
+ const cls = await el.getAttribute("class");
1105
+ found.push({ tag, text: text2?.trim().slice(0, 100), href, src, id, class: cls });
1106
+ }
1107
+ this.result.extracted.push({ type: "scan", selector, count: found.length, found });
1108
+ highlight(this.url, { type: "scan", selector, count: found.length });
1109
+ step(this.url, `SCAN found ${found.length} elements matching "${selector}"`, { mode: "cmd", step: "SCAN" });
1110
+ }
1111
+ async extract(args) {
1112
+ const selector = args[0];
1113
+ const attr = args[1] || "text";
1114
+ if (!selector) throw new Error("EXTRACT requires a selector");
1115
+ const elements = await this.page.$$(selector);
1116
+ const data = [];
1117
+ for (const el of elements) {
1118
+ if (attr === "text") {
1119
+ const text2 = await el.textContent();
1120
+ data.push(text2?.trim());
1121
+ } else {
1122
+ const value = await el.getAttribute(attr);
1123
+ data.push(value);
1124
+ }
1125
+ }
1126
+ this.result.extracted.push({ selector, attr, count: data.length, data });
1127
+ highlight(this.url, { selector, attr, count: data.length });
1128
+ }
1129
+ async screenshot(args) {
1130
+ const path5 = args[0] || `screenshot-${Date.now()}.png`;
1131
+ await this.page.screenshot({ path: path5, fullPage: true });
1132
+ logger.info(`Screenshot saved: ${path5}`);
1133
+ }
1134
+ async paginate(args) {
1135
+ const selector = args[0];
1136
+ if (!selector) throw new Error("PAGINATE requires a selector");
1137
+ let page = 1;
1138
+ while (true) {
1139
+ try {
1140
+ await this.page.waitForSelector(selector, { timeout: 5e3 });
1141
+ logger.info(`Clicking next page (${page})`);
1142
+ await this.page.click(selector);
1143
+ await this.page.waitForLoadState("networkidle");
1144
+ page++;
1145
+ } catch {
1146
+ logger.info(`Pagination complete - ${page} pages`);
1147
+ break;
1148
+ }
1149
+ }
1150
+ }
1151
+ async infiniteScroll() {
1152
+ let previousHeight = 0;
1153
+ let attempts = 0;
1154
+ const maxAttempts = 50;
1155
+ while (attempts < maxAttempts) {
1156
+ const currentHeight = await this.page.evaluate(() => document.body.scrollHeight);
1157
+ if (currentHeight === previousHeight) {
1158
+ break;
1159
+ }
1160
+ await this.page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
1161
+ await this.page.waitForTimeout(1e3);
1162
+ previousHeight = currentHeight;
1163
+ attempts++;
1164
+ }
1165
+ logger.info(`Infinite scroll complete - ${attempts} scrolls`);
1166
+ }
1167
+ async fetch(args) {
1168
+ const url = args[0];
1169
+ const varName = args[1];
1170
+ if (!url) throw new Error("FETCH requires a URL");
1171
+ const response = await this.smartFetch.fetch({
1172
+ url,
1173
+ autoReferer: true
1174
+ });
1175
+ if (varName) {
1176
+ this.variables[varName] = response.data;
1177
+ logger.info(`Saved response to variable: ${varName}`);
1178
+ }
1179
+ this.result.extracted.push({
1180
+ type: "fetch",
1181
+ url,
1182
+ status: response.status,
1183
+ data: response.data
1184
+ });
1185
+ }
1186
+ async download(args) {
1187
+ const url = args[0];
1188
+ const outputPath = args[1] || `./downloads/${Date.now()}.bin`;
1189
+ const referer = args[2];
1190
+ if (!url) throw new Error("DOWNLOAD requires a URL");
1191
+ logger.info(`Downloading: ${url}`);
1192
+ await this.smartFetch.download(url, outputPath, referer);
1193
+ this.result.extracted.push({
1194
+ type: "download",
1195
+ url,
1196
+ path: outputPath
1197
+ });
1198
+ }
1199
+ async setReferer(args) {
1200
+ const referer = args[0];
1201
+ if (!referer) throw new Error("REFERER requires a URL");
1202
+ logger.info(`Set manual Referer: ${referer}`);
1203
+ }
1204
+ async bypassCloudflare(args) {
1205
+ const mode = args[0] || "auto";
1206
+ await this.cfManager.handleCloudflare(this.page, this.url);
1207
+ }
1208
+ async repeat(stepCmd) {
1209
+ const selector = stepCmd.args[0];
1210
+ if (!selector) throw new Error("REPEAT requires a selector");
1211
+ if (!stepCmd.children?.length) throw new Error("REPEAT has no child commands");
1212
+ const elements = await this.page.$$(selector);
1213
+ step(this.url, `REPEAT ${elements.length}x over "${selector}"`, { mode: "cmd", step: "REPEAT" });
1214
+ for (let i = 0; i < elements.length; i++) {
1215
+ step(this.url, ` REPEAT iteration ${i + 1}/${elements.length}`, { mode: "cmd" });
1216
+ for (const child of stepCmd.children) {
1217
+ await this.runStep(child);
1218
+ }
1219
+ }
1220
+ }
1221
+ async conditional(stepCmd) {
1222
+ const selector = stepCmd.args[0];
1223
+ if (!selector) throw new Error("IF requires a selector");
1224
+ if (!stepCmd.children?.length) throw new Error("IF has no child commands");
1225
+ const exists = await this.page.locator(selector).count() > 0;
1226
+ if (exists && stepCmd.children) {
1227
+ for (const child of stepCmd.children) {
1228
+ await this.runStep(child);
1229
+ }
1230
+ }
1231
+ }
1232
+ async loop(stepCmd) {
1233
+ const count = parseInt(stepCmd.args[0] || "1", 10);
1234
+ if (!stepCmd.children?.length) throw new Error("LOOP has no child commands");
1235
+ for (let i = 0; i < count; i++) {
1236
+ step(this.url, ` LOOP iteration ${i + 1}/${count}`, { mode: "cmd" });
1237
+ for (const child of stepCmd.children) {
1238
+ await this.runStep(child);
1239
+ }
1240
+ }
1241
+ }
1242
+ };
1243
+ }
1244
+ });
1245
+
1246
+ // src/modes/downloader.ts
1247
+ var DownloaderMode;
1248
+ var init_downloader = __esm({
1249
+ "src/modes/downloader.ts"() {
1250
+ "use strict";
1251
+ init_esm_shims();
1252
+ init_logger();
1253
+ init_smart_fetch();
1254
+ DownloaderMode = class {
1255
+ page;
1256
+ smartFetch;
1257
+ constructor(page) {
1258
+ this.page = page;
1259
+ this.smartFetch = new SmartFetch();
1260
+ this.smartFetch.setPageContext(page);
1261
+ }
1262
+ async execute(url) {
1263
+ logger.info("[downloader-mode] Analyzing download flow...");
1264
+ const flow = await this.detectDownloadFlow();
1265
+ if (!flow) {
1266
+ return {
1267
+ success: false,
1268
+ data: {},
1269
+ errors: ["No download flow detected"],
1270
+ timestamp: Date.now()
1271
+ };
1272
+ }
1273
+ logger.info(`[downloader-mode] Flow type: ${flow.type}`);
1274
+ try {
1275
+ let downloadedFiles = [];
1276
+ if (flow.type === "DIRECT" && flow.links) {
1277
+ downloadedFiles = await this.downloadDirectLinks(flow.links);
1278
+ } else if (flow.type === "BUTTON_CLICK" && flow.button) {
1279
+ downloadedFiles = await this.downloadViaButton(flow.button);
1280
+ }
1281
+ return {
1282
+ success: true,
1283
+ data: {
1284
+ flow: flow.type,
1285
+ files: downloadedFiles
1286
+ },
1287
+ errors: [],
1288
+ timestamp: Date.now()
1289
+ };
1290
+ } catch (err) {
1291
+ return {
1292
+ success: false,
1293
+ data: {},
1294
+ errors: [err.message],
1295
+ timestamp: Date.now()
1296
+ };
1297
+ }
1298
+ }
1299
+ async detectDownloadFlow() {
1300
+ const directLinks = await this.findDirectLinks();
1301
+ if (directLinks.length > 0) {
1302
+ return {
1303
+ type: "DIRECT",
1304
+ steps: ["Found direct download links"],
1305
+ links: directLinks
1306
+ };
1307
+ }
1308
+ const downloadButton = await this.findDownloadButton();
1309
+ if (downloadButton) {
1310
+ return {
1311
+ type: "BUTTON_CLICK",
1312
+ steps: ["Click download button", "Wait for file"],
1313
+ button: downloadButton
1314
+ };
1315
+ }
1316
+ return null;
1317
+ }
1318
+ async findDirectLinks() {
1319
+ return await this.page.evaluate(() => {
1320
+ const links = Array.from(document.querySelectorAll("a[href]"));
1321
+ const fileLinks = [];
1322
+ const fileExtensions = /\.(mp4|mp3|pdf|zip|rar|exe|dmg|apk|avi|mkv|mov|wav|flac)$/i;
1323
+ for (const link of links) {
1324
+ const href = link.href;
1325
+ if (href && fileExtensions.test(href)) {
1326
+ const selector = link.id ? `#${link.id}` : `a[href="${href}"]`;
1327
+ const ext = href.match(fileExtensions)?.[1];
1328
+ fileLinks.push({
1329
+ url: href,
1330
+ selector,
1331
+ extension: ext
1332
+ });
1333
+ }
1334
+ }
1335
+ return fileLinks;
1336
+ });
1337
+ }
1338
+ async findDownloadButton() {
1339
+ const buttons = await this.page.evaluate(() => {
1340
+ const candidates = Array.from(document.querySelectorAll('button, a, [class*="download"]'));
1341
+ const scored = [];
1342
+ for (const el of candidates) {
1343
+ const text2 = (el.textContent || "").toLowerCase();
1344
+ const className = (el.className || "").toLowerCase();
1345
+ const href = el.href;
1346
+ let score = 0;
1347
+ if (text2.includes("download")) score += 10;
1348
+ if (className.includes("download")) score += 10;
1349
+ if (el.hasAttribute("download")) score += 20;
1350
+ if (href && href.includes("download")) score += 5;
1351
+ if (text2.includes("get")) score += 3;
1352
+ if (text2.includes("save")) score += 3;
1353
+ if (score > 0) {
1354
+ const selector = el.id ? `#${el.id}` : el.tagName.toLowerCase();
1355
+ scored.push({ selector, score, text: text2, href });
1356
+ }
1357
+ }
1358
+ return scored.sort((a, b) => b.score - a.score);
1359
+ });
1360
+ return buttons.length > 0 ? buttons[0] : null;
1361
+ }
1362
+ async downloadDirectLinks(links) {
1363
+ const downloaded = [];
1364
+ for (const link of links) {
1365
+ const filename = `download-${Date.now()}.${link.extension || "bin"}`;
1366
+ const outputPath = `./downloads/${filename}`;
1367
+ logger.info(`[downloader-mode] Downloading ${link.url}`);
1368
+ await this.smartFetch.download(link.url, outputPath);
1369
+ downloaded.push(outputPath);
1370
+ }
1371
+ return downloaded;
1372
+ }
1373
+ async downloadViaButton(button) {
1374
+ logger.info(`[downloader-mode] Clicking download button: ${button.selector}`);
1375
+ const [download] = await Promise.all([
1376
+ this.page.waitForEvent("download"),
1377
+ this.page.click(button.selector)
1378
+ ]);
1379
+ const filename = download.suggestedFilename();
1380
+ const outputPath = `./downloads/${filename}`;
1381
+ await download.saveAs(outputPath);
1382
+ logger.info(`[downloader-mode] Saved to ${outputPath}`);
1383
+ return [outputPath];
1384
+ }
1385
+ };
1386
+ }
1387
+ });
1388
+
1389
+ // src/modes/scrape.ts
1390
+ var ScrapeMode;
1391
+ var init_scrape = __esm({
1392
+ "src/modes/scrape.ts"() {
1393
+ "use strict";
1394
+ init_esm_shims();
1395
+ init_logger();
1396
/**
 * Generic content extraction: pulls title, meta description, images, links
 * and paragraph text from the already-loaded page.
 */
ScrapeMode = class {
  /** Playwright page to read from. */
  page;
  constructor(page) {
    this.page = page;
  }
  /**
   * @param url Not used by this method; the already-loaded page is read.
   * @returns Result object `{ success, data, errors, timestamp }`.
   */
  async execute(url) {
    logger.info("[scrape-mode] Extracting content...");
    try {
      const data = await this.extractContent();
      return {
        success: true,
        data,
        errors: [],
        timestamp: Date.now()
      };
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances.
      const message = err instanceof Error ? err.message : String(err);
      return {
        success: false,
        data: {},
        errors: [message],
        timestamp: Date.now()
      };
    }
  }
  /**
   * Runs inside the page. Keys are only present when something was found:
   * `title` (first h1), `description`, `images`, `links`, `content`.
   */
  async extractContent() {
    return await this.page.evaluate(() => {
      const result = {};
      const title = document.querySelector("h1")?.textContent?.trim();
      if (title) result.title = title;
      const description = document.querySelector('meta[name="description"]')?.getAttribute("content");
      if (description) result.description = description;
      const images = Array.from(document.querySelectorAll("img[src]"), (img) => img.src).filter(Boolean);
      if (images.length > 0) result.images = images;
      const links = Array.from(document.querySelectorAll("a[href]"), (a) => ({
        text: a.textContent?.trim(),
        href: a.href
      })).filter((l) => l.text && l.href);
      if (links.length > 0) result.links = links;
      const paragraphs = Array.from(document.querySelectorAll("p"), (p) => p.textContent?.trim()).filter(Boolean);
      if (paragraphs.length > 0) result.content = paragraphs;
      return result;
    });
  }
};
1440
+ }
1441
+ });
1442
+
1443
+ // src/modes/navigator.ts
1444
+ var NavigatorMode;
1445
+ var init_navigator2 = __esm({
1446
+ "src/modes/navigator.ts"() {
1447
+ "use strict";
1448
+ init_esm_shims();
1449
+ init_logger();
1450
/**
 * Site-structure mapping: summarises the navigation links and section count
 * of the already-loaded page.
 */
NavigatorMode = class {
  /** Playwright page to read from. */
  page;
  constructor(page) {
    this.page = page;
  }
  /**
   * @param url Not used by this method; the already-loaded page is read.
   * @returns Result object `{ success, data, errors, timestamp }`.
   */
  async execute(url) {
    logger.info("[navigator-mode] Mapping site structure...");
    try {
      const siteMap = await this.buildSiteMap();
      return {
        success: true,
        data: siteMap,
        errors: [],
        timestamp: Date.now()
      };
    } catch (err) {
      // Thrown values are not guaranteed to be Error instances.
      const message = err instanceof Error ? err.message : String(err);
      return {
        success: false,
        data: {},
        errors: [message],
        timestamp: Date.now()
      };
    }
  }
  /**
   * Runs inside the page. Links come from the first of `<nav>`, an element
   * whose class contains "menu", or `<header>` that exists.
   *
   * @returns `{ title, url, navigation, sections }`.
   */
  async buildSiteMap() {
    return await this.page.evaluate(() => {
      const container = document.querySelector("nav")
        || document.querySelector('[class*="menu"]')
        || document.querySelector("header");
      const links = [];
      if (container) {
        for (const a of container.querySelectorAll("a[href]")) {
          const text2 = a.textContent?.trim();
          const href = a.href;
          if (text2 && href) {
            links.push({ text: text2, href });
          }
        }
      }
      return {
        title: document.title,
        url: window.location.href,
        navigation: links,
        sections: document.querySelectorAll("section, article").length
      };
    });
  }
};
1502
+ }
1503
+ });
1504
+
1505
+ // src/modes/auto.ts
1506
+ var auto_exports = {};
1507
+ __export(auto_exports, {
1508
+ AutoMode: () => AutoMode
1509
+ });
1510
+ var AutoMode;
1511
+ var init_auto = __esm({
1512
+ "src/modes/auto.ts"() {
1513
+ "use strict";
1514
+ init_esm_shims();
1515
+ init_logger();
1516
+ init_downloader();
1517
+ init_scrape();
1518
+ init_navigator2();
1519
/**
 * Picks the most suitable mode (downloader, scrape or navigator) for the
 * already-loaded page and delegates to it.
 */
AutoMode = class {
  /** Playwright page to analyse. */
  page;
  constructor(page) {
    this.page = page;
  }
  /**
   * @param url Forwarded unchanged to the selected mode.
   * @returns The selected mode's result object.
   */
  async execute(url) {
    logger.info("[auto-mode] Analyzing site...");
    const mode = await this.detectBestMode();
    logger.info(`[auto-mode] Selected mode: ${mode}`);
    switch (mode) {
      case "downloader":
        return await new DownloaderMode(this.page).execute(url);
      case "scrape":
        return await new ScrapeMode(this.page).execute(url);
      case "navigator":
        return await new NavigatorMode(this.page).execute(url);
      default:
        return {
          success: false,
          data: {},
          errors: ["Unknown mode"],
          timestamp: Date.now()
        };
    }
  }
  /**
   * Inspects the page for download, video and pagination indicators.
   *
   * The button check used to pass `button:has-text("Download")` to
   * `document.querySelector`. `:has-text` is a Playwright-only pseudo-class,
   * so the native call threw and detection always failed. Button text is
   * now compared in-page, case-insensitively.
   *
   * @returns "downloader", "scrape" or "navigator".
   */
  async detectBestMode() {
    const indicators = await this.page.evaluate(() => {
      const hasDownloadButton = !!document.querySelector("a[download]")
        || Array.from(document.querySelectorAll("button")).some(
          (button) => (button.textContent || "").toLowerCase().includes("download")
        );
      const hasVideoPlayer = !!document.querySelector('video, iframe[src*="youtube"], iframe[src*="vimeo"]');
      const hasFileLinks = !!document.querySelector('a[href$=".mp4"], a[href$=".pdf"], a[href$=".zip"]');
      const hasPagination = !!document.querySelector('.pagination, .next, [class*="page-"]');
      // A document more than three viewports tall is treated as long-scrolling.
      const hasInfiniteScroll = document.body.scrollHeight > window.innerHeight * 3;
      return {
        hasDownloadButton,
        hasVideoPlayer,
        hasFileLinks,
        hasPagination,
        hasInfiniteScroll
      };
    });
    if (indicators.hasDownloadButton || indicators.hasVideoPlayer || indicators.hasFileLinks) {
      return "downloader";
    }
    if (indicators.hasPagination || indicators.hasInfiniteScroll) {
      return "scrape";
    }
    return "navigator";
  }
};
1580
+ }
1581
+ });
1582
+
1583
+ // src/core/scraper.ts
1584
+ import { chromium } from "playwright";
1585
+ var FirekidScraper;
1586
+ var init_scraper = __esm({
1587
+ "src/core/scraper.ts"() {
1588
+ "use strict";
1589
+ init_esm_shims();
1590
+ init_ghost();
1591
+ init_cloudflare();
1592
+ init_cmd_parser();
1593
+ init_cmd_executor();
1594
+ init_logger();
1595
+ init_config();
1596
+ FirekidScraper = class {
1597
+ config;
1598
+ browser = null;
1599
+ context = null;
1600
+ page = null;
1601
+ cfManager;
1602
+ constructor(userConfig = {}) {
1603
+ this.config = {
1604
+ headless: userConfig.headless ?? config.browser.headless,
1605
+ bypassCloudflare: userConfig.bypassCloudflare ?? true,
1606
+ maxWorkers: userConfig.maxWorkers ?? config.browser.maxWorkers,
1607
+ timeout: userConfig.timeout ?? config.browser.timeout,
1608
+ dataDir: userConfig.dataDir ?? config.storage.dataDir,
1609
+ logLevel: userConfig.logLevel ?? config.logging.level
1610
+ };
1611
+ this.cfManager = new CloudflareManager();
1612
+ }
1613
+ async init() {
1614
+ if (this.browser) return;
1615
+ logger.info("Initializing Firekid Scraper...");
1616
+ this.browser = await chromium.launch({
1617
+ headless: this.config.headless,
1618
+ args: [
1619
+ "--disable-blink-features=AutomationControlled",
1620
+ "--no-sandbox"
1621
+ ]
1622
+ });
1623
+ this.context = await this.browser.newContext({
1624
+ viewport: { width: 1920, height: 1080 },
1625
+ userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
1626
+ });
1627
+ await applyGhost(this.context);
1628
+ this.page = await this.context.newPage();
1629
+ logger.info("Firekid Scraper initialized");
1630
+ }
1631
+ async goto(url) {
1632
+ await this.init();
1633
+ if (!this.page) throw new Error("Page not initialized");
1634
+ logger.info(`Navigating to ${url}`);
1635
+ await this.page.goto(url, {
1636
+ waitUntil: "domcontentloaded",
1637
+ timeout: this.config.timeout
1638
+ });
1639
+ if (this.config.bypassCloudflare) {
1640
+ await this.cfManager.handleCloudflare(this.page, url);
1641
+ }
1642
+ }
1643
+ async extract(url, selectors) {
1644
+ await this.goto(url);
1645
+ if (!this.page) throw new Error("Page not initialized");
1646
+ const data = {};
1647
+ for (const [key, selector] of Object.entries(selectors)) {
1648
+ try {
1649
+ const element = await this.page.locator(selector).first();
1650
+ const text2 = await element.textContent();
1651
+ data[key] = text2?.trim() || null;
1652
+ } catch (err) {
1653
+ logger.warn(`Failed to extract ${key} with selector ${selector}`);
1654
+ data[key] = null;
1655
+ }
1656
+ }
1657
+ return data;
1658
+ }
1659
+ async auto(url) {
1660
+ await this.goto(url);
1661
+ if (!this.page) throw new Error("Page not initialized");
1662
+ logger.info("Running auto mode...");
1663
+ const { AutoMode: AutoMode2 } = await Promise.resolve().then(() => (init_auto(), auto_exports));
1664
+ const autoMode = new AutoMode2(this.page);
1665
+ return await autoMode.execute(url);
1666
+ }
1667
+ async runCommandFile(filePath) {
1668
+ await this.init();
1669
+ if (!this.page) throw new Error("Page not initialized");
1670
+ const parser = new CommandParser();
1671
+ const cmdFile = parser.load(filePath);
1672
+ logger.info(`Executing command file: ${cmdFile.site}`);
1673
+ const executor = new CommandExecutor(this.page, cmdFile.steps[0]?.args[0] || "");
1674
+ const result = await executor.execute(cmdFile);
1675
+ return {
1676
+ success: result.success,
1677
+ data: result.extracted,
1678
+ errors: result.errors.map((e) => e.error),
1679
+ timestamp: Date.now()
1680
+ };
1681
+ }
1682
+ async close() {
1683
+ if (this.page) await this.page.close();
1684
+ if (this.context) await this.context.close();
1685
+ if (this.browser) await this.browser.close();
1686
+ this.page = null;
1687
+ this.context = null;
1688
+ this.browser = null;
1689
+ logger.info("Firekid Scraper closed");
1690
+ }
1691
+ getPage() {
1692
+ return this.page;
1693
+ }
1694
+ getBrowser() {
1695
+ return this.browser;
1696
+ }
1697
+ getContext() {
1698
+ return this.context;
1699
+ }
1700
+ };
1701
+ }
1702
+ });
1703
+
1704
+ // src/server/app.ts
1705
+ var app_exports = {};
1706
+ __export(app_exports, {
1707
+ startServer: () => startServer
1708
+ });
1709
+ import express from "express";
1710
+ import { rateLimit } from "express-rate-limit";
1711
/**
 * Starts the HTTP API server.
 *
 * The returned promise now settles only once the server is listening, and
 * rejects if binding fails (for example when the port is already in use).
 * It used to resolve immediately, with listen errors not surfaced to the caller.
 *
 * @param port TCP port to listen on (default 3000).
 */
async function startServer(port = 3e3) {
  await new Promise((resolve, reject) => {
    const server = app.listen(port, () => {
      server.off("error", reject);
      logger.info(`[server] Firekid API server running on port ${port}`);
      console.log("\nFirekid Scraper API Server");
      console.log(`Port: ${port}`);
      console.log(`Health: http://localhost:${port}/health\n`);
      resolve();
    });
    server.once("error", reject);
  });
}
1721
+ var app, limiter;
1722
// Lazily builds the Express app: JSON body parsing, optional rate limiting,
// optional API-key check, and the /scrape, /command and /health routes.
var init_app = __esm({
  "src/server/app.ts"() {
    "use strict";
    init_esm_shims();
    init_scraper();
    init_logger();
    init_config();
    // Thrown values are not guaranteed to be Error instances.
    const messageOf = (err) => err instanceof Error ? err.message : String(err);
    // Closing must never reject inside a handler, or the request would be
    // left without a response.
    const closeQuietly = async (scraper) => {
      try {
        await scraper.close();
      } catch (err) {
        logger.warn(`[api] Failed to close scraper: ${messageOf(err)}`);
      }
    };
    app = express();
    app.use(express.json());
    limiter = rateLimit({
      windowMs: config.rateLimit.window,
      max: config.rateLimit.max,
      message: "Too many requests, please try again later"
    });
    if (config.rateLimit.enabled) {
      app.use(limiter);
    }
    // When an API key is configured, every route (including /health) requires it.
    app.use((req, res, next) => {
      const apiKey = req.headers["x-api-key"];
      if (config.server.apiKey && apiKey !== config.server.apiKey) {
        return res.status(401).json({ error: "Invalid API key" });
      }
      next();
    });
    app.post("/scrape", async (req, res) => {
      const { url, mode = "auto", selectors } = req.body ?? {};
      if (!url) {
        return res.status(400).json({ error: "URL is required" });
      }
      const wantsExtract = mode === "extract" && selectors;
      if (mode !== "auto" && !wantsExtract) {
        return res.status(400).json({ error: "Invalid mode or missing parameters" });
      }
      const scraper = new FirekidScraper();
      let status = 200;
      let payload;
      try {
        const result = mode === "auto" ? await scraper.auto(url) : await scraper.extract(url, selectors);
        payload = { success: true, result };
      } catch (err) {
        logger.error("[api] Scraping error:", err);
        status = 500;
        payload = { error: messageOf(err) };
      } finally {
        await closeQuietly(scraper);
      }
      res.status(status).json(payload);
    });
    app.post("/command", async (req, res) => {
      // NOTE(review): filePath comes straight from the request body and is
      // handed to the command parser; consider restricting it to a known directory.
      const { filePath } = req.body ?? {};
      if (!filePath) {
        return res.status(400).json({ error: "File path is required" });
      }
      const scraper = new FirekidScraper();
      let status = 200;
      let payload;
      try {
        const result = await scraper.runCommandFile(filePath);
        payload = { success: true, result };
      } catch (err) {
        logger.error("[api] Command execution error:", err);
        status = 500;
        payload = { error: messageOf(err) };
      } finally {
        await closeQuietly(scraper);
      }
      res.status(status).json(payload);
    });
    app.get("/health", (req, res) => {
      res.json({ status: "ok", timestamp: Date.now() });
    });
  }
});
1790
+
1791
+ // bin/firekid-scraper.ts
1792
+ init_esm_shims();
1793
+ init_scraper();
1794
+ import { Command } from "commander";
1795
+ import { intro, outro, text, select, spinner } from "@clack/prompts";
1796
+
1797
+ // src/recorder/recorder.ts
1798
+ init_esm_shims();
1799
+ import { chromium as chromium2 } from "playwright";
1800
+
1801
+ // src/recorder/selector-generator.ts
1802
+ init_esm_shims();
1803
/**
 * Builds CSS / Playwright selectors for a recorded element.
 */
var SelectorGenerator = class {
  /** Escapes a value for use as a CSS identifier (id or class name). */
  #escapeIdent(value) {
    return String(value)
      .replace(/[^\w\-\u0080-\uffff]/g, "\\$&")
      // An identifier may not start with a digit; use its code-point escape.
      .replace(/^(-?)(\d)/, "$1\\3$2 ");
  }
  /** Escapes a value for use inside a double-quoted selector string. */
  #quote(value) {
    return String(value).replace(/["\\]/g, "\\$&");
  }
  /**
   * Produces candidate selectors from an element descriptor, in order of
   * preference: id, classes, `tag[type]`, `tag[href*=path]`, `tag:has-text`.
   *
   * Class names are split on any whitespace, and ids, classes, attribute
   * values and text are escaped so that quotes or special characters cannot
   * produce a broken selector.
   *
   * @param page Unused.
   * @param element Descriptor with optional `id`, `className`, `tagName`,
   *   `type`, `href` and `textContent`.
   * @returns `{ primary, fallbacks }`; `primary` is "body" when nothing applies.
   */
  async generate(page, element) {
    const selectors = [];
    if (element.id) {
      selectors.push(`#${this.#escapeIdent(element.id)}`);
    }
    if (element.className && typeof element.className === "string") {
      const classes = element.className.split(/\s+/).filter(Boolean);
      if (classes.length > 0) {
        selectors.push(`.${classes.map((name) => this.#escapeIdent(name)).join(".")}`);
      }
    }
    if (element.tagName) {
      const tag = element.tagName.toLowerCase();
      if (element.type) {
        selectors.push(`${tag}[type="${this.#quote(element.type)}"]`);
      }
      if (element.href) {
        selectors.push(`${tag}[href*="${this.#quote(this.simplifyUrl(element.href))}"]`);
      }
      if (element.textContent) {
        // Collapse whitespace so multi-line text yields a single-line selector.
        const text2 = element.textContent.trim().replace(/\s+/g, " ").slice(0, 30);
        if (text2) {
          selectors.push(`${tag}:has-text("${this.#quote(text2)}")`);
        }
      }
    }
    const [primary = "body", ...fallbacks] = selectors;
    return { primary, fallbacks };
  }
  /**
   * @returns The path component of an absolute URL, or the input unchanged
   *   when it cannot be parsed.
   */
  simplifyUrl(url) {
    try {
      return new URL(url).pathname;
    } catch {
      return url;
    }
  }
  /**
   * Builds a `parent > child` path for the element, evaluated in the page.
   * The walk stops at the nearest ancestor with an id.
   *
   * `:nth-of-type` is emitted whenever the element has any same-tag sibling.
   * It used to be added only for the second and later siblings, which left
   * the path ambiguous for a first-of-type element.
   */
  async generateCssPath(page, element) {
    return await page.evaluate((el) => {
      const escapeId = (id) => typeof CSS !== "undefined" && CSS.escape ? CSS.escape(id) : id;
      const path5 = [];
      let current = el;
      while (current && current.nodeType === Node.ELEMENT_NODE) {
        let selector = current.nodeName.toLowerCase();
        if (current.id) {
          path5.unshift(`${selector}#${escapeId(current.id)}`);
          break;
        }
        let nth = 1;
        for (let sib = current.previousElementSibling; sib; sib = sib.previousElementSibling) {
          if (sib.nodeName === current.nodeName) nth++;
        }
        let hasLaterTwin = false;
        for (let sib = current.nextElementSibling; sib && !hasLaterTwin; sib = sib.nextElementSibling) {
          hasLaterTwin = sib.nodeName === current.nodeName;
        }
        if (nth > 1 || hasLaterTwin) {
          selector += `:nth-of-type(${nth})`;
        }
        path5.unshift(selector);
        current = current.parentNode;
      }
      return path5.join(" > ");
    }, element);
  }
};
1872
+
1873
+ // src/recorder/pattern-detector.ts
1874
+ init_esm_shims();
1875
+ init_logger();
1876
/**
 * Heuristic analysis of recorded user actions (type/click/scroll entries
 * carrying `selectors` and an optional `element` descriptor).
 */
var PatternDetector = class {
  /**
   * Runs every detector and logs the combined outcome.
   *
   * @param actions Recorded actions.
   * @returns Object with one key per detector (`hasFormSubmission`,
   *   `hasPagination`, `hasInfiniteScroll`, `hasDownloadFlow`, `hasLogin`,
   *   `hasSearch`).
   */
  analyze(actions) {
    const patterns = {
      hasFormSubmission: this.detectFormSubmission(actions),
      hasPagination: this.detectPagination(actions),
      hasInfiniteScroll: this.detectInfiniteScroll(actions),
      hasDownloadFlow: this.detectDownloadFlow(actions),
      hasLogin: this.detectLogin(actions),
      hasSearch: this.detectSearch(actions)
    };
    logger.info("[pattern-detector] Detected patterns:", patterns);
    return patterns;
  }
  /**
   * A form submission needs at least two typed fields plus a click whose
   * element text mentions "submit"/"login" or whose type is "submit".
   *
   * @returns `{ type: "FORM_SUBMISSION", fields, submitButton }` or null.
   */
  detectFormSubmission(actions) {
    const typed = actions.filter((entry) => entry.type === "type");
    const clicks = actions.filter((entry) => entry.type === "click");
    if (typed.length < 2) {
      return null;
    }
    const fields = typed.map(({ selectors, fieldType, element }) => ({
      selector: selectors.primary,
      type: fieldType || "text",
      placeholder: element?.placeholder || ""
    }));
    const looksLikeSubmit = (entry) => {
      const label = entry.element?.textContent?.toLowerCase();
      return label?.includes("submit") || label?.includes("login") || entry.element?.type === "submit";
    };
    const submit = clicks.find(looksLikeSubmit);
    if (!submit) {
      return null;
    }
    return { type: "FORM_SUBMISSION", fields, submitButton: submit.selectors.primary };
  }
  /**
   * Pagination is reported when at least two "next"-like clicks all used
   * the same primary selector.
   *
   * @returns `{ type: "PAGINATION", nextButton, timesClicked }` or null.
   */
  detectPagination(actions) {
    const looksLikeNext = (entry) => {
      const el = entry.element;
      return el?.textContent?.toLowerCase().includes("next")
        || el?.className?.toLowerCase().includes("next")
        || el?.href?.includes("page");
    };
    const nextClicks = actions.filter((entry) => entry.type === "click").filter(looksLikeNext);
    if (nextClicks.length < 2) {
      return null;
    }
    const [firstNext] = nextClicks;
    const divergent = nextClicks.some(
      (entry) => entry.selectors.primary !== firstNext.selectors.primary
    );
    if (divergent) {
      return null;
    }
    return {
      type: "PAGINATION",
      nextButton: firstNext.selectors.primary,
      timesClicked: nextClicks.length
    };
  }
  /**
   * More than five scroll actions count as infinite scrolling.
   *
   * @returns `{ type: "INFINITE_SCROLL", totalScrolls }` or null.
   */
  detectInfiniteScroll(actions) {
    const totalScrolls = actions.filter((entry) => entry.type === "scroll").length;
    return totalScrolls > 5 ? { type: "INFINITE_SCROLL", totalScrolls } : null;
  }
  /**
   * @returns true when any action's element mentions "download" in its text
   *   or href, or its href ends in a known file extension.
   */
  detectDownloadFlow(actions) {
    const filePattern = /\.(mp4|mp3|pdf|zip|rar)$/i;
    return actions.some((entry) => {
      const el = entry.element;
      return el?.textContent?.toLowerCase().includes("download")
        || el?.href?.includes("download")
        || el?.href?.match(filePattern);
    });
  }
  /**
   * @returns true when the typed fields include both a password-like field
   *   and a username/email-like field.
   */
  detectLogin(actions) {
    const typed = actions.filter((entry) => entry.type === "type");
    const isPassword = (entry) => entry.fieldType === "password" || entry.selectors.primary.includes("password");
    const isUsername = (entry) => entry.fieldType === "email"
      || entry.fieldType === "text"
      || entry.selectors.primary.includes("email")
      || entry.selectors.primary.includes("username");
    // Both checks are always evaluated, as before.
    const hasPassword = typed.some(isPassword);
    const hasUsername = typed.some(isUsername);
    return hasPassword && hasUsername;
  }
  /**
   * @returns true when any typed field's selector or placeholder mentions "search".
   */
  detectSearch(actions) {
    return actions.some((entry) => {
      if (entry.type !== "type") {
        return false;
      }
      return entry.selectors.primary.includes("search")
        || entry.element?.placeholder?.toLowerCase().includes("search");
    });
  }
};
1957
+
1958
+ // src/recorder/cmd-generator.ts
1959
+ init_esm_shims();
1960
/**
 * Turns a recorded session (start URL, actions, detected patterns) into the
 * text of a command file, one command per line.
 */
var CmdGenerator = class {
  /**
   * Builds the command file text.
   *
   * Emits GOTO/WAITLOAD for the start URL, marker lines for detected
   * login/search, the form block (WAIT/TYPE per field, then CLICK/WAITLOAD),
   * one CLICK/TYPE line per de-duplicated action, and finally the
   * PAGINATE / INFINITESCROLL / DOWNLOAD DETECTED lines for those patterns.
   *
   * @param {string} url - URL the recording started on.
   * @param {Array<object>} actions - Recorded actions (`type`,
   *   `selectors.primary`, optional `value`).
   * @param {object} patterns - Detection results keyed by `hasLogin`,
   *   `hasSearch`, `hasFormSubmission`, `hasPagination`, `hasInfiniteScroll`
   *   and `hasDownloadFlow`; falsy entries are skipped.
   * @returns {string} Command lines joined with "\n".
   */
  generate(url, actions, patterns) {
    const lines = [];
    lines.push(`GOTO ${url}`);
    lines.push(`WAITLOAD`);
    lines.push("");
    if (patterns.hasLogin) {
      lines.push("LOGIN DETECTED");
    }
    if (patterns.hasSearch) {
      lines.push("SEARCH DETECTED");
    }
    if (patterns.hasFormSubmission) {
      const form = patterns.hasFormSubmission;
      lines.push("");
      for (const field of form.fields) {
        lines.push(`WAIT ${field.selector}`);
        lines.push(`TYPE ${field.selector} YOUR_${field.type.toUpperCase()}_HERE`);
      }
      lines.push(`CLICK ${form.submitButton}`);
      lines.push("WAITLOAD");
    }
    for (const action of this.deduplicateActions(actions)) {
      if (action.type === "click") {
        lines.push(`CLICK ${action.selectors.primary}`);
      } else if (action.type === "type" && action.value) {
        // Type actions whose final value is empty produce no command.
        lines.push(`TYPE ${action.selectors.primary} ${action.value}`);
      }
    }
    if (patterns.hasPagination) {
      const pagination = patterns.hasPagination;
      lines.push("");
      lines.push(`PAGINATE ${pagination.nextButton}`);
    }
    if (patterns.hasInfiniteScroll) {
      lines.push("");
      lines.push("INFINITESCROLL");
    }
    if (patterns.hasDownloadFlow) {
      lines.push("");
      lines.push("DOWNLOAD DETECTED");
    }
    return lines.join("\n");
  }
  /**
   * Collapses actions that share the same type and primary selector.
   *
   * The first occurrence fixes the position in the output. For "type"
   * actions the most recent occurrence replaces the earlier one, because
   * input is recorded while the user is still typing and only the last
   * record carries the finished value. The input array and its elements are
   * not modified.
   *
   * @param {Array<object>} actions - Recorded actions.
   * @returns {Array<object>} One action per distinct `type:selector` key.
   */
  deduplicateActions(actions) {
    const positionByKey = /* @__PURE__ */ new Map();
    const unique = [];
    for (const action of actions) {
      const key = `${action.type}:${action.selectors.primary}`;
      const position = positionByKey.get(key);
      if (position === undefined) {
        positionByKey.set(key, unique.length);
        unique.push(action);
      } else if (action.type === "type") {
        unique[position] = action;
      }
    }
    return unique;
  }
};
2018
+
2019
+ // src/recorder/recorder.ts
2020
+ init_logger();
2021
+ import fs3 from "fs";
2022
+ import path4 from "path";
2023
/**
 * Records clicks and text input performed in a visible browser window and,
 * once the page is closed, writes the session out as a command file under
 * ./commands.
 */
var ActionRecorder = class {
  browser = null;
  page = null;
  actions = [];
  isRecording = false;
  selectorGen;
  patternDetector;
  cmdGenerator;
  startUrl = "";
  constructor() {
    this.selectorGen = new SelectorGenerator();
    this.patternDetector = new PatternDetector();
    this.cmdGenerator = new CmdGenerator();
  }
  /**
   * Opens a non-headless browser on `url`, records until the page is closed,
   * then finalizes the session via stopRecording().
   *
   * @param {string} url - Page to open and record on.
   * @returns {Promise<void>}
   */
  async startRecording(url) {
    this.startUrl = url;
    this.actions = [];
    this.isRecording = true;
    logger.info("[recorder] Starting recording session...");
    console.log("\n===========================================");
    console.log("  RECORDING MODE ACTIVATED");
    console.log(`  URL: ${url}`);
    console.log("  Perform your actions in the browser...");
    console.log("  Close the browser when done");
    console.log("===========================================\n");
    this.browser = await chromium2.launch({ headless: false });
    const context = await this.browser.newContext();
    this.page = await context.newPage();
    await this.attachListeners(this.page);
    await this.page.goto(url);
    // A recording session lasts as long as the user needs, so the wait must
    // not be cut short by a default wait timeout; 0 disables it.
    await this.page.waitForEvent("close", { timeout: 0 });
    await this.stopRecording();
  }
  /**
   * Exposes the `__recordClick` / `__recordType` callbacks to the page and
   * installs an init script that forwards DOM click and input events to them.
   *
   * @param {object} page - Browser page to instrument; must provide
   *   `exposeFunction`, `evaluate` and `addInitScript`.
   * @returns {Promise<void>}
   */
  async attachListeners(page) {
    await page.exposeFunction("__recordClick", async (x, y) => {
      if (!this.isRecording) return;
      const element = await page.evaluate((coords) => {
        const el = document.elementFromPoint(coords.x, coords.y);
        if (!el) return null;
        return {
          tagName: el.tagName.toLowerCase(),
          id: el.id,
          className: el.className,
          textContent: el.textContent?.slice(0, 50),
          href: el.href,
          type: el.type
        };
      }, { x, y });
      if (element) {
        const selectors = await this.selectorGen.generate(page, element);
        this.actions.push({
          type: "click",
          selectors,
          timestamp: Date.now(),
          element
        });
        logger.info(`[recorder] Recorded CLICK on ${selectors.primary}`);
      }
    });
    await page.exposeFunction("__recordType", async (selector, value) => {
      // The page sends the input's id or, failing that, its name. Inputs with
      // neither cannot be looked up again, so they are ignored.
      if (!this.isRecording || !selector) return;
      const element = await page.evaluate((key) => {
        // `key` is a bare id/name, not a CSS selector: querySelector(key)
        // would match a tag of that name, so resolve it by id, then by name.
        const el = document.getElementById(key) || document.getElementsByName(key)[0];
        if (!el) return null;
        return {
          tagName: el.tagName.toLowerCase(),
          id: el.id,
          type: el.type,
          placeholder: el.placeholder
        };
      }, selector);
      if (element) {
        const selectors = await this.selectorGen.generate(page, element);
        this.actions.push({
          type: "type",
          selectors,
          value,
          timestamp: Date.now(),
          element,
          fieldType: element.type
        });
        logger.info(`[recorder] Recorded TYPE in ${selectors.primary}: "${String(value ?? "").slice(0, 20)}..."`);
      }
    });
    await page.addInitScript(() => {
      document.addEventListener("click", (e) => {
        const target = e.target;
        if (target) {
          window.__recordClick(e.clientX, e.clientY);
        }
      });
      document.addEventListener("input", (e) => {
        const target = e.target;
        if (target && target.tagName === "INPUT") {
          // Report half a second after each input event; the value is read
          // when the timer fires, not when the event happened.
          setTimeout(() => {
            window.__recordType(target.id || target.name, target.value);
          }, 500);
        }
      });
    });
  }
  /**
   * Ends the session: analyzes the recorded actions, writes the generated
   * command file to ./commands/recorded-<timestamp>.cmd, prints a summary and
   * closes the browser. The browser is closed even when analysis or writing
   * the file fails.
   *
   * @returns {Promise<void>}
   */
  async stopRecording() {
    this.isRecording = false;
    logger.info(`[recorder] Recording stopped - ${this.actions.length} actions captured`);
    try {
      const patterns = this.patternDetector.analyze(this.actions);
      const cmdFile = this.cmdGenerator.generate(this.startUrl, this.actions, patterns);
      const outputDir = "./commands";
      if (!fs3.existsSync(outputDir)) {
        fs3.mkdirSync(outputDir, { recursive: true });
      }
      const filename = `recorded-${Date.now()}.cmd`;
      const filepath = path4.join(outputDir, filename);
      fs3.writeFileSync(filepath, cmdFile);
      console.log("\n===========================================");
      console.log("  RECORDING COMPLETE");
      console.log(`  Saved to: ${filepath}`);
      console.log(`  Actions: ${this.actions.length}`);
      console.log("===========================================\n");
    } finally {
      if (this.browser) {
        await this.browser.close();
      }
    }
  }
  /**
   * @returns {Array<object>} The live array of actions recorded so far.
   */
  getActions() {
    return this.actions;
  }
};
2149
+
2150
+ // bin/firekid-scraper.ts
2151
+ init_logger();
2152
+ import { readFileSync } from "fs";
2153
+ import { join } from "path";
2154
// Package manifest, read from a path resolved relative to this module's
// directory; only its `version` is used below.
var packageJson = JSON.parse(
  readFileSync(join(__dirname, "../../package.json"), "utf-8")
);
// Command-line definition. Options are parsed once at module load and the
// result is shared with main() through `options`.
var program = new Command();
program
  .name("firekid-scraper")
  .description("The most advanced web scraping machine ever built")
  .version(packageJson.version);
program
  .option("-u, --url <url>", "URL to scrape")
  .option("-m, --mode <mode>", "Scraping mode (auto, downloader, scrape, navigator)")
  .option("--cmd <file>", "Run command file")
  .option("--record", "Record browser actions")
  .option("--auto", "Use intelligent auto mode")
  .option("--headless", "Run in headless mode")
  // main() treats every value except an explicit `false` as headless, and a
  // plain boolean flag can only ever be undefined or true. The negated form
  // is what makes `options.headless === false` reachable; the default
  // (flag omitted) stays headless.
  .option("--no-headless", "Run with a visible browser window")
  .option("--server", "Start API server")
  .option("-p, --port <port>", "API server port", "3000");
program.parse();
var options = program.opts();
2162
/**
 * CLI entry point.
 *
 * Dispatches on the parsed command-line options, in this order: API server
 * (--server), recorder (--record), command file (--cmd), auto scrape
 * (--auto or --url). When none of these is set, an interactive menu offers
 * the same four choices.
 *
 * @returns {Promise<void>} Resolves when the selected mode has finished.
 */
async function main() {
  intro("Firekid Scraper");
  if (options.server) {
    const { startServer: startServer2 } = await Promise.resolve().then(() => (init_app(), app_exports));
    await startServer2(parseInt(options.port, 10));
    return;
  }
  const scraper = new FirekidScraper({
    // Headless unless the option was explicitly set to false.
    headless: options.headless !== false
  });
  if (options.record) {
    const url = options.url || await text({
      message: "Enter URL to record:",
      placeholder: "https://example.com"
    });
    const s = spinner();
    s.start("Starting recorder...");
    const recorder = new ActionRecorder();
    await recorder.startRecording(url);
    s.stop("Recording complete!");
    outro("Command file generated");
    return;
  }
  if (options.cmd) {
    const s = spinner();
    s.start(`Running command file: ${options.cmd}`);
    await scraper.runCommandFile(options.cmd);
    s.stop("Execution complete!");
    outro("Done");
    return;
  }
  if (options.auto || options.url) {
    const url = options.url || await text({
      message: "Enter URL to scrape:",
      placeholder: "https://example.com"
    });
    const s = spinner();
    s.start("Scraping...");
    const result = await scraper.auto(url);
    s.stop("Scraping complete!");
    console.log("\nResults:", result);
    outro("Done");
    return;
  }
  // No mode flag given: ask what to do.
  const action = await select({
    message: "What would you like to do?",
    options: [
      { value: "auto", label: "Auto scrape a URL" },
      { value: "record", label: "Record browser actions" },
      { value: "command", label: "Run command file" },
      { value: "server", label: "Start API server" }
    ]
  });
  if (action === "auto") {
    const url = await text({
      message: "Enter URL:",
      placeholder: "https://example.com"
    });
    const s = spinner();
    s.start("Scraping...");
    const result = await scraper.auto(url);
    s.stop("Complete!");
    console.log("\nResults:", result);
  } else if (action === "record") {
    const url = await text({
      message: "Enter URL to record:",
      placeholder: "https://example.com"
    });
    const recorder = new ActionRecorder();
    await recorder.startRecording(url);
  } else if (action === "command") {
    const file = await text({
      message: "Enter command file path:",
      placeholder: "commands/mysite.cmd"
    });
    await scraper.runCommandFile(file);
  } else if (action === "server") {
    const port = await text({
      message: "Enter port:",
      placeholder: "3000"
    });
    const { startServer: startServer2 } = await Promise.resolve().then(() => (init_app(), app_exports));
    // An empty or non-numeric answer parses to NaN, which must not reach the
    // server; fall back to 3000, the value shown as the placeholder and used
    // as the --port default.
    const requestedPort = parseInt(port, 10);
    await startServer2(Number.isNaN(requestedPort) ? 3000 : requestedPort);
  }
  outro("Done");
}
2248
// Run the CLI; any rejection from main() is logged and turned into exit
// code 1.
main().catch((error) => {
  logger.error("CLI error:", error);
  process.exit(1);
});