@firekid/scraper 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +767 -0
- package/dist/bin/firekid-scraper.cjs +2264 -0
- package/dist/bin/firekid-scraper.d.mts +1 -0
- package/dist/bin/firekid-scraper.d.ts +1 -0
- package/dist/bin/firekid-scraper.js +2251 -0
- package/dist/index.cjs +2145 -0
- package/dist/index.d.mts +366 -0
- package/dist/index.d.ts +366 -0
- package/dist/index.js +2107 -0
- package/package.json +90 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2145 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Bundler runtime helpers. Cached references to the Object built-ins used below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Turns a one-entry object of module initializers into a run-once function:
// the first call invokes the first own property of `fn` (clearing `fn` to 0 while
// doing so) and stores its result in `res`; later calls just return `res`.
var __esm = (fn, res) => function __init() {
  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines every key of `all` on `target` as an enumerable getter.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Re-exposes the own properties of `from` on `to` as getters, skipping keys that
// `to` already owns and the single key named by `except`. Enumerability follows
// the source descriptor (or is true when no descriptor is found).
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Builds a namespace-like object for a required module: a fresh object that
// inherits from the module's prototype and mirrors its properties via getters.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Produces the CommonJS export object: a new object flagged `__esModule: true`
// that mirrors the properties of `mod` via getters.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
32
|
+
|
|
33
|
+
// node_modules/tsup/assets/cjs_shims.js
|
|
34
|
+
// Run-once initializer for the bundled tsup CJS shim module. The module body is
// empty apart from its "use strict" directive, so calling it has no effect here.
var init_cjs_shims = __esm({
  "node_modules/tsup/assets/cjs_shims.js"() {
    "use strict";
  }
});
|
|
39
|
+
|
|
40
|
+
// src/config.ts
|
|
41
|
+
/**
 * Returns the module-level `config` object.
 *
 * The object itself is returned (no copy is made), so mutations made by the
 * caller are visible to every other reader of `config`.
 *
 * @returns {object} The shared configuration object.
 */
function getConfig() {
  return config;
}
|
|
44
|
+
/**
 * Applies overrides to the shared `config` object in place.
 *
 * Sections that already exist on `config` as plain objects (for example
 * `browser` or `server`) are merged key by key, so a partial override such as
 * `updateConfig({ browser: { headless: true } })` keeps the section's other
 * settings instead of replacing the whole section. Any other value is assigned
 * directly, as before.
 *
 * @param {object} [updates] - Top-level keys to override; `null`/`undefined` is a no-op.
 * @returns {void}
 */
function updateConfig(updates) {
  if (updates == null) {
    return;
  }
  const isPlainObject = (value) => value !== null && typeof value === "object" && !Array.isArray(value);
  for (const [key, value] of Object.entries(updates)) {
    // Only merge into sections that config itself owns; this keeps inherited
    // slots such as "__proto__" from ever being used as a merge target.
    const mergeable = Object.hasOwn(config, key) && isPlainObject(config[key]) && isPlainObject(value);
    if (mergeable) {
      Object.assign(config[key], value);
    } else {
      config[key] = value;
    }
  }
}
|
|
47
|
+
var import_dotenv, config;
// Run-once initializer for src/config.ts: loads `.env` through dotenv and then
// builds the shared `config` object from process.env, applying defaults.
var init_config = __esm({
  "src/config.ts"() {
    "use strict";
    init_cjs_shims();
    import_dotenv = __toESM(require("dotenv"));
    import_dotenv.default.config();
    // Reads an integer setting. Unset, empty or non-numeric values fall back to
    // the default so a typo in the environment cannot leak NaN into the config.
    const envInt = (name, fallback) => {
      const parsed = Number.parseInt(process.env[name] ?? "", 10);
      return Number.isNaN(parsed) ? fallback : parsed;
    };
    // Opt-in flag: true only when the variable is exactly "true".
    const envEnabled = (name) => process.env[name] === "true";
    // Opt-out flag: true unless the variable is exactly "false".
    const envNotDisabled = (name) => process.env[name] !== "false";
    config = {
      browser: {
        headless: envEnabled("HEADLESS"),
        timeout: envInt("BROWSER_TIMEOUT", 30000),
        maxWorkers: envInt("MAX_QUEUE_WORKERS", 5)
      },
      cloudflare: {
        bypass: process.env.CF_BYPASS || "auto",
        turnstileSolver: process.env.TURNSTILE_SOLVER || "manual"
      },
      captcha: {
        apiKey: process.env.CAPTCHA_API_KEY || ""
      },
      server: {
        enabled: envEnabled("API_ENABLED"),
        port: envInt("API_PORT", 3000),
        apiKey: process.env.API_KEY || ""
      },
      proxy: {
        enabled: envEnabled("PROXY_ENABLED"),
        url: process.env.PROXY_URL || ""
      },
      storage: {
        dataDir: process.env.DATA_DIR || "./data",
        patternsDb: process.env.PATTERNS_DB || "./data/patterns.db",
        sessionsDb: process.env.SESSIONS_DB || "./data/sessions.db"
      },
      logging: {
        level: process.env.LOG_LEVEL || "info"
      },
      recording: {
        autoHideAfterSolve: envNotDisabled("AUTO_HIDE_AFTER_SOLVE"),
        recordScreenshots: envEnabled("RECORD_SCREENSHOTS")
      },
      rateLimit: {
        enabled: envNotDisabled("RATE_LIMIT_ENABLED"),
        max: envInt("RATE_LIMIT_MAX", 100),
        window: envInt("RATE_LIMIT_WINDOW", 3600000)
      },
      advanced: {
        enableTelemetry: envEnabled("ENABLE_TELEMETRY"),
        enableAnalytics: envEnabled("ENABLE_ANALYTICS")
      }
    };
  }
});
|
|
100
|
+
|
|
101
|
+
// src/logger/logger.ts
|
|
102
|
+
/**
 * Sets the `level` property of the shared `logger`.
 *
 * @param {string} level - Value assigned to `logger.level`.
 * @returns {void}
 */
function setLogLevel(level) {
  logger.level = level;
}
|
|
105
|
+
/**
 * Logs one action at info level, prefixed with the URL it relates to.
 *
 * @param {string} url - Value shown in square brackets at the start of the line.
 * @param {string} action - Text placed after the prefix.
 * @param {*} [meta] - Passed through unchanged as the second logger argument.
 * @returns {void}
 */
function step(url, action, meta) {
  const line = `[${url}] ${action}`;
  logger.info(line, meta);
}
|
|
108
|
+
/**
 * Logs extracted data at info level under an "EXTRACTED:" label for the URL.
 *
 * @param {string} url - Value shown in square brackets at the start of the line.
 * @param {*} data - Passed through unchanged as the second logger argument.
 * @returns {void}
 */
function highlight(url, data) {
  const label = `[${url}] EXTRACTED:`;
  logger.info(label, data);
}
|
|
111
|
+
var import_winston, logger;
// Run-once initializer for src/logger/logger.ts: creates the shared winston
// logger with one console transport and two file transports.
var init_logger = __esm({
  "src/logger/logger.ts"() {
    "use strict";
    init_cjs_shims();
    import_winston = __toESM(require("winston"));
    init_config();
    // Renders the extra fields of a log entry as a " {json}" suffix, or "" when
    // there are none. Serialization failures (e.g. circular structures) are
    // reported inline instead of throwing from inside the formatter.
    const formatMeta = (meta) => {
      if (Object.keys(meta).length === 0) {
        return "";
      }
      try {
        return ` ${JSON.stringify(meta)}`;
      } catch (err) {
        return ` [unserializable meta: ${err.message}]`;
      }
    };
    logger = import_winston.default.createLogger({
      level: config.logging.level,
      format: import_winston.default.format.combine(
        import_winston.default.format.timestamp({ format: "YYYY-MM-DD HH:mm:ss" }),
        import_winston.default.format.errors({ stack: true }),
        import_winston.default.format.splat(),
        import_winston.default.format.printf(({ level, message, timestamp, ...meta }) => {
          return `${timestamp} [${level.toUpperCase()}] ${message}${formatMeta(meta)}`;
        })
      ),
      transports: [
        new import_winston.default.transports.Console({
          format: import_winston.default.format.combine(
            import_winston.default.format.colorize(),
            // Include the metadata here too: previously the console line dropped
            // it, so step()/highlight() payloads never reached the terminal.
            import_winston.default.format.printf(({ level, message, timestamp, ...meta }) => {
              return `${timestamp} ${level}: ${message}${formatMeta(meta)}`;
            })
          )
        }),
        new import_winston.default.transports.File({
          filename: "logs/error.log",
          level: "error"
        }),
        new import_winston.default.transports.File({
          filename: "logs/combined.log"
        })
      ]
    });
  }
});
|
|
152
|
+
|
|
153
|
+
// src/network/smart-fetch.ts
|
|
154
|
+
var import_fs2, import_path2, SmartFetch;
// Run-once initializer for src/network/smart-fetch.ts.
var init_smart_fetch = __esm({
  "src/network/smart-fetch.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    import_fs2 = __toESM(require("fs"));
    import_path2 = __toESM(require("path"));
    /**
     * HTTP helper built on the global `fetch` that adds a browser-like
     * User-Agent, optional cookies and an automatically chosen Referer.
     */
    SmartFetch = class {
      // Page whose current URL is used as the automatic Referer, when set.
      pageContext = null;
      // URL of the last successful fetch; the Referer fallback without a page.
      lastReferer = "";
      /**
       * @param {object|null} page - Object exposing `url()`; used for auto-Referer.
       */
      setPageContext(page) {
        this.pageContext = page;
      }
      /**
       * Performs one HTTP request.
       *
       * @param {object} options
       * @param {string} options.url - Request URL.
       * @param {string} [options.referer] - Explicit Referer; wins over auto-Referer.
       * @param {boolean} [options.autoReferer=true] - When no explicit referer is
       *   given, use the page context URL (or the last fetched URL). When false,
       *   no Referer is added automatically.
       * @param {string} [options.method="GET"]
       * @param {object} [options.headers] - Extra headers; override the defaults.
       * @param {object} [options.cookies] - name/value pairs sent as a Cookie header.
       * @param {*} [options.body] - Strings, ArrayBuffers, typed arrays/Buffers and
       *   URLSearchParams are sent as-is; anything else is JSON-encoded.
       * @param {boolean} [options.followRedirects=true]
       * @param {number} [options.timeout=30000] - Abort after this many milliseconds.
       * @param {"auto"|"arrayBuffer"} [options.responseType="auto"] - "auto" decodes
       *   by Content-Type (JSON, text, otherwise ArrayBuffer); "arrayBuffer" always
       *   returns the raw bytes.
       * @returns {Promise<{status: number, headers: object, data: *}>}
       * @throws Re-throws any network, timeout or decoding error after logging it.
       */
      async fetch(options) {
        const {
          url,
          referer,
          autoReferer = true,
          method = "GET",
          headers = {},
          cookies = {},
          body,
          followRedirects = true,
          timeout = 3e4,
          responseType = "auto"
        } = options;
        let finalReferer = referer;
        if (!finalReferer && autoReferer) {
          finalReferer = this.pageContext ? this.pageContext.url() : this.lastReferer;
        }
        const finalHeaders = {
          "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
          "Accept": "*/*",
          ...headers
        };
        if (finalReferer) {
          finalHeaders["Referer"] = finalReferer;
          logger.info(`[smart-fetch] Auto-Referer: ${finalReferer}`);
        }
        if (Object.keys(cookies).length > 0) {
          const cookieString = Object.entries(cookies).map(([k, v]) => `${k}=${v}`).join("; ");
          finalHeaders["Cookie"] = cookieString;
        }
        let payload;
        if (body) {
          // Already-encoded bodies must not be JSON-stringified a second time.
          const isRaw = typeof body === "string" || body instanceof ArrayBuffer || ArrayBuffer.isView(body) || body instanceof URLSearchParams;
          payload = isRaw ? body : JSON.stringify(body);
          const hasContentType = Object.keys(finalHeaders).some((name) => name.toLowerCase() === "content-type");
          if (!isRaw && !hasContentType) {
            finalHeaders["Content-Type"] = "application/json";
          }
        }
        logger.info(`[smart-fetch] ${method} ${url}`);
        try {
          const response = await fetch(url, {
            method,
            headers: finalHeaders,
            body: payload,
            redirect: followRedirects ? "follow" : "manual",
            signal: AbortSignal.timeout(timeout)
          });
          const contentType = response.headers.get("content-type") || "";
          let data;
          if (responseType === "arrayBuffer") {
            data = await response.arrayBuffer();
          } else if (contentType.includes("application/json")) {
            data = await response.json();
          } else if (contentType.includes("text")) {
            data = await response.text();
          } else {
            data = await response.arrayBuffer();
          }
          this.lastReferer = url;
          const headersObj = {};
          response.headers.forEach((value, key) => {
            headersObj[key] = value;
          });
          return {
            status: response.status,
            headers: headersObj,
            data
          };
        } catch (err) {
          logger.error(`[smart-fetch] Failed: ${err.message}`);
          throw err;
        }
      }
      /**
       * Downloads `url` and writes the response body to `outputPath`, creating
       * the parent directory if needed. The body is fetched as raw bytes so the
       * file is stored exactly as served.
       *
       * @param {string} url
       * @param {string} outputPath
       * @param {string} [referer]
       * @returns {Promise<void>}
       * @throws {Error} When the server answers with a non-2xx status, or when the
       *   request or the file write fails.
       */
      async download(url, outputPath, referer) {
        logger.info(`[smart-fetch] Downloading ${url} to ${outputPath}`);
        const response = await this.fetch({
          url,
          referer,
          autoReferer: true,
          responseType: "arrayBuffer"
        });
        // Do not store an error page on disk as if it were the requested file.
        if (response.status < 200 || response.status >= 300) {
          throw new Error(`[smart-fetch] Download failed with HTTP ${response.status}: ${url}`);
        }
        const dir = import_path2.default.dirname(outputPath);
        import_fs2.default.mkdirSync(dir, { recursive: true });
        if (response.data instanceof ArrayBuffer) {
          import_fs2.default.writeFileSync(outputPath, Buffer.from(response.data));
        } else if (typeof response.data === "string") {
          import_fs2.default.writeFileSync(outputPath, response.data);
        } else {
          import_fs2.default.writeFileSync(outputPath, JSON.stringify(response.data));
        }
        logger.info(`[smart-fetch] Downloaded to ${outputPath}`);
      }
      /**
       * @returns {string} URL of the last successful fetch, or "" if none yet.
       */
      getLastReferer() {
        return this.lastReferer;
      }
    };
  }
});
|
|
253
|
+
|
|
254
|
+
// src/modes/downloader.ts
|
|
255
|
+
var DownloaderMode;
// Run-once initializer for src/modes/downloader.ts.
var init_downloader = __esm({
  "src/modes/downloader.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    init_smart_fetch();
    /**
     * Finds downloadable files on the current page, either as direct file links
     * or behind a download button, and saves them under ./downloads.
     */
    DownloaderMode = class {
      page;
      smartFetch;
      /**
       * @param {object} page - Page object providing evaluate/click/waitForEvent/url.
       */
      constructor(page) {
        this.page = page;
        this.smartFetch = new SmartFetch();
        this.smartFetch.setPageContext(page);
      }
      /**
       * Detects the download flow of the current page and runs it.
       *
       * @param {string} url - Accepted for interface parity with the other modes; not read here.
       * @returns {Promise<{success: boolean, data: object, errors: string[], timestamp: number}>}
       */
      async execute(url) {
        logger.info("[downloader-mode] Analyzing download flow...");
        const flow = await this.detectDownloadFlow();
        if (!flow) {
          return {
            success: false,
            data: {},
            errors: ["No download flow detected"],
            timestamp: Date.now()
          };
        }
        logger.info(`[downloader-mode] Flow type: ${flow.type}`);
        try {
          let downloadedFiles = [];
          if (flow.type === "DIRECT" && flow.links) {
            downloadedFiles = await this.downloadDirectLinks(flow.links);
          } else if (flow.type === "BUTTON_CLICK" && flow.button) {
            downloadedFiles = await this.downloadViaButton(flow.button);
          }
          return {
            success: true,
            data: {
              flow: flow.type,
              files: downloadedFiles
            },
            errors: [],
            timestamp: Date.now()
          };
        } catch (err) {
          return {
            success: false,
            data: {},
            errors: [err.message],
            timestamp: Date.now()
          };
        }
      }
      /**
       * Prefers direct file links; falls back to the best-scoring download button.
       *
       * @returns {Promise<object|null>} Flow descriptor, or null when nothing was found.
       */
      async detectDownloadFlow() {
        const directLinks = await this.findDirectLinks();
        if (directLinks.length > 0) {
          return {
            type: "DIRECT",
            steps: ["Found direct download links"],
            links: directLinks
          };
        }
        const downloadButton = await this.findDownloadButton();
        if (downloadButton) {
          return {
            type: "BUTTON_CLICK",
            steps: ["Click download button", "Wait for file"],
            button: downloadButton
          };
        }
        return null;
      }
      /**
       * Collects anchors whose URL path ends in a known file extension.
       *
       * @returns {Promise<Array<{url: string, selector: string, extension: string}>>}
       */
      async findDirectLinks() {
        // NOTE: this callback runs inside the page, so it must be self-contained.
        return await this.page.evaluate(() => {
          const fileExtensions = /\.(mp4|mp3|pdf|zip|rar|exe|dmg|apk|avi|mkv|mov|wav|flac)$/i;
          const fileLinks = [];
          for (const link of document.querySelectorAll("a[href]")) {
            const href = link.href;
            if (typeof href !== "string" || !href) {
              continue;
            }
            // Match on the path only so "file.mp4?token=..." and "file.pdf#page=2"
            // are still recognised as files.
            let pathname;
            try {
              pathname = new URL(href).pathname;
            } catch {
              continue;
            }
            const match = fileExtensions.exec(pathname);
            if (!match) {
              continue;
            }
            // The attribute may be relative while link.href is resolved, so the
            // selector is built from the attribute text, escaped for CSS.
            const selector = link.id ? `#${CSS.escape(link.id)}` : `a[href="${CSS.escape(link.getAttribute("href") || "")}"]`;
            fileLinks.push({
              url: href,
              selector,
              extension: match[1]
            });
          }
          return fileLinks;
        });
      }
      /**
       * Scores buttons/links by how strongly they look like a download control
       * and returns the best candidate.
       *
       * @returns {Promise<{selector: string, score: number, text: string, href: (string|undefined)}|null>}
       */
      async findDownloadButton() {
        // NOTE: this callback runs inside the page, so it must be self-contained.
        const buttons = await this.page.evaluate(() => {
          // Builds a selector that addresses exactly this element; a bare tag name
          // would make the later click hit the first <a>/<button> on the page.
          const uniqueSelector = (el) => {
            const parts = [];
            let node = el;
            while (node && node.nodeType === 1 && node !== document.documentElement) {
              if (node.id) {
                parts.unshift(`#${CSS.escape(node.id)}`);
                break;
              }
              let position = 1;
              for (let sibling = node.previousElementSibling; sibling; sibling = sibling.previousElementSibling) {
                if (sibling.tagName === node.tagName) {
                  position += 1;
                }
              }
              parts.unshift(`${node.tagName.toLowerCase()}:nth-of-type(${position})`);
              node = node.parentElement;
            }
            return parts.join(" > ");
          };
          const candidates = Array.from(document.querySelectorAll('button, a, [class*="download"]'));
          const scored = [];
          for (const el of candidates) {
            const text = (el.textContent || "").toLowerCase();
            // Read the attribute: on SVG elements `className` is not a string.
            const className = (el.getAttribute("class") || "").toLowerCase();
            const href = typeof el.href === "string" ? el.href : void 0;
            let score = 0;
            if (text.includes("download")) score += 10;
            if (className.includes("download")) score += 10;
            if (el.hasAttribute("download")) score += 20;
            if (href && href.includes("download")) score += 5;
            if (text.includes("get")) score += 3;
            if (text.includes("save")) score += 3;
            if (score > 0) {
              scored.push({ selector: uniqueSelector(el), score, text, href });
            }
          }
          return scored.sort((a, b) => b.score - a.score);
        });
        return buttons.length > 0 ? buttons[0] : null;
      }
      /**
       * Downloads every link sequentially into ./downloads.
       *
       * @param {Array<{url: string, extension?: string}>} links
       * @returns {Promise<string[]>} Paths of the written files, in input order.
       * @throws Propagates the first download failure.
       */
      async downloadDirectLinks(links) {
        const downloaded = [];
        for (const [index, link] of links.entries()) {
          // The index keeps names unique even when two downloads finish within
          // the same millisecond.
          const filename = `download-${Date.now()}-${index}.${link.extension || "bin"}`;
          const outputPath = `./downloads/${filename}`;
          logger.info(`[downloader-mode] Downloading ${link.url}`);
          await this.smartFetch.download(link.url, outputPath);
          downloaded.push(outputPath);
        }
        return downloaded;
      }
      /**
       * Clicks the given button, waits for the resulting download and saves it
       * into ./downloads under the suggested file name.
       *
       * @param {{selector: string}} button
       * @returns {Promise<string[]>} Single-element array with the saved path.
       */
      async downloadViaButton(button) {
        logger.info(`[downloader-mode] Clicking download button: ${button.selector}`);
        const [download] = await Promise.all([
          this.page.waitForEvent("download"),
          this.page.click(button.selector)
        ]);
        // The name is suggested by the remote side: keep only its last path
        // segment so the file cannot be written outside ./downloads.
        const suggested = String(download.suggestedFilename() || "");
        const filename = suggested.split(/[\\/]/).pop() || "download.bin";
        const outputPath = `./downloads/${filename}`;
        await download.saveAs(outputPath);
        logger.info(`[downloader-mode] Saved to ${outputPath}`);
        return [outputPath];
      }
    };
  }
});
|
|
396
|
+
|
|
397
|
+
// src/modes/scrape.ts
|
|
398
|
+
var ScrapeMode;
// Run-once initializer for src/modes/scrape.ts.
var init_scrape = __esm({
  "src/modes/scrape.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    /**
     * Extracts the headline, meta description, images, links and paragraph
     * text of the current page.
     */
    ScrapeMode = class {
      page;
      /**
       * @param {object} page - Page object providing `evaluate`.
       */
      constructor(page) {
        this.page = page;
      }
      /**
       * Runs the extraction and wraps the outcome in the common result shape.
       *
       * @param {string} url - Accepted for interface parity; not read here.
       * @returns {Promise<{success: boolean, data: object, errors: string[], timestamp: number}>}
       */
      async execute(url) {
        logger.info("[scrape-mode] Extracting content...");
        let outcome;
        try {
          outcome = { success: true, data: await this.extractContent(), errors: [] };
        } catch (err) {
          outcome = { success: false, data: {}, errors: [err.message] };
        }
        return { ...outcome, timestamp: Date.now() };
      }
      /**
       * Collects page content inside the browser. Keys are only present when
       * the corresponding content was found.
       *
       * @returns {Promise<{title?: string, description?: string, images?: string[], links?: Array<{text: string, href: string}>, content?: string[]}>}
       */
      async extractContent() {
        // NOTE: this callback runs inside the page, so it must be self-contained.
        return await this.page.evaluate(() => {
          const all = (selector) => Array.from(document.querySelectorAll(selector));
          const page = {};
          const heading = document.querySelector("h1")?.textContent?.trim();
          if (heading) {
            page.title = heading;
          }
          const metaDescription = document.querySelector('meta[name="description"]')?.getAttribute("content");
          if (metaDescription) {
            page.description = metaDescription;
          }
          const imageUrls = all("img[src]").map((img) => img.src).filter(Boolean);
          if (imageUrls.length > 0) {
            page.images = imageUrls;
          }
          const anchors = [];
          for (const a of all("a[href]")) {
            const entry = { text: a.textContent?.trim(), href: a.href };
            if (entry.text && entry.href) {
              anchors.push(entry);
            }
          }
          if (anchors.length > 0) {
            page.links = anchors;
          }
          const texts = all("p").map((p) => p.textContent?.trim()).filter(Boolean);
          if (texts.length > 0) {
            page.content = texts;
          }
          return page;
        });
      }
    };
  }
});
|
|
450
|
+
|
|
451
|
+
// src/modes/navigator.ts
|
|
452
|
+
var NavigatorMode;
// Run-once initializer for src/modes/navigator.ts.
var init_navigator = __esm({
  "src/modes/navigator.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    /**
     * Summarises the structure of the current page: title, URL, navigation
     * links and the number of section/article elements.
     */
    NavigatorMode = class {
      page;
      /**
       * @param {object} page - Page object providing `evaluate`.
       */
      constructor(page) {
        this.page = page;
      }
      /**
       * Builds the site map and wraps the outcome in the common result shape.
       *
       * @param {string} url - Accepted for interface parity; not read here.
       * @returns {Promise<{success: boolean, data: object, errors: string[], timestamp: number}>}
       */
      async execute(url) {
        logger.info("[navigator-mode] Mapping site structure...");
        try {
          const siteMap = await this.buildSiteMap();
          return {
            success: true,
            data: siteMap,
            errors: [],
            timestamp: Date.now()
          };
        } catch (err) {
          return {
            success: false,
            data: {},
            errors: [err.message],
            timestamp: Date.now()
          };
        }
      }
      /**
       * Reads the page structure inside the browser. Navigation links are taken
       * from the first match of: <nav>, an element whose class contains "menu",
       * or <header>.
       *
       * @returns {Promise<{title: string, url: string, navigation: Array<{text: string, href: string}>, sections: number}>}
       */
      async buildSiteMap() {
        // NOTE: this callback runs inside the page, so it must be self-contained.
        return await this.page.evaluate(() => {
          const container = document.querySelector("nav") || document.querySelector('[class*="menu"]') || document.querySelector("header");
          const links = [];
          if (container) {
            for (const a of container.querySelectorAll("a[href]")) {
              const text = a.textContent?.trim();
              const href = a.href;
              if (text && href) {
                links.push({ text, href });
              }
            }
          }
          return {
            title: document.title,
            url: window.location.href,
            navigation: links,
            sections: document.querySelectorAll("section, article").length
          };
        });
      }
    };
  }
});
|
|
512
|
+
|
|
513
|
+
// src/modes/auto.ts
|
|
514
|
+
// Export object for src/modes/auto.ts.
var auto_exports = {};
__export(auto_exports, {
  AutoMode: () => AutoMode
});
var AutoMode;
// Run-once initializer for src/modes/auto.ts.
var init_auto = __esm({
  "src/modes/auto.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    init_downloader();
    init_scrape();
    init_navigator();
    /**
     * Inspects the current page and delegates to the downloader, scrape or
     * navigator mode, whichever fits best.
     */
    AutoMode = class {
      page;
      /**
       * @param {object} page - Page object handed on to the selected mode.
       */
      constructor(page) {
        this.page = page;
      }
      /**
       * Picks a mode for the current page and runs it.
       *
       * @param {string} url - Forwarded to the selected mode's `execute`.
       * @returns {Promise<{success: boolean, data: object, errors: string[], timestamp: number}>}
       */
      async execute(url) {
        logger.info("[auto-mode] Analyzing site...");
        const mode = await this.detectBestMode();
        logger.info(`[auto-mode] Selected mode: ${mode}`);
        switch (mode) {
          case "downloader":
            return await new DownloaderMode(this.page).execute(url);
          case "scrape":
            return await new ScrapeMode(this.page).execute(url);
          case "navigator":
            return await new NavigatorMode(this.page).execute(url);
          default:
            return {
              success: false,
              data: {},
              errors: ["Unknown mode"],
              timestamp: Date.now()
            };
        }
      }
      /**
       * Chooses a mode from signals found in the page.
       *
       * @returns {Promise<"downloader"|"scrape"|"navigator">} "downloader" when a
       *   download control, media player or file link is present; otherwise
       *   "scrape" for paginated or very long pages; otherwise "navigator".
       */
      async detectBestMode() {
        // NOTE: this callback runs inside the page, so it must be self-contained.
        const indicators = await this.page.evaluate(() => {
          // `:has-text()` is a Playwright selector-engine extension, not CSS:
          // passing it to document.querySelector throws a SyntaxError. Match the
          // button text by hand instead (case-insensitive substring).
          const hasDownloadButton = !!document.querySelector("a[download]") || Array.from(document.querySelectorAll("button")).some((button) => (button.textContent || "").toLowerCase().includes("download"));
          const hasVideoPlayer = !!document.querySelector('video, iframe[src*="youtube"], iframe[src*="vimeo"]');
          const hasFileLinks = !!document.querySelector('a[href$=".mp4"], a[href$=".pdf"], a[href$=".zip"]');
          const hasPagination = !!document.querySelector('.pagination, .next, [class*="page-"]');
          const hasInfiniteScroll = document.body.scrollHeight > window.innerHeight * 3;
          const hasForm = !!document.querySelector("form");
          const hasSearch = !!document.querySelector('input[type="search"], input[placeholder*="search"]');
          return {
            hasDownloadButton,
            hasVideoPlayer,
            hasFileLinks,
            hasPagination,
            hasInfiniteScroll,
            hasForm,
            hasSearch
          };
        });
        if (indicators.hasDownloadButton || indicators.hasVideoPlayer || indicators.hasFileLinks) {
          return "downloader";
        }
        if (indicators.hasPagination || indicators.hasInfiniteScroll) {
          return "scrape";
        }
        return "navigator";
      }
    };
  }
});
|
|
590
|
+
|
|
591
|
+
// src/index.ts
|
|
592
|
+
// Export object for src/index.ts. Each entry is a getter, so the value is
// looked up at access time rather than when this object is built.
var src_exports = {};
__export(src_exports, {
  ActionRecorder: () => ActionRecorder,
  CloudflareManager: () => CloudflareManager,
  CommandExecutor: () => CommandExecutor,
  CommandParser: () => CommandParser,
  FirekidScraper: () => FirekidScraper,
  PatternCache: () => PatternCache,
  SmartFetch: () => SmartFetch,
  applyGhost: () => applyGhost,
  config: () => config,
  getConfig: () => getConfig,
  getNewSeed: () => getNewSeed,
  getSeedForSite: () => getSeedForSite,
  logger: () => logger,
  setLogLevel: () => setLogLevel,
  updateConfig: () => updateConfig
});
// Publish the export object as this file's CommonJS exports.
module.exports = __toCommonJS(src_exports);
|
|
611
|
+
init_cjs_shims();
|
|
612
|
+
|
|
613
|
+
// src/core/scraper.ts
|
|
614
|
+
init_cjs_shims();
|
|
615
|
+
var import_playwright = require("playwright");
|
|
616
|
+
|
|
617
|
+
// src/ghost/index.ts
|
|
618
|
+
init_cjs_shims();
|
|
619
|
+
|
|
620
|
+
// src/ghost/seed.ts
|
|
621
|
+
init_cjs_shims();
|
|
622
|
+
var import_crypto = __toESM(require("crypto"));
|
|
623
|
+
// Seeds already handed out by getSeedForSite, keyed by site host.
var seedCache = /* @__PURE__ */ new Map();
// Candidate pools that getNewSeed samples from, one pool per seed attribute.
var chromeVersions = [
  "131.0.6778.85",
  "131.0.6778.86",
  "130.0.6723.116",
  "129.0.6668.100"
];
// Width/height pairs; a seed takes both values from the same entry.
var resolutions = [
  { width: 1920, height: 1080 },
  { width: 1366, height: 768 },
  { width: 1536, height: 864 },
  { width: 1440, height: 900 },
  { width: 2560, height: 1440 }
];
var languages = ["en-US", "en-GB", "en", "es-ES", "fr-FR", "de-DE"];
var timezones = ["America/New_York", "America/Los_Angeles", "Europe/London", "Europe/Paris"];
var webglVendors = [
  "Google Inc. (NVIDIA)",
  "Google Inc. (Intel)",
  "Google Inc. (AMD)",
  "Google Inc. (Apple)"
];
var webglRenderers = [
  "ANGLE (NVIDIA, NVIDIA GeForce RTX 3070 Direct3D11 vs_5_0 ps_5_0, D3D11)",
  "ANGLE (Intel, Intel(R) UHD Graphics 630 Direct3D11 vs_5_0 ps_5_0, D3D11)",
  "ANGLE (AMD, AMD Radeon RX 580 Direct3D11 vs_5_0 ps_5_0, D3D11)"
];
// Each entry is a complete font list; a seed uses one whole list.
var fontSets = [
  ["Arial", "Calibri", "Cambria", "Consolas", "Georgia", "Times New Roman", "Verdana"],
  ["Arial", "Helvetica", "Georgia", "Courier New", "Times", "Comic Sans MS"],
  ["Arial", "Tahoma", "Trebuchet MS", "Verdana", "Georgia", "Palatino Linotype"]
];
|
|
655
|
+
/**
 * Picks one element of `array` uniformly at random using Math.random.
 *
 * @param {Array} array - Pool to draw from.
 * @returns {*} The chosen element, or undefined for an empty array.
 */
function random(array) {
  const index = Math.floor(Math.random() * array.length);
  return array[index];
}
|
|
658
|
+
/**
 * Returns a random floating-point number between `min` (inclusive) and `max`
 * (exclusive), drawn with Math.random.
 *
 * @param {number} min - Lower bound.
 * @param {number} max - Upper bound.
 * @returns {number}
 */
function randomRange(min, max) {
  const span = max - min;
  return Math.random() * span + min;
}
|
|
661
|
+
/**
 * Creates a fresh fingerprint seed by sampling each attribute from its pool.
 *
 * The WebGL vendor is derived from the chosen renderer so the two always
 * describe the same GPU maker; drawing them independently could produce
 * contradictory pairs (for example an Apple vendor with an NVIDIA Direct3D
 * renderer). If the renderer's GPU maker has no entry in `webglVendors`, a
 * random vendor is used as before.
 *
 * @returns {{id: string, chromeVersion: string, screenWidth: number,
 *   screenHeight: number, language: string, timezone: string,
 *   canvasNoise: number, webglVendor: string, webglRenderer: string,
 *   audioNoise: number, fonts: string[]}}
 */
function getNewSeed() {
  const resolution = random(resolutions);
  const webglRenderer = random(webglRenderers);
  // Renderer strings have the form "ANGLE (<GPU maker>, ...)".
  const gpuMaker = /^ANGLE \(([^,]+),/.exec(webglRenderer)?.[1];
  const webglVendor = webglVendors.find((vendor) => vendor === `Google Inc. (${gpuMaker})`) ?? random(webglVendors);
  return {
    id: import_crypto.default.randomUUID(),
    chromeVersion: random(chromeVersions),
    screenWidth: resolution.width,
    screenHeight: resolution.height,
    language: random(languages),
    timezone: random(timezones),
    canvasNoise: randomRange(1e-4, 1e-3),
    webglVendor,
    webglRenderer,
    audioNoise: randomRange(1e-5, 1e-4),
    fonts: random(fontSets)
  };
}
|
|
677
|
+
/**
 * Returns the seed associated with `siteHost`, creating and caching a new one
 * on first use so repeated calls for the same host get the same seed.
 *
 * @param {string} siteHost - Cache key.
 * @returns {object} The cached or newly created seed.
 */
function getSeedForSite(siteHost) {
  if (!seedCache.has(siteHost)) {
    seedCache.set(siteHost, getNewSeed());
  }
  return seedCache.get(siteHost);
}
|
|
685
|
+
|
|
686
|
+
// src/ghost/canvas.ts
|
|
687
|
+
init_cjs_shims();
|
|
688
|
+
/**
 * Registers an init script that perturbs canvas pixel read-outs.
 *
 * The script patches `getImageData` to nudge the R, G and B channels of every
 * pixel, and patches `toDataURL` / `toBlob` to write such a perturbed read-out
 * back into 2D canvases before exporting them.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {{canvasNoise: number}} seed - `canvasNoise` scales the perturbation.
 * @returns {Promise<void>}
 */
async function applyCanvasSpoof(context, seed) {
  // NOTE: the callback runs inside the page, so it must be self-contained.
  await context.addInitScript((noise) => {
    const originalGetImageData = CanvasRenderingContext2D.prototype.getImageData;
    const originalToDataURL = HTMLCanvasElement.prototype.toDataURL;
    const originalToBlob = HTMLCanvasElement.prototype.toBlob;
    const jitter = () => Math.floor(noise * 255 * (Math.random() - 0.5));
    // Reads the canvas through the patched getImageData and writes the result
    // back, so the subsequent export contains the perturbed pixels.
    const bakeNoise = (canvas) => {
      // getImageData rejects empty rectangles, while the native exporters
      // handle zero-sized canvases fine; leave those untouched.
      if (canvas.width === 0 || canvas.height === 0) {
        return;
      }
      const context2 = canvas.getContext("2d");
      if (context2) {
        const imageData = context2.getImageData(0, 0, canvas.width, canvas.height);
        context2.putImageData(imageData, 0, 0);
      }
    };
    CanvasRenderingContext2D.prototype.getImageData = function(...args) {
      const imageData = originalGetImageData.apply(this, args);
      // Step over whole RGBA pixels; the alpha channel is left as-is.
      for (let i = 0; i < imageData.data.length; i += 4) {
        imageData.data[i] += jitter();
        imageData.data[i + 1] += jitter();
        imageData.data[i + 2] += jitter();
      }
      return imageData;
    };
    HTMLCanvasElement.prototype.toDataURL = function(...args) {
      bakeNoise(this);
      return originalToDataURL.apply(this, args);
    };
    HTMLCanvasElement.prototype.toBlob = function(...args) {
      bakeNoise(this);
      return originalToBlob.apply(this, args);
    };
  }, seed.canvasNoise);
}
|
|
720
|
+
|
|
721
|
+
// src/ghost/webgl.ts
|
|
722
|
+
init_cjs_shims();
|
|
723
|
+
/**
 * Registers an init script that makes WebGL report the seed's vendor and
 * renderer strings for the unmasked vendor/renderer parameters.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {{webglVendor: string, webglRenderer: string}} seed
 * @returns {Promise<void>}
 */
async function applyWebGLSpoof(context, seed) {
  // NOTE: the callback runs inside the page, so it must be self-contained.
  await context.addInitScript((params) => {
    const { vendor, renderer } = params;
    // Parameter ids answered with the spoofed values (UNMASKED_VENDOR_WEBGL and
    // UNMASKED_RENDERER_WEBGL of the WEBGL_debug_renderer_info extension).
    const UNMASKED_VENDOR = 37445;
    const UNMASKED_RENDERER = 37446;
    const getParameterProxyHandler = {
      apply(target, thisArg, args) {
        const param = args[0];
        if (param === UNMASKED_VENDOR) {
          return vendor;
        }
        if (param === UNMASKED_RENDERER) {
          return renderer;
        }
        return Reflect.apply(target, thisArg, args);
      }
    };
    const getExtensionProxyHandler = {
      apply(target, thisArg, args) {
        const result = Reflect.apply(target, thisArg, args);
        // The extension object normally carries only constants. Wrapping a
        // missing `getParameter` in a Proxy throws a TypeError, so only wrap it
        // when it actually is a function.
        if (result && args[0] === "WEBGL_debug_renderer_info" && typeof result.getParameter === "function") {
          result.getParameter = new Proxy(result.getParameter, getParameterProxyHandler);
        }
        return result;
      }
    };
    // Either constructor may be absent in a given environment.
    const contextClasses = [];
    if (typeof WebGLRenderingContext !== "undefined") {
      contextClasses.push(WebGLRenderingContext);
    }
    if (typeof WebGL2RenderingContext !== "undefined") {
      contextClasses.push(WebGL2RenderingContext);
    }
    for (const contextClass of contextClasses) {
      contextClass.prototype.getParameter = new Proxy(
        contextClass.prototype.getParameter,
        getParameterProxyHandler
      );
      contextClass.prototype.getExtension = new Proxy(
        contextClass.prototype.getExtension,
        getExtensionProxyHandler
      );
    }
  }, { vendor: seed.webglVendor, renderer: seed.webglRenderer });
}
|
|
769
|
+
|
|
770
|
+
// src/ghost/audio.ts
|
|
771
|
+
init_cjs_shims();
|
|
772
|
+
/**
 * Registers an init script that adds small random offsets to audio values
 * read through `AudioContext`: the compressor's `reduction.value` (when
 * `reduction` is an object) and each oscillator's frequency at `start()`.
 *
 * @param {object} context - Object exposing `addInitScript(fn, arg)`.
 * @param {{audioNoise: number}} seed - `audioNoise` scales the offsets.
 * @returns {Promise<void>}
 */
async function applyAudioSpoof(context, seed) {
  // NOTE: the callback runs inside the page, so it must be self-contained.
  await context.addInitScript((noise) => {
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    if (!AudioContextClass) {
      return;
    }
    const offset = () => noise * (Math.random() - 0.5);
    const originalCreateDynamicsCompressor = AudioContextClass.prototype.createDynamicsCompressor;
    const originalCreateOscillator = AudioContextClass.prototype.createOscillator;
    AudioContextClass.prototype.createDynamicsCompressor = function(...args) {
      const compressor = originalCreateDynamicsCompressor.apply(this, args);
      const reduction = compressor.reduction;
      // Only an object can take an accessor; a plain numeric `reduction` is
      // left alone (defining a property on a primitive would throw).
      if (reduction !== null && typeof reduction === "object") {
        // Find the accessor the object inherits for `value`, so the real
        // reading stays the base of the perturbed one. The previous getter read
        // a backing field that was never initialised and returned NaN.
        let inherited;
        for (let proto = Object.getPrototypeOf(reduction); proto && !inherited; proto = Object.getPrototypeOf(proto)) {
          inherited = Object.getOwnPropertyDescriptor(proto, "value");
        }
        let current = reduction.value;
        Object.defineProperty(reduction, "value", {
          configurable: true,
          get() {
            const base = inherited && inherited.get ? inherited.get.call(this) : current;
            return base + offset();
          },
          set(v) {
            current = v;
            if (inherited && inherited.set) {
              inherited.set.call(this, v);
            }
          }
        });
      }
      return compressor;
    };
    AudioContextClass.prototype.createOscillator = function(...args) {
      const oscillator = originalCreateOscillator.apply(this, args);
      const originalStart = oscillator.start;
      oscillator.start = function(...startArgs) {
        if (oscillator.frequency) {
          oscillator.frequency.value += offset();
        }
        return originalStart.apply(this, startArgs);
      };
      return oscillator;
    };
  }, seed.audioNoise);
}
|
|
806
|
+
|
|
807
|
+
// src/ghost/fonts.ts
|
|
808
|
+
init_cjs_shims();
|
|
809
|
+
/**
 * Registers an init script that wraps `window.getComputedStyle` so that
 * `getPropertyValue("font-family")` only reports families matching the
 * seed's font list.
 *
 * @param {{ addInitScript: Function }} context - Object exposing `addInitScript(fn, arg)`.
 * @param {{ fonts: string[] }} seed - Identity seed; only `fonts` is read here.
 * @returns {Promise<void>} Resolves once the script has been registered.
 */
async function applyFontSpoof(context, seed) {
  await context.addInitScript((fonts) => {
    const originalGetComputedStyle = window.getComputedStyle;
    window.getComputedStyle = function(element, pseudoElt) {
      const styles = originalGetComputedStyle.call(this, element, pseudoElt);
      const originalGetPropertyValue = styles.getPropertyValue;
      styles.getPropertyValue = function(property) {
        const value = originalGetPropertyValue.call(this, property);
        // CSSOM lower-cases property names before lookup, so the filter has
        // to match "Font-Family" and "FONT-FAMILY" as well.
        if (String(property).toLowerCase() !== "font-family") {
          return value;
        }
        const allowed = value
          .split(",")
          .map((family) => family.trim())
          .filter((family) => {
            const unquoted = family.replace(/['"]/g, "");
            return fonts.some((font) => unquoted.includes(font));
          });
        // Fall back to the real value when nothing matches the seed list.
        return allowed.length > 0 ? allowed.join(", ") : value;
      };
      return styles;
    };
  }, seed.fonts);
}
|
|
831
|
+
|
|
832
|
+
// src/ghost/navigator.ts
|
|
833
|
+
init_cjs_shims();
|
|
834
|
+
/**
 * Registers an init script that overrides selected `navigator` and `screen`
 * getters, wraps `navigator.permissions.query`, and removes
 * `window.chrome.runtime` when `window.chrome` exists.
 *
 * @param {{ addInitScript: Function }} context - Object exposing `addInitScript(fn, arg)`.
 * @param {{ language: string, screenWidth: number, screenHeight: number }} seed -
 *   Identity seed; passed whole to the init script, which reads these three fields.
 * @returns {Promise<void>} Resolves once the script has been registered.
 */
async function applyNavigatorSpoof(context, seed) {
  await context.addInitScript((seedData) => {
    // Installs one getter per entry on `target`.
    const overrideGetters = (target, getters) => {
      for (const [name, get] of Object.entries(getters)) {
        Object.defineProperty(target, name, { get });
      }
    };

    overrideGetters(navigator, {
      webdriver: () => void 0,
      plugins: () => [1, 2, 3, 4, 5],
      languages: () => [seedData.language, "en"],
      platform: () => "Win32",
      hardwareConcurrency: () => 8,
      deviceMemory: () => 8
    });

    // `permissions` can be absent; guard so the screen overrides below still run.
    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === "function") {
      const originalQuery = permissions.query;
      permissions.query = (parameters) => {
        const wantsNotifications = parameters != null && parameters.name === "notifications";
        if (wantsNotifications && typeof Notification !== "undefined") {
          return Promise.resolve({ state: Notification.permission });
        }
        // The native method needs its receiver; an unbound call is an illegal invocation.
        return originalQuery.call(permissions, parameters);
      };
    }

    if (window.chrome) {
      delete window.chrome.runtime;
    }

    overrideGetters(screen, {
      width: () => seedData.screenWidth,
      height: () => seedData.screenHeight,
      availWidth: () => seedData.screenWidth,
      // 40px is subtracted from the reported height for the available area.
      availHeight: () => seedData.screenHeight - 40
    });
  }, seed);
}
|
|
873
|
+
|
|
874
|
+
// src/ghost/consistency.ts
|
|
875
|
+
init_cjs_shims();
|
|
876
|
+
/**
 * Sanity-checks an identity seed before it is applied.
 *
 * Numeric fields are coerced with `Number()` as the comparison operators
 * would, but values that coerce to NaN (including `undefined`) are now
 * rejected instead of slipping through every `<` / `>` check.
 *
 * @param {object} seed - Seed with `id`, `chromeVersion`, `screenWidth`,
 *   `screenHeight`, `canvasNoise`, `audioNoise` and `fonts`.
 * @returns {void}
 * @throws {Error} When a field is missing or out of range.
 */
function validateConsistency(seed) {
  // True only when `value` coerces to a number inside [min, max]; NaN fails both comparisons.
  const within = (value, min, max) => {
    const numeric = Number(value);
    return numeric >= min && numeric <= max;
  };

  if (!seed || !seed.id || !seed.chromeVersion) {
    throw new Error("Invalid seed: missing required fields");
  }
  if (!within(seed.screenWidth, 800, Infinity) || !within(seed.screenHeight, 600, Infinity)) {
    throw new Error("Invalid seed: screen resolution too small");
  }
  if (!within(seed.canvasNoise, 0, 1)) {
    throw new Error("Invalid seed: canvas noise out of range");
  }
  if (!within(seed.audioNoise, 0, 1)) {
    throw new Error("Invalid seed: audio noise out of range");
  }
  if (!Array.isArray(seed.fonts) || seed.fonts.length === 0) {
    throw new Error("Invalid seed: no fonts specified");
  }
}
|
|
893
|
+
|
|
894
|
+
// src/ghost/behavior.ts
|
|
895
|
+
init_cjs_shims();
|
|
896
|
+
/**
 * Helpers that drive a page with randomised timing and pointer positions.
 * Every method takes the page object to act on; the instance only keeps the
 * seed and a behaviour profile.
 */
var HumanBehavior = class {
  seed;
  profile;

  /**
   * @param {object} seed - Identity seed; `screenWidth` / `screenHeight` are
   *   used as a fallback area for random mouse movement.
   */
  constructor(seed) {
    this.seed = seed;
    this.profile = this.generateProfile();
  }

  /**
   * Builds the default behaviour profile.
   * @returns {object} Profile with a typing-speed range (ms) and empty pattern lists.
   */
  generateProfile() {
    return {
      typingSpeed: { min: 50, max: 150 },
      mouseMovements: [],
      scrollPatterns: [],
      pauseDistribution: [],
      clickTiming: []
    };
  }

  /**
   * Sleeps for a random duration in [min, max) milliseconds.
   * @param {number} [min=500]
   * @param {number} [max=2000]
   * @returns {Promise<void>}
   */
  async randomDelay(min = 500, max = 2e3) {
    const delay = Math.random() * (max - min) + min;
    await new Promise((resolve) => setTimeout(resolve, delay));
  }

  /**
   * Moves the mouse to a random point inside the element's bounding box,
   * pauses briefly, then clicks there. Does nothing when no box is available.
   * @param {object} page - Page exposing `locator` and `mouse`.
   * @param {string} selector
   * @returns {Promise<void>}
   */
  async humanClick(page, selector) {
    const box = await page.locator(selector).boundingBox();
    if (!box) {
      return;
    }
    const x = box.x + Math.random() * box.width;
    const y = box.y + Math.random() * box.height;
    await page.mouse.move(x, y, { steps: 10 });
    await this.randomDelay(100, 300);
    await page.mouse.click(x, y);
  }

  /**
   * Focuses the element and types `text` one character at a time, pausing
   * between keystrokes according to the profile's typing speed.
   * @param {object} page - Page exposing `focus` and `keyboard`.
   * @param {string} selector
   * @param {string} text
   * @returns {Promise<void>}
   */
  async humanType(page, selector, text) {
    await page.focus(selector);
    const { min, max } = this.profile.typingSpeed;
    for (const char of text) {
      await page.keyboard.type(char);
      await this.randomDelay(min, max);
    }
  }

  /**
   * Scrolls down one to three times by a random offset below 500px.
   * @param {object} page - Page exposing `evaluate`.
   * @returns {Promise<void>}
   */
  async randomScroll(page) {
    const scrolls = Math.floor(Math.random() * 3) + 1;
    for (let i = 0; i < scrolls; i++) {
      const scrollY = Math.random() * 500;
      await page.evaluate((y) => window.scrollBy(0, y), scrollY);
      await this.randomDelay(500, 1e3);
    }
  }

  /**
   * Moves the mouse to a random point, then pauses.
   *
   * The target area is the page viewport when the page reports one, then
   * the seed's screen size, and only then the former 1920x1080 constant.
   * @param {object} page - Page exposing `mouse` and optionally `viewportSize()`.
   * @returns {Promise<void>}
   */
  async randomMouseMovement(page) {
    const viewport = typeof page.viewportSize === "function" ? page.viewportSize() : null;
    const width = viewport?.width ?? this.seed?.screenWidth ?? 1920;
    const height = viewport?.height ?? this.seed?.screenHeight ?? 1080;
    const x = Math.random() * width;
    const y = Math.random() * height;
    await page.mouse.move(x, y, { steps: 20 });
    await this.randomDelay(200, 500);
  }

  /**
   * @returns {object} The profile object created in the constructor (not a copy).
   */
  getProfile() {
    return this.profile;
  }
};
|
|
955
|
+
|
|
956
|
+
// src/ghost/index.ts
|
|
957
|
+
init_logger();
|
|
958
|
+
/**
 * Picks an identity seed, validates it, applies every fingerprint spoof to
 * the browser context and sets matching HTTP headers.
 *
 * A new seed is generated when `options.fresh` is truthy or no
 * `options.siteHost` is given; otherwise the seed for that host is looked up.
 *
 * @param {object} context - Browser context passed to each spoof and used for `setExtraHTTPHeaders`.
 * @param {{ fresh?: boolean, siteHost?: string }} [options={}]
 * @returns {Promise<HumanBehavior>} Behaviour helper built from the chosen seed.
 */
async function applyGhost(context, options = {}) {
  const needsNewSeed = options.fresh || !options.siteHost;
  const seed = needsNewSeed ? getNewSeed() : getSeedForSite(options.siteHost);
  validateConsistency(seed);
  logger.info(`[ghost] Applying identity seed: ${seed.id.slice(0, 8)}... | Chrome ${seed.chromeVersion} | ${seed.screenWidth}x${seed.screenHeight}`);

  // Init scripts are registered one after another, in this fixed order.
  const spoofs = [applyCanvasSpoof, applyWebGLSpoof, applyAudioSpoof, applyFontSpoof, applyNavigatorSpoof];
  for (const applySpoof of spoofs) {
    await applySpoof(context, seed);
  }

  // Client-hint brands only carry the major version.
  const chromeMajor = seed.chromeVersion.split(".")[0];
  const headers = {
    "Accept-Language": `${seed.language},en;q=0.9`,
    "sec-ch-ua": `"Chromium";v="${chromeMajor}", "Google Chrome";v="${chromeMajor}", "Not-A.Brand";v="99"`,
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"'
  };
  await context.setExtraHTTPHeaders(headers);
  logger.info("[ghost] All fingerprint spoofs applied");
  return new HumanBehavior(seed);
}
|
|
976
|
+
|
|
977
|
+
// src/cloudflare/cloudflare.ts
|
|
978
|
+
init_cjs_shims();
|
|
979
|
+
init_logger();
|
|
980
|
+
/**
 * Detects Cloudflare / WAF interstitials on a page and waits for them to
 * clear, either automatically or after a manual Turnstile solve.
 */
var CloudflareManager = class {
  /**
   * Checks the page title and HTML for Cloudflare markers.
   * @param {object} page - Page exposing `url()`, `title()` and `content()`.
   * @returns {Promise<boolean>} True when any marker is found; false when
   *   none is found or the page could not be read.
   */
  async detect(page) {
    const url = page.url();
    try {
      const title = await page.title();
      const content = await page.content();
      const titleMarkers = ["Just a moment", "Attention Required"];
      const contentMarkers = ["cf-browser-verification", "cloudflare", "cf_chl_opt", "__cf_bm", "Ray ID"];
      const detected = titleMarkers.some((marker) => title.includes(marker)) || contentMarkers.some((marker) => content.includes(marker));
      if (detected) {
        logger.warn(`[cloudflare] CF protection detected on ${url}`);
      }
      return detected;
    } catch {
      return false;
    }
  }

  /**
   * Polls the page title once per second until it no longer looks like a
   * Cloudflare challenge page.
   *
   * A failed title read is treated as "not known yet" and polling continues;
   * previously the failure was read as an empty title and therefore as a
   * cleared challenge.
   * @param {object} page - Page exposing `title()` and `waitForTimeout(ms)`.
   * @param {number} [timeoutMs=30000]
   * @returns {Promise<boolean>} True when cleared, false on timeout.
   */
  async waitForClearance(page, timeoutMs = 3e4) {
    logger.info("[cloudflare] Waiting for CF challenge to resolve...");
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      let title = null;
      try {
        title = await page.title();
      } catch {
        title = null;
      }
      if (title !== null) {
        const isCFPage = title.includes("Just a moment") || title.includes("Attention Required");
        if (!isCFPage) {
          logger.info("[cloudflare] CF challenge cleared");
          return true;
        }
      }
      await page.waitForTimeout(1e3);
    }
    logger.error("[cloudflare] CF challenge timeout - could not clear in time");
    return false;
  }

  /**
   * Reads the `cf_clearance` and `__cf_bm` cookies from the context.
   * @param {object} context - Context exposing `cookies()`.
   * @returns {Promise<{ cfClearance?: string, cfBm?: string }>}
   */
  async extractTokens(context) {
    const cookies = await context.cookies();
    const tokens = {};
    for (const { name, value } of cookies) {
      if (name === "cf_clearance") tokens.cfClearance = value;
      if (name === "__cf_bm") tokens.cfBm = value;
    }
    if (tokens.cfClearance) {
      logger.info(`[cloudflare] Captured cf_clearance: ${tokens.cfClearance.slice(0, 20)}...`);
    }
    return tokens;
  }

  /**
   * Guesses the WAF vendor from substrings in the page HTML. Checks run in
   * a fixed order and the first match wins.
   * @param {object} page - Page exposing `content()`.
   * @returns {Promise<string|null>} Vendor label, or null when nothing matched.
   */
  async detectWAF(page) {
    const content = await page.content().catch(() => "");
    const vendors = [
      ["Cloudflare", ["cloudflare", "cf-ray"]],
      ["Akamai", ["akamai", "ak_bmsc"]],
      ["Sucuri", ["sucuri"]],
      ["Imperva/Incapsula", ["incapsula"]],
      ["Distil Networks", ["distil"]]
    ];
    for (const [label, needles] of vendors) {
      if (needles.some((needle) => content.includes(needle))) return label;
    }
    return null;
  }

  /**
   * Full handling flow: detect, wait for the automatic challenge, then fall
   * back to a manual Turnstile solve.
   * @param {object} page
   * @param {string} url - Forwarded to `handleTurnstile`.
   * @returns {Promise<boolean>} True when the page is usable, false otherwise.
   */
  async handleCloudflare(page, url) {
    const isProtected = await this.detect(page);
    if (!isProtected) return true;
    logger.info("[cloudflare] Cloudflare detected");
    const cleared = await this.waitForClearance(page);
    if (cleared) {
      logger.info("[cloudflare] JS challenge auto-cleared");
      return true;
    }
    const hasTurnstile = await this.detectTurnstile(page);
    if (hasTurnstile) {
      logger.info("[cloudflare] Turnstile CAPTCHA detected - opening browser for manual solve");
      return await this.handleTurnstile(page, url);
    }
    logger.warn("[cloudflare] Unknown Cloudflare challenge");
    return false;
  }

  /**
   * @param {object} page - Page exposing `locator(selector).count()`.
   * @returns {Promise<boolean>} True when a challenge iframe or an element
   *   whose id contains "turnstile" is present.
   */
  async detectTurnstile(page) {
    const turnstileFrame = await page.locator('iframe[src*="challenges.cloudflare.com"]').count();
    const turnstileDiv = await page.locator('[id*="turnstile"]').count();
    return turnstileFrame > 0 || turnstileDiv > 0;
  }

  /**
   * Prompts on the console and waits for a manual Turnstile solve.
   * @param {object} page
   * @param {string} url - Not used by this method.
   * @param {number} [timeoutMs=300000] - Upper bound for the manual solve.
   * @returns {Promise<boolean>} True when solved, false when the wait timed out.
   */
  async handleTurnstile(page, url, timeoutMs = 3e5) {
    logger.info("[cloudflare] Waiting for manual Turnstile solve...");
    console.log("\n===========================================");
    console.log(" PLEASE SOLVE THE CAPTCHA");
    console.log(" Waiting for you to complete it...");
    console.log("===========================================\n");
    const solved = await this.waitForTurnstileSolved(page, timeoutMs);
    if (!solved) {
      logger.error("[cloudflare] Turnstile was not solved in time");
      return false;
    }
    logger.info("[cloudflare] CAPTCHA solved! Continuing...");
    return true;
  }

  /**
   * Heuristic check for a solved Turnstile: the challenge iframe is gone,
   * the response input holds a token, or the body lacks the "no-scroll" class.
   * @param {object} page - Page exposing `locator` and `evaluate`.
   * @returns {Promise<boolean>} False when the page could not be inspected.
   */
  async isTurnstileSolved(page) {
    try {
      const frameCount = await page.locator('iframe[src*="challenges.cloudflare.com"]').count();
      if (frameCount === 0) return true;
      const hasToken = await page.evaluate(() => {
        const input = document.querySelector('input[name="cf-turnstile-response"]');
        return input && input.value !== "";
      });
      if (hasToken) return true;
      // NOTE(review): most pages never set "no-scroll" on <body>, so this
      // fallback is likely to report "solved" too early - confirm intent.
      const contentVisible = await page.evaluate(() => {
        const body = document.body;
        return body && !body.classList.contains("no-scroll");
      });
      return Boolean(contentVisible);
    } catch {
      return false;
    }
  }

  /**
   * Polls `isTurnstileSolved` once per second until it succeeds or the
   * deadline passes. The loop used to have no exit other than success.
   * @param {object} page - Page exposing `waitForTimeout(ms)`.
   * @param {number} [timeoutMs=300000]
   * @returns {Promise<boolean>} True when solved, false on timeout.
   */
  async waitForTurnstileSolved(page, timeoutMs = 3e5) {
    const deadline = Date.now() + timeoutMs;
    while (Date.now() < deadline) {
      const solved = await this.isTurnstileSolved(page);
      if (solved) {
        // Short pause after the solve before handing control back.
        await page.waitForTimeout(2e3);
        return true;
      }
      await page.waitForTimeout(1e3);
    }
    return false;
  }
};
|
|
1101
|
+
|
|
1102
|
+
// src/engine/cmd-parser.ts
|
|
1103
|
+
init_cjs_shims();
|
|
1104
|
+
var import_fs = __toESM(require("fs"));
|
|
1105
|
+
var import_path = __toESM(require("path"));
|
|
1106
|
+
init_logger();
|
|
1107
|
+
/**
 * Parses `.cmd` scripts: one command per line, `//` comments, `{{name}}`
 * variable placeholders, and indented child lines under REPEAT / IF / LOOP.
 */
var CommandParser = class {
  /** Every action keyword the parser accepts. */
  static VALID_ACTIONS = new Set([
    "GOTO",
    "BACK",
    "FORWARD",
    "REFRESH",
    "CLICK",
    "TYPE",
    "PRESS",
    "SELECT",
    "CHECK",
    "UPLOAD",
    "WAIT",
    "WAITLOAD",
    "SCROLL",
    "SCROLLDOWN",
    "SCAN",
    "EXTRACT",
    "SCREENSHOT",
    "PAGINATE",
    "INFINITESCROLL",
    "FETCH",
    "DOWNLOAD",
    "REFERER",
    "BYPASS_CLOUDFLARE",
    "REPEAT",
    "IF",
    "LOOP"
  ]);

  /** Actions whose indented following lines become `children`. */
  static BLOCK_ACTIONS = new Set(["REPEAT", "IF", "LOOP"]);

  variables = {};

  /**
   * Stores a value that `{{key}}` placeholders resolve to.
   * @param {string} key
   * @param {*} value
   */
  setVariable(key, value) {
    this.variables[key] = value;
  }

  /**
   * Replaces `{{name}}` placeholders with stored variables. Unknown names
   * are left as written.
   *
   * Only own properties count as variables, so `{{constructor}}` or
   * `{{toString}}` no longer expand to inherited Object members.
   * @param {string} text
   * @returns {string}
   */
  resolve(text) {
    return text.replace(/\{\{(\w+)\}\}/g, (placeholder, key) => {
      const isKnown = Object.prototype.hasOwnProperty.call(this.variables, key);
      return isKnown ? this.variables[key] ?? placeholder : placeholder;
    });
  }

  /**
   * Parses one command line.
   * @param {string} line - Raw line; surrounding whitespace is ignored.
   * @param {number} lineNum - 1-based line number recorded on the step.
   * @returns {{ action: string, args: string[], line: number } | null}
   *   Null for blank lines, comments and unknown actions.
   */
  parseLine(line, lineNum) {
    const text = line.trim();
    if (text === "" || text.startsWith("//")) return null;
    const [keyword, ...rawArgs] = text.split(/\s+/);
    const action = keyword.toUpperCase();
    if (!CommandParser.VALID_ACTIONS.has(action)) {
      logger.warn(`Unknown action "${action}" at line ${lineNum} - skipping`);
      return null;
    }
    const args = rawArgs.map((arg) => this.resolve(arg));
    return { action, args, line: lineNum };
  }

  /**
   * Parses a whole script.
   *
   * Blank lines and comments inside an indented block are skipped without
   * ending the block; before, a blank line ended it and the remaining
   * indented children were dropped without notice.
   * @param {string} content - Script text (LF or CRLF line endings).
   * @param {string} [filePath="unknown"] - Used for the site name and logging.
   * @returns {{ site: string, steps: object[], raw: string }}
   */
  parse(content, filePath = "unknown") {
    const lines = content.split(/\r?\n/);
    const indentOf = (raw) => raw.length - raw.trimStart().length;
    const isSkippable = (text) => text === "" || text.startsWith("//");
    const steps = [];
    let i = 0;
    while (i < lines.length) {
      const raw = lines[i];
      const lineNum = i + 1;
      i++;
      const text = raw.trim();
      if (isSkippable(text)) continue;
      if (indentOf(raw) > 0) {
        logger.warn(`Indented line ${lineNum} is not inside a REPEAT/IF/LOOP block - skipping`);
        continue;
      }
      const parsed = this.parseLine(text, lineNum);
      if (!parsed) continue;
      if (CommandParser.BLOCK_ACTIONS.has(parsed.action)) {
        parsed.children = [];
        while (i < lines.length) {
          const childRaw = lines[i];
          const childText = childRaw.trim();
          if (!isSkippable(childText)) {
            // The first unindented command closes the block.
            if (indentOf(childRaw) === 0) break;
            const childStep = this.parseLine(childText, i + 1);
            if (childStep) parsed.children.push(childStep);
          }
          i++;
        }
      }
      steps.push(parsed);
    }
    const site = import_path.default.basename(filePath, ".cmd");
    logger.info(`Parsed ${steps.length} steps from ${filePath}`);
    return { site, steps, raw: content };
  }

  /**
   * Reads and parses a script file.
   * @param {string} filePath
   * @returns {{ site: string, steps: object[], raw: string }}
   * @throws {Error} When the file does not exist.
   */
  load(filePath) {
    if (!import_fs.default.existsSync(filePath)) {
      throw new Error(`File not found: ${filePath}`);
    }
    const content = import_fs.default.readFileSync(filePath, "utf8");
    return this.parse(content, filePath);
  }

  /**
   * Lists the `.cmd` files directly inside a directory.
   * @param {string} [dir="./commands"]
   * @returns {string[]} Joined paths; empty when the directory is missing.
   */
  findAll(dir = "./commands") {
    if (!import_fs.default.existsSync(dir)) return [];
    return import_fs.default
      .readdirSync(dir)
      .filter((name) => name.endsWith(".cmd"))
      .map((name) => import_path.default.join(dir, name));
  }
};
|
|
1214
|
+
|
|
1215
|
+
// src/engine/cmd-executor.ts
|
|
1216
|
+
init_cjs_shims();
|
|
1217
|
+
init_logger();
|
|
1218
|
+
init_config();
|
|
1219
|
+
init_smart_fetch();
|
|
1220
|
+
/**
 * Runs parsed command steps against a page. A failing step is logged and
 * recorded in `result.errors` / `result.skipped`; execution then continues
 * with the next step.
 */
var CommandExecutor = class {
  page;
  url;
  result = {
    success: false,
    skipped: [],
    extracted: [],
    errors: []
  };
  smartFetch;
  cfManager;
  variables = {};
  // Referer set by the REFERER command; used as the default for DOWNLOAD.
  manualReferer = void 0;

  /**
   * @param {object} page - Page the commands act on.
   * @param {string} url - URL used as the key for step / highlight reporting.
   */
  constructor(page, url) {
    this.page = page;
    this.url = url;
    this.smartFetch = new SmartFetch();
    this.smartFetch.setPageContext(page);
    this.cfManager = new CloudflareManager();
  }

  /**
   * Runs every top-level step of a parsed command file in order.
   * @param {{ site: string, steps: object[] }} cmd
   * @returns {Promise<object>} The accumulated result; `success` is set to
   *   true once all steps have been attempted, even if some were skipped.
   */
  async execute(cmd) {
    logger.info(`Executing ${cmd.site}.cmd - ${cmd.steps.length} steps`);
    for (const cmdStep of cmd.steps) {
      await this.runStep(cmdStep);
    }
    const { errors } = this.result;
    if (errors.length > 0) {
      logger.warn(`Completed with ${errors.length} skipped steps`);
      for (const err of errors) {
        logger.warn(` Line ${err.line} - ${err.action}: ${err.error}`);
      }
    }
    this.result.success = true;
    return this.result;
  }

  /**
   * Dispatches one step to its handler and records any failure.
   * @param {{ action: string, args: string[], line: number, children?: object[] }} cmdStep
   * @returns {Promise<void>}
   */
  async runStep(cmdStep) {
    const { action, args, line } = cmdStep;
    step(this.url, `${action} ${args.join(" ")}`, { mode: "cmd", step: action });
    try {
      switch (action) {
        case "GOTO":
          await this.goto(args);
          break;
        case "BACK":
          await this.page.goBack();
          break;
        case "FORWARD":
          await this.page.goForward();
          break;
        case "REFRESH":
          await this.page.reload();
          break;
        case "CLICK":
          await this.click(args);
          break;
        case "TYPE":
          await this.type(args);
          break;
        case "PRESS":
          await this.press(args);
          break;
        case "SELECT":
          await this.select(args);
          break;
        case "CHECK":
          await this.check(args);
          break;
        case "UPLOAD":
          await this.upload(args);
          break;
        case "WAIT":
          await this.wait(args);
          break;
        case "WAITLOAD":
          await this.page.waitForLoadState("networkidle");
          break;
        case "SCROLL":
          await this.scroll(args);
          break;
        case "SCROLLDOWN":
          await this.scrollDown(args);
          break;
        case "SCAN":
          await this.scan(args);
          break;
        case "EXTRACT":
          await this.extract(args);
          break;
        case "SCREENSHOT":
          await this.screenshot(args);
          break;
        case "PAGINATE":
          await this.paginate(args);
          break;
        case "INFINITESCROLL":
          await this.infiniteScroll();
          break;
        case "FETCH":
          await this.fetch(args);
          break;
        case "DOWNLOAD":
          await this.download(args);
          break;
        case "REFERER":
          await this.setReferer(args);
          break;
        case "BYPASS_CLOUDFLARE":
          await this.bypassCloudflare(args);
          break;
        case "REPEAT":
          await this.repeat(cmdStep);
          break;
        case "IF":
          await this.conditional(cmdStep);
          break;
        case "LOOP":
          await this.loop(cmdStep);
          break;
      }
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      logger.warn(`Line ${line} SKIPPED - ${action}: ${msg}`);
      this.result.errors.push({ line, action, error: msg });
      this.result.skipped.push(`Line ${line}: ${action} ${args.join(" ")}`);
    }
  }

  /** GOTO <url> - navigates and waits for DOMContentLoaded. */
  async goto(args) {
    const [url] = args;
    if (!url) throw new Error("GOTO requires a URL");
    await this.page.goto(url, { waitUntil: "domcontentloaded", timeout: config.browser.timeout });
    step(this.url, `GOTO ${url}`, { mode: "cmd", step: "GOTO", url });
  }

  /** CLICK <selector> - waits up to 10s for the element, then clicks it. */
  async click(args) {
    const [selector] = args;
    if (!selector) throw new Error("CLICK requires a selector");
    await this.page.waitForSelector(selector, { timeout: 1e4 });
    await this.page.click(selector);
  }

  /** TYPE <selector> <text...> - fills the element with the space-joined remaining arguments. */
  async type(args) {
    const [selector, ...words] = args;
    const text = words.join(" ");
    if (!selector) throw new Error("TYPE requires a selector");
    if (!text) throw new Error("TYPE requires text");
    await this.page.waitForSelector(selector, { timeout: 1e4 });
    await this.page.fill(selector, text);
  }

  /** PRESS <key> - presses a keyboard key. */
  async press(args) {
    const [key] = args;
    if (!key) throw new Error("PRESS requires a key");
    await this.page.keyboard.press(key);
  }

  /** SELECT <selector> <value> - picks an option. */
  async select(args) {
    const [selector, value] = args;
    if (!selector) throw new Error("SELECT requires a selector");
    if (!value) throw new Error("SELECT requires a value");
    await this.page.selectOption(selector, value);
  }

  /** CHECK <selector> - checks the element. */
  async check(args) {
    const [selector] = args;
    if (!selector) throw new Error("CHECK requires a selector");
    await this.page.check(selector);
  }

  /** UPLOAD <selector> <filePath> - sets the file on a file input. */
  async upload(args) {
    const [selector, filePath] = args;
    if (!selector) throw new Error("UPLOAD requires a selector");
    if (!filePath) throw new Error("UPLOAD requires a file path");
    await this.page.setInputFiles(selector, filePath);
  }

  /** WAIT <ms|selector> - an all-digit argument sleeps, anything else waits for the selector. */
  async wait(args) {
    const [target] = args;
    if (!target) throw new Error("WAIT requires a selector or ms value");
    if (/^\d+$/.test(target)) {
      await this.page.waitForTimeout(Number.parseInt(target, 10));
      return;
    }
    await this.page.waitForSelector(target, { timeout: config.browser.timeout });
  }

  /** SCROLL <selector> - scrolls the element into view. */
  async scroll(args) {
    const [selector] = args;
    if (!selector) throw new Error("SCROLL requires a selector");
    await this.page.locator(selector).scrollIntoViewIfNeeded();
  }

  /**
   * SCROLLDOWN [pixels=500] - scrolls the window vertically.
   * @throws {Error} When the argument is not a number (this used to scroll by NaN).
   */
  async scrollDown(args) {
    const pixels = Number.parseInt(args[0] || "500", 10);
    if (Number.isNaN(pixels)) throw new Error("SCROLLDOWN requires a numeric pixel value");
    await this.page.evaluate((px) => window.scrollBy(0, px), pixels);
  }

  /** SCAN <selector> - records tag, text (first 100 chars) and common attributes of each match. */
  async scan(args) {
    const [selector] = args;
    if (!selector) throw new Error("SCAN requires a selector");
    const elements = await this.page.$$(selector);
    const found = [];
    for (const el of elements) {
      const tag = await el.evaluate((e) => e.tagName.toLowerCase());
      const text = await el.textContent();
      const href = await el.getAttribute("href");
      const src = await el.getAttribute("src");
      const id = await el.getAttribute("id");
      const cls = await el.getAttribute("class");
      found.push({ tag, text: text?.trim().slice(0, 100), href, src, id, class: cls });
    }
    this.result.extracted.push({ type: "scan", selector, count: found.length, found });
    highlight(this.url, { type: "scan", selector, count: found.length });
    step(this.url, `SCAN found ${found.length} elements matching "${selector}"`, { mode: "cmd", step: "SCAN" });
  }

  /** EXTRACT <selector> [attr=text] - collects trimmed text or an attribute from each match. */
  async extract(args) {
    const [selector] = args;
    const attr = args[1] || "text";
    if (!selector) throw new Error("EXTRACT requires a selector");
    const elements = await this.page.$$(selector);
    const data = [];
    for (const el of elements) {
      if (attr === "text") {
        const text = await el.textContent();
        data.push(text?.trim());
      } else {
        data.push(await el.getAttribute(attr));
      }
    }
    this.result.extracted.push({ selector, attr, count: data.length, data });
    highlight(this.url, { selector, attr, count: data.length });
  }

  /** SCREENSHOT [path] - full-page screenshot; the default name is timestamped. */
  async screenshot(args) {
    const outputPath = args[0] || `screenshot-${Date.now()}.png`;
    await this.page.screenshot({ path: outputPath, fullPage: true });
    logger.info(`Screenshot saved: ${outputPath}`);
  }

  /**
   * PAGINATE <selector> [maxClicks=1000] - clicks the "next" element until
   * it stops appearing or a click fails.
   *
   * The click limit stops a control that never disappears from looping forever.
   * @throws {Error} When the selector is missing or the limit is not a positive integer.
   */
  async paginate(args) {
    const [selector, limit] = args;
    if (!selector) throw new Error("PAGINATE requires a selector");
    const maxClicks = limit === undefined ? 1e3 : Number.parseInt(limit, 10);
    if (!Number.isInteger(maxClicks) || maxClicks < 1) {
      throw new Error("PAGINATE limit must be a positive integer");
    }
    let pageNumber = 1;
    while (pageNumber <= maxClicks) {
      try {
        await this.page.waitForSelector(selector, { timeout: 5e3 });
        logger.info(`Clicking next page (${pageNumber})`);
        await this.page.click(selector);
        await this.page.waitForLoadState("networkidle");
        pageNumber++;
      } catch {
        logger.info(`Pagination complete - ${pageNumber} pages`);
        return;
      }
    }
    logger.warn(`Pagination stopped after ${maxClicks} clicks - limit reached`);
  }

  /** INFINITESCROLL - scrolls to the bottom until the height stops growing (max 50 rounds). */
  async infiniteScroll() {
    const maxAttempts = 50;
    let previousHeight = 0;
    let attempts = 0;
    while (attempts < maxAttempts) {
      const currentHeight = await this.page.evaluate(() => document.body.scrollHeight);
      if (currentHeight === previousHeight) {
        break;
      }
      await this.page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
      await this.page.waitForTimeout(1e3);
      previousHeight = currentHeight;
      attempts++;
    }
    logger.info(`Infinite scroll complete - ${attempts} scrolls`);
  }

  /** FETCH <url> [varName] - fetches through SmartFetch and optionally stores the body. */
  async fetch(args) {
    const [url, varName] = args;
    if (!url) throw new Error("FETCH requires a URL");
    const response = await this.smartFetch.fetch({
      url,
      autoReferer: true
    });
    if (varName) {
      this.variables[varName] = response.data;
      logger.info(`Saved response to variable: ${varName}`);
    }
    this.result.extracted.push({
      type: "fetch",
      url,
      status: response.status,
      data: response.data
    });
  }

  /**
   * DOWNLOAD <url> [outputPath] [referer] - downloads through SmartFetch.
   * Without an explicit referer, the one set by REFERER is used.
   */
  async download(args) {
    const [url] = args;
    const outputPath = args[1] || `./downloads/${Date.now()}.bin`;
    const referer = args[2] ?? this.manualReferer;
    if (!url) throw new Error("DOWNLOAD requires a URL");
    logger.info(`Downloading: ${url}`);
    await this.smartFetch.download(url, outputPath, referer);
    this.result.extracted.push({
      type: "download",
      url,
      path: outputPath
    });
  }

  /**
   * REFERER <url> - remembers the referer and sends it with later requests
   * from the page. This command used to log the value and do nothing else.
   */
  async setReferer(args) {
    const [referer] = args;
    if (!referer) throw new Error("REFERER requires a URL");
    this.manualReferer = referer;
    await this.page.setExtraHTTPHeaders({ Referer: referer });
    logger.info(`Set manual Referer: ${referer}`);
  }

  /**
   * BYPASS_CLOUDFLARE - runs the Cloudflare handler on the current page.
   * Arguments are accepted but not used.
   * @throws {Error} When the handler reports that the challenge was not
   *   cleared, so the failure shows up in the result.
   */
  async bypassCloudflare(args) {
    const cleared = await this.cfManager.handleCloudflare(this.page, this.url);
    if (!cleared) throw new Error("Cloudflare challenge could not be cleared");
  }

  /** REPEAT <selector> - runs the child steps once per element matching the selector. */
  async repeat(stepCmd) {
    const [selector] = stepCmd.args;
    if (!selector) throw new Error("REPEAT requires a selector");
    if (!stepCmd.children?.length) throw new Error("REPEAT has no child commands");
    const elements = await this.page.$$(selector);
    const total = elements.length;
    step(this.url, `REPEAT ${total}x over "${selector}"`, { mode: "cmd", step: "REPEAT" });
    for (let i = 0; i < total; i++) {
      step(this.url, ` REPEAT iteration ${i + 1}/${total}`, { mode: "cmd" });
      for (const child of stepCmd.children) {
        await this.runStep(child);
      }
    }
  }

  /** IF <selector> - runs the child steps when at least one element matches. */
  async conditional(stepCmd) {
    const [selector] = stepCmd.args;
    if (!selector) throw new Error("IF requires a selector");
    if (!stepCmd.children?.length) throw new Error("IF has no child commands");
    const matches = await this.page.locator(selector).count();
    if (matches === 0) return;
    for (const child of stepCmd.children) {
      await this.runStep(child);
    }
  }

  /**
   * LOOP [count=1] - runs the child steps `count` times.
   * @throws {Error} When the count is not a non-negative integer (a
   *   non-numeric count used to run zero iterations without any report).
   */
  async loop(stepCmd) {
    const count = Number.parseInt(stepCmd.args[0] || "1", 10);
    if (!Number.isInteger(count) || count < 0) {
      throw new Error("LOOP requires a non-negative iteration count");
    }
    if (!stepCmd.children?.length) throw new Error("LOOP has no child commands");
    for (let i = 0; i < count; i++) {
      step(this.url, ` LOOP iteration ${i + 1}/${count}`, { mode: "cmd" });
      for (const child of stepCmd.children) {
        await this.runStep(child);
      }
    }
  }
};
|
|
1556
|
+
|
|
1557
|
+
// src/core/scraper.ts
|
|
1558
|
+
init_logger();
|
|
1559
|
+
init_config();
|
|
1560
|
+
/**
 * High-level scraper facade that owns one Chromium browser, one browser
 * context and one page, created lazily by `init()` and released by `close()`.
 */
var FirekidScraper = class {
  config;
  browser = null;
  context = null;
  page = null;
  cfManager;
  /**
   * @param {object} [userConfig] - Optional overrides; any key left
   *   null/undefined falls back to the module-level `config` (or `true` for
   *   `bypassCloudflare`).
   */
  constructor(userConfig = {}) {
    this.config = {
      headless: userConfig.headless ?? config.browser.headless,
      bypassCloudflare: userConfig.bypassCloudflare ?? true,
      maxWorkers: userConfig.maxWorkers ?? config.browser.maxWorkers,
      timeout: userConfig.timeout ?? config.browser.timeout,
      dataDir: userConfig.dataDir ?? config.storage.dataDir,
      logLevel: userConfig.logLevel ?? config.logging.level
    };
    this.cfManager = new CloudflareManager();
  }
  /**
   * Launches the browser, context and page. Does nothing when a browser is
   * already present.
   *
   * @returns {Promise<void>}
   * @throws Rethrows any launch/setup error after releasing whatever was
   *   already created, so a later `init()` starts from a clean state.
   */
  async init() {
    if (this.browser) return;
    logger.info("Initializing Firekid Scraper...");
    try {
      this.browser = await import_playwright.chromium.launch({
        headless: this.config.headless,
        args: [
          "--disable-blink-features=AutomationControlled",
          "--no-sandbox"
        ]
      });
      this.context = await this.browser.newContext({
        viewport: { width: 1920, height: 1080 },
        userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
      });
      await applyGhost(this.context);
      this.page = await this.context.newPage();
    } catch (err) {
      // Without this, `this.browser` would stay set and every later init()
      // would return early with a null page while the browser process leaks.
      await this.releaseResources();
      throw err;
    }
    logger.info("Firekid Scraper initialized");
  }
  /**
   * Navigates the page to `url` (waiting for DOMContentLoaded) and, when
   * `bypassCloudflare` is enabled, hands the page to the Cloudflare manager.
   *
   * @param {string} url
   * @returns {Promise<void>}
   */
  async goto(url) {
    await this.init();
    if (!this.page) throw new Error("Page not initialized");
    logger.info(`Navigating to ${url}`);
    await this.page.goto(url, {
      waitUntil: "domcontentloaded",
      timeout: this.config.timeout
    });
    if (this.config.bypassCloudflare) {
      await this.cfManager.handleCloudflare(this.page, url);
    }
  }
  /**
   * Navigates to `url` and reads the trimmed text of the first element
   * matching each selector.
   *
   * @param {string} url
   * @param {Record<string, string>} selectors - Result key -> selector.
   * @returns {Promise<Record<string, string|null>>} One entry per key; `null`
   *   when the lookup fails or the text is empty.
   */
  async extract(url, selectors) {
    await this.goto(url);
    if (!this.page) throw new Error("Page not initialized");
    const data = {};
    for (const [key, selector] of Object.entries(selectors)) {
      try {
        const text = await this.page.locator(selector).first().textContent();
        data[key] = text?.trim() || null;
      } catch (err) {
        // Best effort per field: keep going, but keep the reason in the log.
        logger.warn(`Failed to extract ${key} with selector ${selector}: ${err?.message ?? err}`);
        data[key] = null;
      }
    }
    return data;
  }
  /**
   * Navigates to `url` and runs the lazily loaded AutoMode on the page.
   *
   * @param {string} url
   * @returns {Promise<*>} Whatever `AutoMode#execute` resolves to.
   */
  async auto(url) {
    await this.goto(url);
    if (!this.page) throw new Error("Page not initialized");
    logger.info("Running auto mode...");
    const { AutoMode: AutoMode2 } = await Promise.resolve().then(() => (init_auto(), auto_exports));
    const autoMode = new AutoMode2(this.page);
    return await autoMode.execute(url);
  }
  /**
   * Parses a command file and executes it against the page.
   *
   * @param {string} filePath
   * @returns {Promise<{success: *, data: *, errors: Array, timestamp: number}>}
   */
  async runCommandFile(filePath) {
    await this.init();
    if (!this.page) throw new Error("Page not initialized");
    const parser = new CommandParser();
    const cmdFile = parser.load(filePath);
    logger.info(`Executing command file: ${cmdFile.site}`);
    // The executor is seeded with the first step's first argument
    // (presumably the GOTO url — confirm against CommandExecutor).
    const firstArg = cmdFile.steps?.[0]?.args?.[0] || "";
    const executor = new CommandExecutor(this.page, firstArg);
    const result = await executor.execute(cmdFile);
    return {
      success: result.success,
      data: result.extracted,
      errors: result.errors.map((e) => e.error),
      timestamp: Date.now()
    };
  }
  /**
   * Closes page, context and browser (in that order) and clears the handles.
   * Every handle is attempted even if an earlier one fails.
   *
   * @returns {Promise<void>}
   * @throws Rethrows the first close error, after all handles were attempted.
   */
  async close() {
    const closeError = await this.releaseResources();
    if (closeError) throw closeError;
    logger.info("Firekid Scraper closed");
  }
  /**
   * Closes whichever of page/context/browser exist and resets the fields.
   *
   * @returns {Promise<Error|null>} The first error raised while closing, if any.
   */
  async releaseResources() {
    const handles = [this.page, this.context, this.browser];
    // Clear first so the instance is reusable even when a close() rejects.
    this.page = null;
    this.context = null;
    this.browser = null;
    let firstError = null;
    for (const handle of handles) {
      if (!handle) continue;
      try {
        await handle.close();
      } catch (err) {
        firstError ??= err;
      }
    }
    return firstError;
  }
  /** @returns {object|null} The current page, or null before init / after close. */
  getPage() {
    return this.page;
  }
  /** @returns {object|null} The current browser, or null before init / after close. */
  getBrowser() {
    return this.browser;
  }
  /** @returns {object|null} The current context, or null before init / after close. */
  getContext() {
    return this.context;
  }
};
|
|
1665
|
+
|
|
1666
|
+
// src/index.ts
|
|
1667
|
+
init_smart_fetch();
|
|
1668
|
+
|
|
1669
|
+
// src/recorder/recorder.ts
|
|
1670
|
+
init_cjs_shims();
|
|
1671
|
+
var import_playwright2 = require("playwright");
|
|
1672
|
+
|
|
1673
|
+
// src/recorder/selector-generator.ts
|
|
1674
|
+
init_cjs_shims();
|
|
1675
|
+
/**
 * Builds candidate selectors for a recorded element.
 */
var SelectorGenerator = class {
  /**
   * Derives selectors from a plain element description, in priority order:
   * id, classes, then (when `tagName` is present) type attribute, href
   * substring and visible text.
   *
   * @param {*} page - Unused here; kept for interface compatibility.
   * @param {object} element - Reads `id`, `className`, `tagName`, `type`,
   *   `href` and `textContent`.
   * @returns {Promise<{primary: string, fallbacks: string[]}>} `primary` is
   *   the first candidate, or `"body"` when none could be derived.
   */
  async generate(page, element) {
    const selectors = [];
    if (element.id) {
      selectors.push(`#${this.escapeIdentifier(element.id)}`);
    }
    // className can be a non-string (e.g. on SVG elements); only use strings.
    if (element.className && typeof element.className === "string") {
      // Class lists may be separated by any whitespace, not just one space.
      const classes = element.className.split(/\s+/).filter(Boolean);
      if (classes.length > 0) {
        selectors.push(`.${classes.map((name) => this.escapeIdentifier(name)).join(".")}`);
      }
    }
    if (element.tagName) {
      const tag = element.tagName.toLowerCase();
      if (element.type) {
        selectors.push(`${tag}[type="${this.escapeQuoted(element.type)}"]`);
      }
      if (element.href) {
        selectors.push(`${tag}[href*="${this.escapeQuoted(this.simplifyUrl(element.href))}"]`);
      }
      if (element.textContent) {
        // Collapse whitespace so a line break cannot end up inside the quoted
        // string, then cap the length as before.
        const text = String(element.textContent).replace(/\s+/g, " ").trim().slice(0, 30).trimEnd();
        if (text) {
          selectors.push(`${tag}:has-text("${this.escapeQuoted(text)}")`);
        }
      }
    }
    const primary = selectors[0] || "body";
    const fallbacks = selectors.slice(1);
    return { primary, fallbacks };
  }
  /**
   * Escapes a value for use as a CSS identifier (id or class name): every
   * character outside `[A-Za-z0-9_-]` and non-ASCII is backslash-escaped, and
   * a leading digit is written as a code-point escape.
   *
   * @param {*} value
   * @returns {string}
   */
  escapeIdentifier(value) {
    const escaped = String(value).replace(
      /[^\w\-\u00a0-\uffff]/g,
      (ch) => /[\0-\x1f\x7f]/.test(ch) ? `\\${ch.charCodeAt(0).toString(16)} ` : `\\${ch}`
    );
    // An identifier may not start with a digit (optionally after one hyphen).
    return escaped.replace(/^(-?)(\d)/, (_match, dash, digit) => `${dash}\\3${digit} `);
  }
  /**
   * Escapes backslashes and double quotes for use inside a double-quoted
   * selector string.
   *
   * @param {*} value
   * @returns {string}
   */
  escapeQuoted(value) {
    return String(value).replace(/["\\]/g, "\\$&");
  }
  /**
   * Reduces an absolute URL to its pathname; returns the input unchanged when
   * it cannot be parsed as a URL.
   *
   * @param {string} url
   * @returns {string}
   */
  simplifyUrl(url) {
    try {
      const parsed = new URL(url);
      return parsed.pathname;
    } catch {
      return url;
    }
  }
  /**
   * Computes a `parent > child` CSS path for an element inside the page,
   * walking up until an ancestor with an id (inclusive) or the document.
   *
   * @param {object} page - Object exposing `evaluate(fn, arg)`.
   * @param {*} element - Passed through to `page.evaluate` as the element.
   * @returns {Promise<string>}
   */
  async generateCssPath(page, element) {
    // The callback is evaluated by `page.evaluate`, so it must be
    // self-contained and cannot call methods of this class.
    return await page.evaluate((el) => {
      const path5 = [];
      let current = el;
      while (current && current.nodeType === Node.ELEMENT_NODE) {
        let selector = current.nodeName.toLowerCase();
        if (current.id) {
          const id = typeof CSS !== "undefined" && CSS.escape ? CSS.escape(current.id) : current.id;
          selector += `#${id}`;
          path5.unshift(selector);
          break;
        } else {
          // Count preceding siblings of the same node name.
          let sibling = current;
          let nth = 1;
          while (sibling.previousElementSibling) {
            sibling = sibling.previousElementSibling;
            if (sibling.nodeName === current.nodeName) {
              nth++;
            }
          }
          if (nth > 1) {
            selector += `:nth-of-type(${nth})`;
          }
        }
        path5.unshift(selector);
        current = current.parentNode;
      }
      return path5.join(" > ");
    }, element);
  }
};
|
|
1744
|
+
|
|
1745
|
+
// src/recorder/pattern-detector.ts
|
|
1746
|
+
init_cjs_shims();
|
|
1747
|
+
init_logger();
|
|
1748
|
+
/**
 * Inspects a list of recorded actions and reports which interaction
 * patterns (form, pagination, infinite scroll, download, login, search)
 * they appear to contain.
 */
var PatternDetector = class {
  /**
   * Runs every detector and logs the combined result.
   *
   * @param {Array<object>} actions - Recorded actions (`type`, `selectors`,
   *   optional `element`, `fieldType`).
   * @returns {object} Keys `hasFormSubmission`, `hasPagination`,
   *   `hasInfiniteScroll` (object or null) and `hasDownloadFlow`, `hasLogin`,
   *   `hasSearch` (boolean).
   */
  analyze(actions) {
    const patterns = {};
    patterns.hasFormSubmission = this.detectFormSubmission(actions);
    patterns.hasPagination = this.detectPagination(actions);
    patterns.hasInfiniteScroll = this.detectInfiniteScroll(actions);
    patterns.hasDownloadFlow = this.detectDownloadFlow(actions);
    patterns.hasLogin = this.detectLogin(actions);
    patterns.hasSearch = this.detectSearch(actions);
    logger.info("[pattern-detector] Detected patterns:", patterns);
    return patterns;
  }
  /**
   * Returns `value` when it is a string, otherwise an empty string.
   * Element properties such as `className` and `href` are not always strings
   * (SVG elements expose objects), and calling string methods on them throws.
   *
   * @param {*} value
   * @returns {string}
   */
  textOf(value) {
    return typeof value === "string" ? value : "";
  }
  /**
   * A form submission is at least two type actions plus a click on something
   * that looks like a submit control.
   *
   * @param {Array<object>} actions
   * @returns {{type: string, fields: Array<object>, submitButton: string}|null}
   */
  detectFormSubmission(actions) {
    const typeActions = actions.filter((a) => a.type === "type");
    const clickActions = actions.filter((a) => a.type === "click");
    if (typeActions.length < 2) return null;
    const fields = typeActions.map((action) => ({
      selector: action.selectors.primary,
      type: action.fieldType || "text",
      placeholder: action.element?.placeholder || ""
    }));
    const submitButton = clickActions.find((a) => {
      const label = this.textOf(a.element?.textContent).toLowerCase();
      return label.includes("submit") || label.includes("login") || a.element?.type === "submit";
    });
    if (!submitButton) return null;
    return {
      type: "FORM_SUBMISSION",
      fields,
      submitButton: submitButton.selectors.primary
    };
  }
  /**
   * Pagination is two or more "next"-like clicks that all share the same
   * primary selector.
   *
   * @param {Array<object>} actions
   * @returns {{type: string, nextButton: string, timesClicked: number}|null}
   */
  detectPagination(actions) {
    const clickActions = actions.filter((a) => a.type === "click");
    const nextClicks = clickActions.filter(
      (a) => this.textOf(a.element?.textContent).toLowerCase().includes("next") || this.textOf(a.element?.className).toLowerCase().includes("next") || this.textOf(a.element?.href).includes("page")
    );
    if (nextClicks.length < 2) return null;
    const firstNext = nextClicks[0];
    const sameSelector = nextClicks.every(
      (a) => a.selectors.primary === firstNext.selectors.primary
    );
    if (sameSelector) {
      return {
        type: "PAGINATION",
        nextButton: firstNext.selectors.primary,
        timesClicked: nextClicks.length
      };
    }
    return null;
  }
  /**
   * Infinite scroll is reported when more than five scroll actions exist.
   *
   * @param {Array<object>} actions
   * @returns {{type: string, totalScrolls: number}|null}
   */
  detectInfiniteScroll(actions) {
    const scrollActions = actions.filter((a) => a.type === "scroll");
    if (scrollActions.length > 5) {
      return {
        type: "INFINITE_SCROLL",
        totalScrolls: scrollActions.length
      };
    }
    return null;
  }
  /**
   * True when any action targets something labelled "download" or linking to
   * a known media/archive extension (a query string or fragment after the
   * extension is tolerated).
   *
   * @param {Array<object>} actions
   * @returns {boolean}
   */
  detectDownloadFlow(actions) {
    const fileLink = /\.(mp4|mp3|pdf|zip|rar)(?:[?#].*)?$/i;
    return actions.some((a) => {
      const href = this.textOf(a.element?.href);
      return this.textOf(a.element?.textContent).toLowerCase().includes("download") || href.includes("download") || fileLink.test(href);
    });
  }
  /**
   * True when the type actions include both a password-like field and a
   * username-like field (email/text type, or a selector naming one).
   *
   * @param {Array<object>} actions
   * @returns {boolean}
   */
  detectLogin(actions) {
    const typeActions = actions.filter((a) => a.type === "type");
    const hasPassword = typeActions.some(
      (a) => a.fieldType === "password" || a.selectors.primary.includes("password")
    );
    const hasUsername = typeActions.some(
      (a) => a.fieldType === "email" || a.fieldType === "text" || a.selectors.primary.includes("email") || a.selectors.primary.includes("username")
    );
    return hasPassword && hasUsername;
  }
  /**
   * True when any type action targets a field whose selector or placeholder
   * mentions "search".
   *
   * @param {Array<object>} actions
   * @returns {boolean}
   */
  detectSearch(actions) {
    return actions.some(
      (a) => a.type === "type" && (a.selectors.primary.includes("search") || this.textOf(a.element?.placeholder).toLowerCase().includes("search"))
    );
  }
};
|
|
1829
|
+
|
|
1830
|
+
// src/recorder/cmd-generator.ts
|
|
1831
|
+
init_cjs_shims();
|
|
1832
|
+
/**
 * Turns recorded actions and detected patterns into the text of a command
 * file (one command per line).
 */
var CmdGenerator = class {
  /**
   * Builds the command-file text.
   *
   * Output order: GOTO/WAITLOAD header, login/search markers, the form block
   * (WAIT + TYPE placeholder per field, CLICK submit, WAITLOAD), the remaining
   * click/type actions, then pagination, infinite-scroll and download
   * markers. Actions already covered by the form block are not repeated.
   *
   * @param {string} url - Start URL for the GOTO line.
   * @param {Array<object>} actions - Recorded actions.
   * @param {object} patterns - Result of the pattern detector.
   * @returns {string} Lines joined with "\n".
   */
  generate(url, actions, patterns) {
    const lines = [];
    lines.push(`GOTO ${url}`);
    lines.push(`WAITLOAD`);
    lines.push("");
    if (patterns.hasLogin) {
      lines.push("LOGIN DETECTED");
    }
    if (patterns.hasSearch) {
      lines.push("SEARCH DETECTED");
    }
    // Keys ("type:selector") of actions the form block already emitted.
    const coveredByForm = /* @__PURE__ */ new Set();
    if (patterns.hasFormSubmission) {
      const form = patterns.hasFormSubmission;
      lines.push("");
      for (const field of form.fields) {
        const key = `type:${field.selector}`;
        // The same field can appear several times (one entry per recorded
        // input event); emit it once.
        if (coveredByForm.has(key)) continue;
        coveredByForm.add(key);
        lines.push(`WAIT ${field.selector}`);
        lines.push(`TYPE ${field.selector} YOUR_${field.type.toUpperCase()}_HERE`);
      }
      lines.push(`CLICK ${form.submitButton}`);
      lines.push("WAITLOAD");
      coveredByForm.add(`click:${form.submitButton}`);
    }
    for (const action of this.deduplicateActions(actions)) {
      // Re-emitting form fields or a second submit click after the form
      // block would replay the form twice.
      if (coveredByForm.has(`${action.type}:${action.selectors.primary}`)) continue;
      if (action.type === "click") {
        lines.push(`CLICK ${action.selectors.primary}`);
      } else if (action.type === "type" && action.value) {
        lines.push(`TYPE ${action.selectors.primary} ${action.value}`);
      }
    }
    if (patterns.hasPagination) {
      const pagination = patterns.hasPagination;
      lines.push("");
      lines.push(`PAGINATE ${pagination.nextButton}`);
    }
    if (patterns.hasInfiniteScroll) {
      lines.push("");
      lines.push("INFINITESCROLL");
    }
    if (patterns.hasDownloadFlow) {
      lines.push("");
      lines.push("DOWNLOAD DETECTED");
    }
    return lines.join("\n");
  }
  /**
   * Collapses actions that share the same type and primary selector.
   *
   * The result keeps the position of the first occurrence. For `type`
   * actions the latest occurrence wins, because repeated recordings of the
   * same field carry progressively more complete values.
   *
   * @param {Array<object>} actions
   * @returns {Array<object>} New array; the input is not modified.
   */
  deduplicateActions(actions) {
    const indexByKey = /* @__PURE__ */ new Map();
    const unique = [];
    for (const action of actions) {
      const key = `${action.type}:${action.selectors.primary}`;
      const existingIndex = indexByKey.get(key);
      if (existingIndex === undefined) {
        indexByKey.set(key, unique.length);
        unique.push(action);
      } else if (action.type === "type") {
        unique[existingIndex] = action;
      }
    }
    return unique;
  }
};
|
|
1890
|
+
|
|
1891
|
+
// src/recorder/recorder.ts
|
|
1892
|
+
init_logger();
|
|
1893
|
+
var import_fs3 = __toESM(require("fs"));
|
|
1894
|
+
var import_path3 = __toESM(require("path"));
|
|
1895
|
+
/**
 * Opens a visible browser, records the user's clicks and typed input, and
 * writes the result as a generated command file when the page is closed.
 */
var ActionRecorder = class {
  browser = null;
  page = null;
  actions = [];
  isRecording = false;
  selectorGen;
  patternDetector;
  cmdGenerator;
  startUrl = "";
  constructor() {
    this.selectorGen = new SelectorGenerator();
    this.patternDetector = new PatternDetector();
    this.cmdGenerator = new CmdGenerator();
  }
  /**
   * Starts a recording session at `url` and resolves once the page has been
   * closed and the command file has been written.
   *
   * @param {string} url
   * @returns {Promise<void>}
   * @throws Rethrows launch/navigation errors after closing the browser.
   */
  async startRecording(url) {
    this.startUrl = url;
    this.actions = [];
    this.isRecording = true;
    logger.info("[recorder] Starting recording session...");
    console.log("\n===========================================");
    console.log(" RECORDING MODE ACTIVATED");
    console.log(` URL: ${url}`);
    console.log(" Perform your actions in the browser...");
    console.log(" Close the browser when done");
    console.log("===========================================\n");
    this.browser = await import_playwright2.chromium.launch({ headless: false });
    try {
      const context = await this.browser.newContext();
      this.page = await context.newPage();
      await this.attachListeners(this.page);
      await this.page.goto(url);
      // A recording session has no natural time limit, so opt out of the
      // default wait timeout explicitly.
      await this.page.waitForEvent("close", { timeout: 0 });
    } catch (err) {
      this.isRecording = false;
      const browser = this.browser;
      this.browser = null;
      try {
        await browser.close();
      } catch {
        // Already failing; the original error below is the one worth reporting.
      }
      throw err;
    }
    await this.stopRecording();
  }
  /**
   * Wires the page so that clicks and input events are reported back to this
   * recorder through the exposed `__recordClick` / `__recordType` functions.
   *
   * @param {object} page - Page exposing `exposeFunction`, `evaluate` and
   *   `addInitScript`.
   * @returns {Promise<void>}
   */
  async attachListeners(page) {
    await page.exposeFunction("__recordClick", async (x, y) => {
      if (!this.isRecording) return;
      const element = await page.evaluate((coords) => {
        const el = document.elementFromPoint(coords.x, coords.y);
        if (!el) return null;
        return {
          tagName: el.tagName.toLowerCase(),
          id: el.id,
          className: el.className,
          textContent: el.textContent?.slice(0, 50),
          href: el.href,
          type: el.type
        };
      }, { x, y });
      if (element) {
        const selectors = await this.selectorGen.generate(page, element);
        this.actions.push({
          type: "click",
          selectors,
          timestamp: Date.now(),
          element
        });
        logger.info(`[recorder] Recorded CLICK on ${selectors.primary}`);
      }
    });
    await page.exposeFunction("__recordType", async (selector, value) => {
      if (!this.isRecording) return;
      const element = await page.evaluate((sel) => {
        let el = null;
        try {
          el = document.querySelector(sel);
        } catch {
          // Invalid selector: treat as "element not found".
          return null;
        }
        if (!el) return null;
        return {
          tagName: el.tagName.toLowerCase(),
          id: el.id,
          type: el.type,
          placeholder: el.placeholder
        };
      }, selector);
      if (element) {
        const selectors = await this.selectorGen.generate(page, element);
        this.actions.push({
          type: "type",
          selectors,
          value,
          timestamp: Date.now(),
          element,
          fieldType: element.type
        });
        // Never echo password input into the log.
        const shown = element.type === "password" ? "[hidden]" : `"${String(value).slice(0, 20)}..."`;
        logger.info(`[recorder] Recorded TYPE in ${selectors.primary}: ${shown}`);
      }
    });
    // This function is handed to the page and runs there, so it must be
    // self-contained (no references to `this` or outer variables).
    await page.addInitScript(() => {
      // Build a real selector for the input. Passing the bare id/name to
      // querySelector would be interpreted as a tag name and match nothing.
      const selectorFor = (target) => {
        if (target.id) {
          const id = typeof CSS !== "undefined" && CSS.escape ? CSS.escape(target.id) : target.id;
          return `#${id}`;
        }
        if (target.name) {
          return `[name="${String(target.name).replace(/["\\]/g, "\\$&")}"]`;
        }
        return null;
      };
      document.addEventListener("click", (e) => {
        const target = e.target;
        if (target) {
          window.__recordClick(e.clientX, e.clientY);
        }
      });
      document.addEventListener("input", (e) => {
        const target = e.target;
        if (!target || target.tagName !== "INPUT") return;
        const selector = selectorFor(target);
        // Inputs without id or name cannot be addressed; skip them.
        if (!selector) return;
        setTimeout(() => {
          window.__recordType(selector, target.value);
        }, 500);
      });
    });
  }
  /**
   * Stops recording, analyses the captured actions, writes the generated
   * command file to `./commands/recorded-<timestamp>.cmd` and closes the
   * browser (also when analysis or writing fails).
   *
   * @returns {Promise<void>}
   */
  async stopRecording() {
    this.isRecording = false;
    logger.info(`[recorder] Recording stopped - ${this.actions.length} actions captured`);
    try {
      const patterns = this.patternDetector.analyze(this.actions);
      const cmdFile = this.cmdGenerator.generate(this.startUrl, this.actions, patterns);
      const outputDir = "./commands";
      if (!import_fs3.default.existsSync(outputDir)) {
        import_fs3.default.mkdirSync(outputDir, { recursive: true });
      }
      const filename = `recorded-${Date.now()}.cmd`;
      const filepath = import_path3.default.join(outputDir, filename);
      import_fs3.default.writeFileSync(filepath, cmdFile);
      console.log("\n===========================================");
      console.log(" RECORDING COMPLETE");
      console.log(` Saved to: ${filepath}`);
      console.log(` Actions: ${this.actions.length}`);
      console.log("===========================================\n");
    } finally {
      if (this.browser) {
        const browser = this.browser;
        this.browser = null;
        await browser.close();
      }
    }
  }
  /** @returns {Array<object>} The actions captured so far. */
  getActions() {
    return this.actions;
  }
};
|
|
2021
|
+
|
|
2022
|
+
// src/intelligence/pattern-cache.ts
|
|
2023
|
+
init_cjs_shims();
|
|
2024
|
+
var import_better_sqlite3 = __toESM(require("better-sqlite3"));
|
|
2025
|
+
init_logger();
|
|
2026
|
+
init_config();
|
|
2027
|
+
var import_path4 = __toESM(require("path"));
|
|
2028
|
+
var import_fs4 = __toESM(require("fs"));
|
|
2029
|
+
/**
 * SQLite-backed store of scraping patterns, keyed by site.
 */
var PatternCache = class {
  db;
  /**
   * Opens (creating the parent directory if needed) the database at
   * `config.storage.patternsDb` and ensures the schema exists.
   */
  constructor() {
    const dbPath = config.storage.patternsDb;
    const dir = import_path4.default.dirname(dbPath);
    if (!import_fs4.default.existsSync(dir)) {
      import_fs4.default.mkdirSync(dir, { recursive: true });
    }
    this.db = new import_better_sqlite3.default(dbPath);
    this.initialize();
  }
  /** Creates the `patterns` table and its site index when missing. */
  initialize() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS patterns (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        site TEXT NOT NULL,
        type TEXT NOT NULL,
        selectors TEXT NOT NULL,
        flow TEXT NOT NULL,
        success_rate REAL DEFAULT 1.0,
        created_at INTEGER DEFAULT (strftime('%s', 'now')),
        updated_at INTEGER DEFAULT (strftime('%s', 'now'))
      )
    `);
    this.db.exec(`
      CREATE INDEX IF NOT EXISTS idx_site ON patterns(site)
    `);
    logger.info("[pattern-cache] Database initialized");
  }
  /**
   * Inserts a new pattern row for `site`.
   *
   * @param {string} site
   * @param {{type: string, selectors: *, flow: *, successRate?: number}} pattern
   *   `selectors` and `flow` are stored as JSON; `successRate` defaults to 1
   *   (the column default) when omitted.
   */
  save(site, pattern) {
    const stmt = this.db.prepare(`
      INSERT INTO patterns (site, type, selectors, flow, success_rate)
      VALUES (?, ?, ?, ?, ?)
    `);
    stmt.run(
      site,
      pattern.type,
      JSON.stringify(pattern.selectors),
      JSON.stringify(pattern.flow),
      // An explicit value bypasses the column default, so apply it here.
      pattern.successRate ?? 1
    );
    logger.info(`[pattern-cache] Saved pattern for ${site}`);
  }
  /**
   * Loads the best pattern for `site`: highest success rate, most recently
   * updated first.
   *
   * @param {string} site
   * @returns {{type: string, selectors: *, flow: *, successRate: number}|null}
   */
  load(site) {
    const stmt = this.db.prepare(`
      SELECT * FROM patterns
      WHERE site = ?
      ORDER BY success_rate DESC, updated_at DESC
      LIMIT 1
    `);
    const row = stmt.get(site);
    if (!row) return null;
    return {
      type: row.type,
      selectors: JSON.parse(row.selectors),
      flow: JSON.parse(row.flow),
      successRate: row.success_rate
    };
  }
  /**
   * Nudges the success rate of the site's best pattern (the one `load`
   * returns) by 0.1 up or down, clamped to the range 0.1..1.
   *
   * @param {string} site
   * @param {boolean} success
   */
  updateSuccessRate(site, success) {
    const current = this.db.prepare(`
      SELECT id, success_rate FROM patterns
      WHERE site = ?
      ORDER BY success_rate DESC, updated_at DESC
      LIMIT 1
    `).get(site);
    if (!current) return;
    const newRate = success ? Math.min(1, current.success_rate + 0.1) : Math.max(0.1, current.success_rate - 0.1);
    // Update only the row the rate was derived from; matching on site alone
    // would overwrite the rate of every other pattern stored for the site.
    const stmt = this.db.prepare(`
      UPDATE patterns
      SET success_rate = ?, updated_at = strftime('%s', 'now')
      WHERE id = ?
    `);
    stmt.run(newRate, current.id);
    logger.info(`[pattern-cache] Updated success rate for ${site}: ${newRate}`);
  }
  /**
   * Lists every stored pattern, highest success rate first.
   *
   * @returns {Array<{site: string, pattern: object}>}
   */
  listAll() {
    const stmt = this.db.prepare(`
      SELECT * FROM patterns
      ORDER BY success_rate DESC
    `);
    const rows = stmt.all();
    return rows.map((row) => ({
      site: row.site,
      pattern: {
        type: row.type,
        selectors: JSON.parse(row.selectors),
        flow: JSON.parse(row.flow),
        successRate: row.success_rate
      }
    }));
  }
  /** Deletes every stored pattern. */
  clear() {
    this.db.exec("DELETE FROM patterns");
    logger.info("[pattern-cache] Cleared all patterns");
  }
  /** Closes the underlying database handle. */
  close() {
    this.db.close();
  }
};
|
|
2124
|
+
|
|
2125
|
+
// src/index.ts
|
|
2126
|
+
init_config();
|
|
2127
|
+
init_logger();
|
|
2128
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
2129
|
+
0 && (module.exports = {
|
|
2130
|
+
ActionRecorder,
|
|
2131
|
+
CloudflareManager,
|
|
2132
|
+
CommandExecutor,
|
|
2133
|
+
CommandParser,
|
|
2134
|
+
FirekidScraper,
|
|
2135
|
+
PatternCache,
|
|
2136
|
+
SmartFetch,
|
|
2137
|
+
applyGhost,
|
|
2138
|
+
config,
|
|
2139
|
+
getConfig,
|
|
2140
|
+
getNewSeed,
|
|
2141
|
+
getSeedForSite,
|
|
2142
|
+
logger,
|
|
2143
|
+
setLogLevel,
|
|
2144
|
+
updateConfig
|
|
2145
|
+
});
|