@firekid/scraper 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +767 -0
- package/dist/bin/firekid-scraper.cjs +2264 -0
- package/dist/bin/firekid-scraper.d.mts +1 -0
- package/dist/bin/firekid-scraper.d.ts +1 -0
- package/dist/bin/firekid-scraper.js +2251 -0
- package/dist/index.cjs +2145 -0
- package/dist/index.d.mts +366 -0
- package/dist/index.d.ts +366 -0
- package/dist/index.js +2107 -0
- package/package.json +90 -0
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import { Page, Browser, BrowserContext } from 'playwright';
|
|
2
|
+
import winston from 'winston';
|
|
3
|
+
|
|
4
|
+
/**
 * Options object accepted by the `FirekidScraper` constructor (its
 * `userConfig` parameter). Every field is optional; the defaults used when a
 * field is omitted are not visible in this declaration file.
 */
interface FirekidConfig {
    /** NOTE(review): presumably selects headless browser launch — confirm in implementation. */
    headless?: boolean;
    /** NOTE(review): presumably enables the Cloudflare handling path — confirm in implementation. */
    bypassCloudflare?: boolean;
    /** NOTE(review): presumably an upper bound on concurrent workers — confirm. */
    maxWorkers?: number;
    /** Numeric timeout. NOTE(review): unit (likely ms) is not stated here — confirm. */
    timeout?: number;
    /** NOTE(review): presumably a filesystem path for persisted data — confirm. */
    dataDir?: string;
    /** Log level; limited to these four names. */
    logLevel?: 'error' | 'warn' | 'info' | 'debug';
}
|
|
12
|
+
/**
 * Value resolved by `FirekidScraper.auto()` and
 * `FirekidScraper.runCommandFile()`.
 */
interface ScrapingResult {
    /** Overall outcome flag. */
    success: boolean;
    /** Payload; untyped (`any`), so callers must validate its shape themselves. */
    data: any;
    /** Error messages as plain strings. */
    errors: string[];
    /** Numeric timestamp. NOTE(review): presumably epoch milliseconds — confirm. */
    timestamp: number;
}
|
|
18
|
+
/**
 * Keyword identifying what a `CmdStep` does (the type of `CmdStep.action`).
 * The inline group labels are an editorial reading of the keyword names only.
 */
type CmdAction =
    // navigation
    | 'GOTO'
    | 'BACK'
    | 'FORWARD'
    | 'REFRESH'
    // element interaction
    | 'CLICK'
    | 'TYPE'
    | 'PRESS'
    | 'SELECT'
    | 'CHECK'
    | 'UPLOAD'
    // waiting and scrolling
    | 'WAIT'
    | 'WAITLOAD'
    | 'SCROLL'
    | 'SCROLLDOWN'
    // data capture
    | 'SCAN'
    | 'EXTRACT'
    | 'SCREENSHOT'
    | 'PAGINATE'
    | 'INFINITESCROLL'
    // network
    | 'FETCH'
    | 'DOWNLOAD'
    | 'REFERER'
    | 'BYPASS_CLOUDFLARE'
    // control flow
    | 'REPEAT'
    | 'IF'
    | 'LOOP';
|
|
19
|
+
/**
 * One step of a command file: an action keyword and its string arguments.
 * The type is recursive through `children`.
 */
interface CmdStep {
    /** Which action this step performs. */
    action: CmdAction;
    /** Arguments for the action, as raw strings. */
    args: string[];
    /** Optional nested steps. NOTE(review): presumably the bodies of REPEAT/IF/LOOP — confirm. */
    children?: CmdStep[];
    /** Line number. NOTE(review): presumably the step's position in the command file — confirm. */
    line: number;
}
|
|
25
|
+
/**
 * Parsed command file: returned by `CommandParser.parse()` / `load()` and
 * taken by `CommandExecutor.execute()`.
 */
interface CmdFile {
    /** Site string. NOTE(review): host vs. full URL is not determinable from here — confirm. */
    site: string;
    /** Top-level steps. */
    steps: CmdStep[];
    /** NOTE(review): presumably the unparsed file text — confirm. */
    raw: string;
}
|
|
30
|
+
/**
 * Set of browser/environment parameters returned by `getNewSeed()` and
 * `getSeedForSite()`, taken by the `HumanBehavior` constructor and carried in
 * `GhostOptions.seed`.
 */
interface Seed {
    /** Identifier of this seed. */
    id: string;
    /** Chrome version string. */
    chromeVersion: string;
    /** Screen width. NOTE(review): presumably pixels — confirm. */
    screenWidth: number;
    /** Screen height. NOTE(review): presumably pixels — confirm. */
    screenHeight: number;
    /** Language string. NOTE(review): presumably a BCP-47 tag such as "en-US" — confirm. */
    language: string;
    /** Timezone string. NOTE(review): presumably an IANA zone name — confirm. */
    timezone: string;
    /** Numeric canvas noise value; scale and range are not specified here. */
    canvasNoise: number;
    /** WebGL vendor string. */
    webglVendor: string;
    /** WebGL renderer string. */
    webglRenderer: string;
    /** Numeric audio noise value; scale and range are not specified here. */
    audioNoise: number;
    /** List of font names. */
    fonts: string[];
}
|
|
43
|
+
/** Optional settings for `applyGhost()`; all fields may be omitted. */
interface GhostOptions {
    /** Site host. NOTE(review): presumably forwarded to a per-site seed lookup — confirm. */
    siteHost?: string;
    /** NOTE(review): presumably forces a new seed instead of reusing one — confirm. */
    fresh?: boolean;
    /** Explicit seed to use. */
    seed?: Seed;
}
|
|
48
|
+
/**
 * Value resolved by `CloudflareManager.extractTokens()`. Every field is
 * optional, so an empty object is a valid result.
 */
interface CFTokens {
    /** NOTE(review): presumably the value of the `cf_clearance` cookie — confirm. */
    cfClearance?: string;
    /** NOTE(review): presumably the value of the `__cf_bm` cookie — confirm. */
    cfBm?: string;
    /** Header name → value map. */
    headers?: Record<string, string>;
}
|
|
53
|
+
/** Request description passed to `SmartFetch.fetch()`. Only `url` is required. */
interface FetchOptions {
    /** Target URL. */
    url: string;
    /** Explicit referer value. */
    referer?: string;
    /** NOTE(review): presumably derives the referer automatically when set — confirm. */
    autoReferer?: boolean;
    /** HTTP method; only GET and POST are representable. */
    method?: 'GET' | 'POST';
    /** Request header name → value map. */
    headers?: Record<string, string>;
    /** Cookie name → value map. */
    cookies?: Record<string, string>;
    /** Request body; untyped (`any`), serialization is not specified here. */
    body?: any;
    /** Whether redirects are followed. */
    followRedirects?: boolean;
    /** Numeric timeout. NOTE(review): unit (likely ms) is not stated here — confirm. */
    timeout?: number;
}
|
|
64
|
+
/** Value resolved by `SmartFetch.fetch()`. */
interface FetchResponse {
    /** Numeric status. NOTE(review): presumably the HTTP status code — confirm. */
    status: number;
    /** Response header name → value map. */
    headers: Record<string, string>;
    /** Response payload; untyped (`any`). */
    data: any;
}
|
|
69
|
+
/**
 * Per-site pattern record stored and retrieved by `PatternCache`
 * (`save`, `load`, `listAll`).
 */
interface SitePattern {
    /** Pattern type label (free-form string). */
    type: string;
    /** Name → selector string map. */
    selectors: Record<string, string>;
    /** Ordered list of strings. NOTE(review): presumably step names/commands — confirm. */
    flow: string[];
    /** Numeric success rate. NOTE(review): range (0–1 vs. 0–100) is not stated here — confirm. */
    successRate: number;
}
|
|
75
|
+
/** Element type of the array returned by `ActionRecorder.getActions()`. */
interface RecordedAction {
    /** Action type label (free-form string). */
    type: string;
    /** Primary selector plus fallbacks for the target. */
    selectors: SelectorSet;
    /** Optional value associated with the action. */
    value?: string;
    /** Numeric timestamp. NOTE(review): presumably epoch milliseconds — confirm. */
    timestamp: number;
    /** Untyped (`any`) element payload; shape is not specified here. */
    element?: any;
    /** Optional text associated with the action. */
    text?: string;
    /** Optional field type label. NOTE(review): may correspond to `FieldType` values — confirm. */
    fieldType?: string;
}
|
|
84
|
+
/** A primary selector string plus alternatives; used by `RecordedAction.selectors`. */
interface SelectorSet {
    /** Preferred selector. */
    primary: string;
    /** Alternative selectors. NOTE(review): presumably tried in order when `primary` fails — confirm. */
    fallbacks: string[];
}
|
|
88
|
+
/**
 * Mode name exported by the package. Where it is consumed is not visible in
 * this declaration file.
 */
type Mode =
    | 'downloader'
    | 'scrape'
    | 'navigator'
    | 'ssr'
    | 'api-hunter'
    | 'auto';
|
|
89
|
+
/** Description of one form field; element type of `FormTemplate.fields`. */
interface FormField {
    /** Selector string for the field. */
    selector: string;
    /** Alternative selector strings. */
    fallbackSelectors: string[];
    /** Classified field type. */
    type: FieldType;
    /** Label text, or `null` when there is none. */
    label: string | null;
    /** Placeholder text (always a string; never `null` per this type). */
    placeholder: string;
    /** Whether the field is required. */
    required: boolean;
    /** Validation rules attached to the field. */
    validation: ValidationRule[];
}
|
|
98
|
+
/** Classification label for a form field (the type of `FormField.type`). */
type FieldType =
    | 'email'
    | 'password'
    | 'username'
    | 'phone'
    | 'name'
    | 'text';
|
|
99
|
+
/** One validation constraint; element type of `FormField.validation`. */
interface ValidationRule {
    /** Kind of constraint. */
    type: 'regex' | 'minlength' | 'maxlength';
    /** Untyped (`any`) value. NOTE(review): presumably the length bound for min/maxlength — confirm. */
    value?: any;
    /** Pattern string. NOTE(review): presumably the regex source for `type: 'regex'` — confirm. */
    pattern?: string;
}
|
|
104
|
+
/** Description of a whole form: its fields, submit control, and classification. */
interface FormTemplate {
    /** Fields of the form. */
    fields: FormField[];
    /** Submit button string. NOTE(review): presumably a selector — confirm. */
    submitButton: string;
    /** Classified form type. */
    formType: FormType;
}
|
|
109
|
+
/** Classification label for a form (the type of `FormTemplate.formType`). */
type FormType =
    | 'LOGIN'
    | 'REGISTRATION'
    | 'SEARCH'
    | 'CONTACT'
    | 'GENERIC';
|
|
110
|
+
/**
 * Behavior parameters returned by `HumanBehavior.getProfile()`.
 * Units of the numeric fields are not stated in this declaration file.
 */
interface BehaviorProfile {
    /** Lower and upper bound for typing speed. NOTE(review): presumably per-key delay in ms — confirm. */
    typingSpeed: {
        min: number;
        max: number;
    };
    /** Sequence of timestamped pointer coordinates. */
    mouseMovements: Array<{
        x: number;
        y: number;
        timestamp: number;
    }>;
    /** Sequence of timestamped vertical scroll values. */
    scrollPatterns: Array<{
        y: number;
        timestamp: number;
    }>;
    /** List of numeric pause values. */
    pauseDistribution: number[];
    /** Sequence of click timestamps. */
    clickTiming: Array<{
        timestamp: number;
    }>;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Scraper class exported by the package.
 *
 * Private members are emitted without types in this declaration file, so
 * only the public surface below is described. All behavioral notes marked
 * NOTE(review) are inferred from names and need checking against the
 * implementation.
 */
declare class FirekidScraper {
    private config;
    private browser;
    private context;
    private page;
    private cfManager;
    /** @param userConfig Optional settings; every `FirekidConfig` field may be omitted. */
    constructor(userConfig?: FirekidConfig);
    /** Asynchronous setup. NOTE(review): presumably must be awaited before other calls — confirm. */
    init(): Promise<void>;
    /** @param url Address to navigate to. */
    goto(url: string): Promise<void>;
    /**
     * @param url Address to extract from.
     * @param selectors Name → selector string map.
     * @returns Untyped (`any`) result; NOTE(review): presumably keyed like `selectors` — confirm.
     */
    extract(url: string, selectors: Record<string, string>): Promise<any>;
    /** @param url Address to process; resolves to a `ScrapingResult`. */
    auto(url: string): Promise<ScrapingResult>;
    /** @param filePath Path of a command file; resolves to a `ScrapingResult`. */
    runCommandFile(filePath: string): Promise<ScrapingResult>;
    /** Asynchronous teardown. */
    close(): Promise<void>;
    /** @returns The Playwright `Page`, or `null` when there is none. */
    getPage(): Page | null;
    /** @returns The Playwright `Browser`, or `null` when there is none. */
    getBrowser(): Browser | null;
    /** @returns The Playwright `BrowserContext`, or `null` when there is none. */
    getContext(): BrowserContext | null;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Parser that produces `CmdFile` objects from command-file text or paths.
 * The private members (`variables`, `resolve`, `parseLine`) are untyped here.
 */
declare class CommandParser {
    private variables;
    /** Stores a string variable under `key`. NOTE(review): presumably substituted during parsing — confirm. */
    setVariable(key: string, value: string): void;
    private resolve;
    private parseLine;
    /**
     * @param content Command-file text.
     * @param filePath Optional path associated with `content`.
     */
    parse(content: string, filePath?: string): CmdFile;
    /** @param filePath Path of the command file to load; returns synchronously. */
    load(filePath: string): CmdFile;
    /**
     * @param dir Optional directory; the default used when omitted is not visible here.
     * @returns A list of strings. NOTE(review): presumably command-file paths — confirm.
     */
    findAll(dir?: string): string[];
}
|
|
157
|
+
|
|
158
|
+
/** Value resolved by `CommandExecutor.execute()`. */
interface CmdResult {
    /** Overall outcome flag. */
    success: boolean;
    /** List of strings describing what was skipped. */
    skipped: string[];
    /** Extracted records; values are `unknown` and must be narrowed by the caller. */
    extracted: Record<string, unknown>[];
    /** Per-step failures: line number, action name, and error message. */
    errors: {
        line: number;
        action: string;
        error: string;
    }[];
}
|
|
168
|
+
/**
 * Runs a parsed `CmdFile` against a Playwright `Page`.
 *
 * Public surface is the constructor and `execute()`. Everything else is
 * private and untyped in this declaration file.
 * NOTE(review): the private members after `runStep` appear to mirror the
 * `CmdAction` keywords one-to-one — confirm against the implementation.
 */
declare class CommandExecutor {
    private page;
    private url;
    private result;
    private smartFetch;
    private cfManager;
    private variables;
    /**
     * @param page Playwright page the steps operate on.
     * @param url URL string associated with this executor.
     */
    constructor(page: Page, url: string);
    /** @param cmd Parsed command file; resolves to a `CmdResult`. */
    execute(cmd: CmdFile): Promise<CmdResult>;
    private runStep;
    private goto;
    private click;
    private type;
    private press;
    private select;
    private check;
    private upload;
    private wait;
    private scroll;
    private scrollDown;
    private scan;
    private extract;
    private screenshot;
    private paginate;
    private infiniteScroll;
    private fetch;
    private download;
    private setReferer;
    private bypassCloudflare;
    private repeat;
    private conditional;
    private loop;
}
|
|
201
|
+
|
|
202
|
+
/**
 * Page-interaction helper constructed from a `Seed`; `applyGhost()` resolves
 * to an instance of this class.
 */
declare class HumanBehavior {
    private seed;
    private profile;
    /** @param seed Parameter set this instance is built from. */
    constructor(seed: Seed);
    private generateProfile;
    /**
     * Resolves after a delay.
     * @param min Optional lower bound. NOTE(review): unit and default not visible here — confirm.
     * @param max Optional upper bound. NOTE(review): unit and default not visible here — confirm.
     */
    randomDelay(min?: number, max?: number): Promise<void>;
    /** Clicks the element matched by `selector` on `page`. */
    humanClick(page: Page, selector: string): Promise<void>;
    /** Types `text` into the element matched by `selector` on `page`. */
    humanType(page: Page, selector: string, text: string): Promise<void>;
    /** Performs a scroll on `page`. */
    randomScroll(page: Page): Promise<void>;
    /** Performs a mouse movement on `page`. */
    randomMouseMovement(page: Page): Promise<void>;
    /** @returns The `BehaviorProfile` of this instance. */
    getProfile(): BehaviorProfile;
}
|
|
214
|
+
|
|
215
|
+
/** @returns A `Seed`. NOTE(review): presumably freshly generated on each call — confirm. */
declare function getNewSeed(): Seed;

/**
 * @param siteHost Host string the seed is requested for.
 * @returns A `Seed`. NOTE(review): presumably stable per host — confirm.
 */
declare function getSeedForSite(siteHost: string): Seed;

/**
 * @param context Playwright browser context to operate on.
 * @param options Optional `GhostOptions`.
 * @returns A promise for a `HumanBehavior` instance.
 */
declare function applyGhost(context: BrowserContext, options?: GhostOptions): Promise<HumanBehavior>;
|
|
219
|
+
|
|
220
|
+
/**
 * Cloudflare / Turnstile / WAF detection and handling helpers operating on a
 * Playwright `Page` or `BrowserContext`. No constructor is declared, so the
 * implicit zero-argument constructor applies.
 */
declare class CloudflareManager {
    /** Resolves to a boolean detection result for `page`. */
    detect(page: Page): Promise<boolean>;
    /**
     * @param timeoutMs Optional timeout in milliseconds (per the parameter name); default not visible here.
     * @returns Boolean outcome. NOTE(review): presumably `false` on timeout — confirm.
     */
    waitForClearance(page: Page, timeoutMs?: number): Promise<boolean>;
    /** Resolves to the `CFTokens` obtained from `context`; all token fields are optional. */
    extractTokens(context: BrowserContext): Promise<CFTokens>;
    /** Resolves to a string, or `null`. NOTE(review): presumably the WAF name, `null` when none found — confirm. */
    detectWAF(page: Page): Promise<string | null>;
    /** Resolves to a boolean outcome for `page` / `url`. */
    handleCloudflare(page: Page, url: string): Promise<boolean>;
    /** Resolves to a boolean detection result for `page`. */
    detectTurnstile(page: Page): Promise<boolean>;
    /** Resolves to a boolean outcome for `page` / `url`. */
    handleTurnstile(page: Page, url: string): Promise<boolean>;
    /** Resolves to a boolean solved-state for `page`. */
    isTurnstileSolved(page: Page): Promise<boolean>;
    /** Resolves with no value. NOTE(review): no timeout parameter is declared — confirm how it terminates. */
    waitForTurnstileSolved(page: Page): Promise<void>;
}
|
|
231
|
+
|
|
232
|
+
/**
 * HTTP fetch/download helper. The private members (`pageContext`,
 * `lastReferer`) are untyped in this declaration file.
 */
declare class SmartFetch {
    private pageContext;
    private lastReferer;
    /** Supplies the Playwright page used as context. */
    setPageContext(page: Page): void;
    /** @param options Request description; resolves to a `FetchResponse`. */
    fetch(options: FetchOptions): Promise<FetchResponse>;
    /**
     * @param url Address to download.
     * @param outputPath Destination path.
     * @param referer Optional referer value.
     */
    download(url: string, outputPath: string, referer?: string): Promise<void>;
    /** @returns The last referer as a string (never `null`/`undefined` per this type). */
    getLastReferer(): string;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Recorder exposing start/stop and the list of `RecordedAction`s. All state
 * is private and untyped in this declaration file.
 */
declare class ActionRecorder {
    private browser;
    private page;
    private actions;
    private isRecording;
    private selectorGen;
    private patternDetector;
    private cmdGenerator;
    private startUrl;
    constructor();
    /** @param url Address recording starts at. */
    startRecording(url: string): Promise<void>;
    private attachListeners;
    /** Stops recording; resolves with no value. */
    stopRecording(): Promise<void>;
    /** @returns The recorded actions. */
    getActions(): RecordedAction[];
}
|
|
256
|
+
|
|
257
|
+
/**
 * Store of `SitePattern`s keyed by a site string. Every method is
 * synchronous according to the declared return types.
 * NOTE(review): the private `db` member suggests a database-backed store —
 * confirm backend and file location in the implementation.
 */
declare class PatternCache {
    private db;
    constructor();
    private initialize;
    /** Stores `pattern` under `site`. */
    save(site: string, pattern: SitePattern): void;
    /** @returns The pattern for `site`, or `null`. NOTE(review): presumably `null` when absent — confirm. */
    load(site: string): SitePattern | null;
    /** Records one outcome for `site`. NOTE(review): how the rate is recomputed is not visible here. */
    updateSuccessRate(site: string, success: boolean): void;
    /** @returns Every stored `{ site, pattern }` pair. */
    listAll(): Array<{
        site: string;
        pattern: SitePattern;
    }>;
    /** NOTE(review): presumably removes all stored patterns — confirm. */
    clear(): void;
    /** NOTE(review): presumably releases the underlying store — confirm. */
    close(): void;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Package-wide configuration object. `updateConfig()` accepts a
 * `Partial<typeof config>`, so this shape is also the shape of updates.
 * The actual values are not visible in this declaration file.
 */
declare const config: {
    browser: {
        headless: boolean;
        timeout: number;
        maxWorkers: number;
    };
    cloudflare: {
        /** Typed as a plain string (not boolean); accepted values are not visible here. */
        bypass: string;
        turnstileSolver: string;
    };
    captcha: {
        apiKey: string;
    };
    server: {
        enabled: boolean;
        port: number;
        apiKey: string;
    };
    proxy: {
        enabled: boolean;
        url: string;
    };
    storage: {
        dataDir: string;
        patternsDb: string;
        sessionsDb: string;
    };
    logging: {
        level: "error" | "warn" | "info" | "debug";
    };
    recording: {
        autoHideAfterSolve: boolean;
        recordScreenshots: boolean;
    };
    rateLimit: {
        enabled: boolean;
        max: number;
        /** NOTE(review): unit of the window (ms vs. s) is not stated here — confirm. */
        window: number;
    };
    advanced: {
        enableTelemetry: boolean;
        enableAnalytics: boolean;
    };
};
|
|
316
|
+
/**
 * Returns a configuration object whose shape is spelled out inline below.
 * NOTE(review): the shape is structurally the same as the exported `config`
 * constant; whether the returned object is a copy or the live instance is
 * not visible here — confirm before mutating it.
 */
declare function getConfig(): {
    browser: {
        headless: boolean;
        timeout: number;
        maxWorkers: number;
    };
    cloudflare: {
        bypass: string;
        turnstileSolver: string;
    };
    captcha: {
        apiKey: string;
    };
    server: {
        enabled: boolean;
        port: number;
        apiKey: string;
    };
    proxy: {
        enabled: boolean;
        url: string;
    };
    storage: {
        dataDir: string;
        patternsDb: string;
        sessionsDb: string;
    };
    logging: {
        level: "error" | "warn" | "info" | "debug";
    };
    recording: {
        autoHideAfterSolve: boolean;
        recordScreenshots: boolean;
    };
    rateLimit: {
        enabled: boolean;
        max: number;
        window: number;
    };
    advanced: {
        enableTelemetry: boolean;
        enableAnalytics: boolean;
    };
};
|
|
360
|
+
/**
 * @param updates Subset of the top-level `config` sections. `Partial` is
 * shallow: a section may be omitted entirely, but a section that is supplied
 * must contain all of its fields to type-check.
 */
declare function updateConfig(updates: Partial<typeof config>): void;

/** Shared `winston` logger instance exported by the package. */
declare const logger: winston.Logger;

/** @param level One of the four supported level names. */
declare function setLogLevel(level: 'error' | 'warn' | 'info' | 'debug'): void;
|
|
365
|
+
|
|
366
|
+
// Public surface of the package entry point: runtime values are exported
// plainly, type-only symbols carry the `type` modifier.
export {
    ActionRecorder,
    type BehaviorProfile,
    CloudflareManager,
    type CmdFile,
    type CmdStep,
    CommandExecutor,
    CommandParser,
    type FetchOptions,
    type FetchResponse,
    type FirekidConfig,
    FirekidScraper,
    type FormTemplate,
    type GhostOptions,
    type Mode,
    PatternCache,
    type ScrapingResult,
    type Seed,
    type SitePattern,
    SmartFetch,
    applyGhost,
    config,
    getConfig,
    getNewSeed,
    getSeedForSite,
    logger,
    setLogLevel,
    updateConfig,
};
|