jumpy-lion 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/README.md +594 -0
  2. package/dist/browser-controller.d.ts.map +1 -1
  3. package/dist/browser-controller.js +10 -7
  4. package/dist/browser-controller.js.map +1 -1
  5. package/dist/browser-plugin.d.ts +11 -0
  6. package/dist/browser-plugin.d.ts.map +1 -1
  7. package/dist/browser-plugin.js +6 -1
  8. package/dist/browser-plugin.js.map +1 -1
  9. package/dist/browser-process/browser.d.ts +24 -0
  10. package/dist/browser-process/browser.d.ts.map +1 -1
  11. package/dist/browser-process/browser.js +22 -6
  12. package/dist/browser-process/browser.js.map +1 -1
  13. package/dist/browser-process/get-chrome-executable.d.ts +1 -0
  14. package/dist/browser-process/get-chrome-executable.d.ts.map +1 -1
  15. package/dist/browser-process/get-chrome-executable.js +15 -0
  16. package/dist/browser-process/get-chrome-executable.js.map +1 -1
  17. package/dist/browser-process/process.d.ts +2 -1
  18. package/dist/browser-process/process.d.ts.map +1 -1
  19. package/dist/browser-process/process.js +12 -1
  20. package/dist/browser-process/process.js.map +1 -1
  21. package/dist/crawler.d.ts +6 -0
  22. package/dist/crawler.d.ts.map +1 -1
  23. package/dist/crawler.js.map +1 -1
  24. package/dist/fingerprinting/fingerprint-injector.d.ts +4 -0
  25. package/dist/fingerprinting/fingerprint-injector.d.ts.map +1 -1
  26. package/dist/fingerprinting/fingerprint-injector.js +85 -21
  27. package/dist/fingerprinting/fingerprint-injector.js.map +1 -1
  28. package/dist/fingerprinting/fingerprint-overrides/audio-spoofing.d.ts.map +1 -1
  29. package/dist/fingerprinting/fingerprint-overrides/audio-spoofing.js +16 -1
  30. package/dist/fingerprinting/fingerprint-overrides/audio-spoofing.js.map +1 -1
  31. package/dist/fingerprinting/fingerprint-overrides/canvas-protection.d.ts.map +1 -1
  32. package/dist/fingerprinting/fingerprint-overrides/canvas-protection.js +18 -1
  33. package/dist/fingerprinting/fingerprint-overrides/canvas-protection.js.map +1 -1
  34. package/dist/fingerprinting/fingerprint-overrides/client-rect-spoofing.d.ts.map +1 -1
  35. package/dist/fingerprinting/fingerprint-overrides/client-rect-spoofing.js +16 -1
  36. package/dist/fingerprinting/fingerprint-overrides/client-rect-spoofing.js.map +1 -1
  37. package/dist/fingerprinting/fingerprint-overrides/coalesced-events-spoofing.d.ts.map +1 -1
  38. package/dist/fingerprinting/fingerprint-overrides/coalesced-events-spoofing.js +16 -1
  39. package/dist/fingerprinting/fingerprint-overrides/coalesced-events-spoofing.js.map +1 -1
  40. package/dist/fingerprinting/fingerprint-overrides/datadome-bypass.d.ts.map +1 -1
  41. package/dist/fingerprinting/fingerprint-overrides/datadome-bypass.js +16 -1
  42. package/dist/fingerprinting/fingerprint-overrides/datadome-bypass.js.map +1 -1
  43. package/dist/fingerprinting/fingerprint-overrides/font-spoofing.d.ts.map +1 -1
  44. package/dist/fingerprinting/fingerprint-overrides/font-spoofing.js +13 -3
  45. package/dist/fingerprinting/fingerprint-overrides/font-spoofing.js.map +1 -1
  46. package/dist/fingerprinting/fingerprint-overrides/index.d.ts +1 -1
  47. package/dist/fingerprinting/fingerprint-overrides/index.d.ts.map +1 -1
  48. package/dist/fingerprinting/fingerprint-overrides/index.js +1 -1
  49. package/dist/fingerprinting/fingerprint-overrides/index.js.map +1 -1
  50. package/dist/fingerprinting/fingerprint-overrides/mouse-humanization.d.ts.map +1 -1
  51. package/dist/fingerprinting/fingerprint-overrides/mouse-humanization.js +16 -1
  52. package/dist/fingerprinting/fingerprint-overrides/mouse-humanization.js.map +1 -1
  53. package/dist/fingerprinting/fingerprint-overrides/performance-spoofing.d.ts.map +1 -1
  54. package/dist/fingerprinting/fingerprint-overrides/performance-spoofing.js +16 -1
  55. package/dist/fingerprinting/fingerprint-overrides/performance-spoofing.js.map +1 -1
  56. package/dist/fingerprinting/fingerprint-overrides/ua-ch.d.ts.map +1 -1
  57. package/dist/fingerprinting/fingerprint-overrides/ua-ch.js +27 -18
  58. package/dist/fingerprinting/fingerprint-overrides/ua-ch.js.map +1 -1
  59. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.d.ts.map +1 -1
  60. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.js +16 -4
  61. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.js.map +1 -1
  62. package/dist/fingerprinting/fingerprint-overrides/webgpu-spoofing.d.ts.map +1 -1
  63. package/dist/fingerprinting/fingerprint-overrides/webgpu-spoofing.js +16 -2
  64. package/dist/fingerprinting/fingerprint-overrides/webgpu-spoofing.js.map +1 -1
  65. package/dist/fingerprinting/fingerprint-overrides/webrtc-spoofing.d.ts +1 -0
  66. package/dist/fingerprinting/fingerprint-overrides/webrtc-spoofing.d.ts.map +1 -1
  67. package/dist/fingerprinting/fingerprint-overrides/webrtc-spoofing.js +53 -9
  68. package/dist/fingerprinting/fingerprint-overrides/webrtc-spoofing.js.map +1 -1
  69. package/dist/page.d.ts +8 -0
  70. package/dist/page.d.ts.map +1 -1
  71. package/dist/page.js +84 -7
  72. package/dist/page.js.map +1 -1
  73. package/dist/tsconfig.build.tsbuildinfo +1 -1
  74. package/package.json +1 -1
package/README.md ADDED
@@ -0,0 +1,594 @@
1
+ # Crawler Documentation
2
+
3
+ ## Table of Contents
4
+
5
+ - [Overview](#overview)
6
+ - [NPM Package](#npm-package)
7
+ - [Usage](#usage)
8
+ - [Example Project](#example-project)
9
+ - [Internal Guide](#internal-guide)
10
+ - [Examples and Configuration](#examples-and-configuration)
11
+ - [Advanced Fingerprints Usage](#advanced-fingerprints-usage)
12
+ - [Syncing BrowserPool and launchOptions fingerprints](#syncing-browserpool-and-launchoptions-fingerprints)
13
+ - [Stealth Consistency and Network Policies](#stealth-consistency-and-network-policies)
14
+ - [Configurable Fingerprint Options](#configurable-fingerprint-options)
15
+ - [Usage](#usage-1)
16
+ - [Available Options](#available-options)
17
+ - [Core Stealth Options](#core-stealth-options)
18
+ - [Fingerprint Spoofing](#fingerprint-spoofing)
19
+ - [Platform Configuration](#platform-configuration)
20
+ - [Additional Features](#additional-features)
21
+ - [Default Behavior](#default-behavior)
22
+ - [Best Practices](#best-practices)
23
+ - [Performance Considerations](#performance-considerations)
24
+ - [Launch Options for Network and Persistence](#launch-options-for-network-and-persistence)
25
+ - [Crawler Class Documentation](#crawler-class-documentation)
26
+ - [Constructor](#constructor)
27
+ - [CdpPage Class Documentation](#cdppage-class-documentation)
28
+ - [Constructor](#constructor-1)
29
+ - [Static Methods](#static-methods)
30
+ - [Public Methods](#public-methods)
31
+ - [Utility Functions](#utility-functions)
32
+ - [createCDPRouter](#createcdprouter)
33
+
34
+ ## Overview
35
+
36
+ The `Crawler` class is a custom implementation of the `BrowserCrawler` from Crawlee, designed to utilize the Chrome DevTools Protocol (CDP) for advanced antiblocking capabilities.
37
+
38
+ ## NPM Package
39
+
40
+ `jumpy-lion` is the official CDP crawler package. See it [on npm](https://www.npmjs.com/package/jumpy-lion).
41
+
42
+ ---
43
+ ---
44
+
45
+ ## Usage
46
+
47
+ ### Example Project
48
+
49
+ Refer to this [GitHub repository](https://github.com/apify-projects/cdp-crawler-example) for a complete example of using the `Crawler` class.
50
+
51
+ ### Internal Guide
52
+
53
+ Check out the [CDP Crawler internal guide](https://www.notion.so/apify/CDP-Crawler-internal-guide-183f39950a2280be81d7c86dc048a47a?pvs=4) for a tutorial.
54
+
55
+ ### Examples and Configuration
56
+
57
+ For detailed examples and configuration patterns, see the [Examples README](./examples/README.md). The examples include:
58
+
59
+ - **Basic Configuration**: Simple fingerprint setup for common use cases
60
+ - **Comprehensive Configuration**: Full feature setup with all spoofing options
61
+ - **Platform-Specific Configurations**: macOS, Windows, and Linux targeting
62
+ - **Performance-Focused Configuration**: Optimized settings for speed
63
+ - **Minimal Configuration**: Using intelligent defaults
64
+
65
+ The examples demonstrate real-world usage patterns and best practices for different scenarios.
66
+
67
+ ### Advanced Fingerprints Usage
68
+
69
+ To use advanced fingerprints, you need to set the `useExperimentalFingerprints` option to `true` in the `launchContext.launchOptions` of the `Crawler` constructor.
70
+
71
+ ```typescript
72
+ const crawler = new Crawler({
73
+ launchContext: {
74
+ launchOptions: {
75
+ useExperimentalFingerprints: true,
76
+ }
77
+ },
78
+ });
79
+ ```
80
+
81
+ ---
82
+
83
+ ### Syncing BrowserPool and launchOptions fingerprints
84
+
85
+ **Always keep the operating system in sync between BrowserPool fingerprints and `launchOptions.fingerprintOptions`.** A mismatch can lead to inconsistent signals (for example `navigator.platform`, User-Agent, WebGL, fonts) and reduce antibot effectiveness.
86
+
87
+ - **launchOptions side**: Set `launchContext.launchOptions.fingerprintOptions.platform` to the desired platform string.
88
+ - **BrowserPool side**: When `browserPoolOptions.useFingerprints` is `true`, set `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems` to the corresponding OS.
89
+
90
+ Mapping guidance:
91
+ - `platform: 'Win32'` ↔ `operatingSystems: ['windows']`
92
+ - `platform: 'MacIntel'` ↔ `operatingSystems: ['macos']`
93
+ - `platform: 'Linux x86_64'` ↔ `operatingSystems: ['linux']`
94
+
95
+ Example:
96
+
97
+ ```typescript
98
+ const crawler = new Crawler({
99
+ launchContext: {
100
+ launchOptions: {
101
+ useExperimentalFingerprints: true,
102
+ fingerprintOptions: {
103
+ platform: 'Win32', // Keep this in sync with BrowserPool OS
104
+ },
105
+ },
106
+ },
107
+ browserPoolOptions: {
108
+ useFingerprints: true,
109
+ fingerprintOptions: {
110
+ fingerprintGeneratorOptions: {
111
+ browsers: ['chrome'],
112
+ operatingSystems: ['windows'], // Matches platform: 'Win32'
113
+ devices: ['desktop'],
114
+ },
115
+ },
116
+ },
117
+ });
118
+ ```
119
+
120
+ Note: This configuration surface will be unified later. We are currently testing our custom fingerprint injector so it works even with the BrowserPool built‑in fingerprints turned off. If you prefer, you can rely solely on the custom injector by setting `browserPoolOptions.useFingerprints: false` and keeping `launchOptions.useExperimentalFingerprints: true`.
121
+
122
+ ---
123
+
124
+ ### Stealth Consistency and Network Policies
125
+
126
+ Recent stealth hardening adds explicit consistency and policy controls:
127
+
128
+ - **UA/Binary version alignment**: the injector aligns advertised `Chrome/x.y.z.w` with the actual running Chrome binary version to reduce fingerprint drift.
129
+ - **WebRTC policy control**: set `fingerprintOptions.webRtcPolicy` to:
130
+ - `'spoof'` (default): redacts/normalizes WebRTC leak surfaces.
131
+ - `'disable'`: removes WebRTC APIs from page context.
132
+ - **DNS hardening controls**: configure DoH and secure DNS through launch options (`dnsOverHttpsServer`, `secureDnsMode`).
133
+ - **WebRTC transport policy flag**: configure `webrtcIpHandlingPolicy` at browser launch level.
134
+ - **Persistent profile mode**: set `userDataDir` (+ optional `keepUserDataDir`) to reuse browser state across runs.
135
+
136
+ ## Configurable Fingerprint Options
137
+
138
+ The CDP crawler supports configurable fingerprint options that can be passed through the crawler options. This allows you to customize the fingerprint spoofing behavior for different use cases.
139
+
140
+ ### Usage
141
+
142
+ You can configure fingerprint options by adding them to the `launchContext.launchOptions.fingerprintOptions` in your crawler configuration:
143
+
144
+ ```typescript
145
+ import { Crawler } from 'jumpy-lion';
146
+
147
+ const crawler = new Crawler({
148
+ launchContext: {
149
+ launchOptions: {
150
+ fingerprintOptions: {
151
+ // Enable advanced stealth features
152
+ enableAdvancedStealth: true,
153
+
154
+ // Bypass Runtime.enable detection
155
+ bypassRuntimeEnable: true,
156
+
157
+ // Humanize mouse interactions
158
+ humanizeInteractions: true,
159
+
160
+ // Spoof WebGL fingerprinting
161
+ spoofWebGL: true,
162
+
163
+ // Spoof audio context fingerprinting
164
+ spoofAudioContext: true,
165
+
166
+ // Add variations to client rect measurements
167
+ spoofClientRects: true,
168
+
169
+ // Mask automation flags
170
+ maskAutomationFlags: true,
171
+
172
+ // Use hardcoded defaults instead of fingerprint-generator values (set to false to use generated values)
173
+ useFingerprintDefaults: true,
174
+
175
+ // Platform to spoof (defaults to Win32 for better evasion)
176
+ platform: 'Win32', // 'Win32' | 'MacIntel' | 'Linux x86_64'
177
+
178
+ // Spoof font measurements
179
+ spoofFonts: true,
180
+
181
+ // Spoof performance timing
182
+ spoofPerformance: true,
183
+
184
+ // Spoof locale settings
185
+ spoofLocale: true,
186
+
187
+ // Detect timezone from proxy (useful with residential proxies)
188
+ detectTimezone: true,
189
+
190
+ // WebRTC policy: 'spoof' (default) or 'disable'
191
+ webRtcPolicy: 'spoof',
192
+ }
193
+ }
194
+ },
195
+ // ... other crawler options
196
+ });
197
+ ```
198
+
199
+ ### Available Options
200
+
201
+ #### Core Stealth Options
202
+
203
+ - **`enableAdvancedStealth`** (boolean): Enables advanced stealth features including WebGPU spoofing and platform consistency
204
+ - **`bypassRuntimeEnable`** (boolean): Prevents CDP detection through Runtime.enable bypass techniques
205
+ - **`humanizeInteractions`** (boolean): Generates human-like mouse movements using bezier curves
206
+
207
+ #### Fingerprint Spoofing
208
+
209
+ - **`spoofWebGL`** (boolean): Spoofs WebGL fingerprinting by modifying GPU adapter information
210
+ - **`spoofAudioContext`** (boolean): Adds noise to audio processing to prevent audio fingerprinting
211
+ - **`spoofClientRects`** (boolean): Adds small variations to getBoundingClientRect results
212
+ - **`spoofFonts`** (boolean): Hides platform-specific fonts and adds font measurement variations
213
+ - **`spoofPerformance`** (boolean): Modifies timing characteristics to match the target platform
214
+ - **`spoofLocale`** (boolean): Ensures consistent locale formatting across all browser properties
215
+
216
+ #### Platform Configuration
217
+
218
+ - **`platform`** (string): Target platform to spoof. Options: `'Win32'`, `'MacIntel'`, `'Linux x86_64'`
219
+ - **`useFingerprintDefaults`** (boolean): Use hardcoded defaults instead of fingerprint-generator values. When `false`, uses generated fingerprint values; when `true` (default), uses hardcoded defaults
220
+
221
+ #### Additional Features
222
+
223
+ - **`maskAutomationFlags`** (boolean): Masks automation-related flags in the browser
224
+ - **`detectTimezone`** (boolean): Automatically detect timezone from proxy IP (useful with residential proxies)
225
+ - **`webRtcPolicy`** (`'spoof' | 'disable'`): Controls whether WebRTC is spoofed or fully removed from page APIs
226
+
227
+ ### Default Behavior
228
+
229
+ When no fingerprint options are provided, the crawler uses intelligent defaults:
230
+
231
+ - **On Apify**: Uses Apify-recommended settings optimized for the Apify environment
232
+ - **On other platforms**: Uses a comprehensive set of stealth features with Windows platform spoofing
233
+ - **Humanization defaults**: mouse, keyboard, and scroll humanization are enabled with safe defaults
234
+ - **UA consistency**: claimed UA Chrome version is automatically aligned to the running Chrome binary
235
+
236
+ ### Best Practices
237
+
238
+ 1. **Use `platform: 'Win32'`** for better evasion on Linux servers (like Apify)
239
+ 2. **Enable `detectTimezone: true`** when using residential proxies
240
+ 3. **Use `useFingerprintDefaults: false`** to leverage fingerprint-generator's realistic values
241
+ 4. **Enable `bypassRuntimeEnable: true`** for sites that detect automation
242
+ 5. **Use `enableAdvancedStealth: true`** for maximum protection against fingerprinting
243
+ 6. **Keep OS settings in sync** between `launchOptions.fingerprintOptions.platform` and `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems`
244
+ 7. **Use `webRtcPolicy: 'disable'`** for strictest leak prevention, or `'spoof'` for compatibility-sensitive targets
245
+
246
+ ### Performance Considerations
247
+
248
+ - Enabling more fingerprint options slightly increases CPU usage
249
+ - WebGPU spoofing may add a small delay to page loads
250
+ - Humanized interactions add realistic delays to mouse movements
251
+
252
+ The fingerprint options are designed to provide maximum protection while maintaining good performance for web scraping tasks.
253
+
254
+ For more configuration examples and patterns, see the [Examples README](./examples/README.md).
255
+
256
+ ---
257
+
258
+ ## Launch Options for Network and Persistence
259
+
260
+ The following options are configured in `launchContext.launchOptions`:
261
+
262
+ - **`dnsOverHttpsServer`** (string): DoH endpoint template, for example `https://cloudflare-dns.com/dns-query`
263
+ - **`secureDnsMode`** (`'off' | 'automatic' | 'secure'`): Chromium secure DNS mode
264
+ - **`webrtcIpHandlingPolicy`** (`'default' | 'default_public_interface_only' | 'default_public_and_private_interfaces' | 'disable_non_proxied_udp'`): Browser-level WebRTC IP handling policy
265
+ - **`userDataDir`** (string): Reuse a specific Chrome profile directory across runs
266
+ - **`keepUserDataDir`** (boolean): Whether to keep the profile directory on close instead of cleaning it up. By default, a custom `userDataDir` is kept and a temporary directory is cleaned up
267
+
268
+ Example:
269
+
270
+ ```typescript
271
+ const crawler = new Crawler({
272
+ launchContext: {
273
+ launchOptions: {
274
+ dnsOverHttpsServer: 'https://cloudflare-dns.com/dns-query',
275
+ secureDnsMode: 'secure',
276
+ webrtcIpHandlingPolicy: 'disable_non_proxied_udp',
277
+ userDataDir: './state/chrome-profile',
278
+ keepUserDataDir: true,
279
+ fingerprintOptions: {
280
+ webRtcPolicy: 'disable',
281
+ },
282
+ },
283
+ },
284
+ });
285
+ ```
286
+
287
+ ---
288
+
289
+ ## `Crawler` Class Documentation
290
+
291
+ ### Constructor
292
+
293
+ #### `constructor(options: BrowserCrawlerOptions = {}, override readonly config = Configuration.getGlobalConfig())`
294
+
295
+ Initializes the `Crawler` instance with default and provided options.
296
+
297
+ - **Parameters**:
298
+
299
+ - `options` (BrowserCrawlerOptions): Configuration options for the crawler.
300
+ - `launchContext`: Specifies browser launch parameters.
301
+ - Default: `{}`
302
+ - `headless`: Runs the browser in headless mode.
303
+ - Default: `false`
304
+ - `browserPoolOptions`: Configuration for managing browser instances.
305
+ - `config` (Configuration): Global Crawlee configuration.
306
+ - Default: `Configuration.getGlobalConfig()`
307
+
308
+ - **Default Behavior**:
309
+ - Throws an error if `launchContext.proxyUrl` is provided. Use `proxyConfiguration` instead.
310
+ - Throws an error if `browserPoolOptions.browserPlugins` is set. Use `launchContext.launcher` instead.
311
+
312
+ ---
313
+
314
+ ## `CdpPage` Class Documentation
315
+
316
+ ### Constructor
317
+
318
+ #### `constructor(client: CDP.Client)`
319
+
320
+ Initializes the `CdpPage` instance with a CDP client.
321
+
322
+ - **Parameters**:
323
+
324
+ - `client` (CDP.Client): The Chrome DevTools Protocol client.
325
+
326
+ - **Emitted Events**:
327
+ - `PAGE_CREATED`: Triggered upon the creation of the page.
328
+
329
+ ### Static Methods
330
+
331
+ #### `static async create(client: CDP.Client): Promise<CdpPage>`
332
+
333
+ Creates and initializes a new `CdpPage` instance.
334
+
335
+ - **Parameters**:
336
+
337
+ - `client` (CDP.Client): The CDP client.
338
+
339
+ - **Returns**:
340
+ - `Promise<CdpPage>`: A promise resolving to the new `CdpPage` instance.
341
+
342
+ ---
343
+
344
+ ### Public Methods
345
+
346
+ #### `async url(): Promise<string>`
347
+ Gets the current URL of the page.
348
+
349
+ - **Returns**:
350
+ - `Promise<string>`: The current URL.
351
+
352
+ #### `async goto(url: string, options?: GotoOptions): Promise<void>`
353
+ Navigates to a specified URL.
354
+
355
+ - **Parameters**:
356
+ - `url` (string): The URL to navigate to.
357
+ - `options` (GotoOptions): Navigation options, including:
358
+ - `waitUntil`: When to consider navigation finished (`domcontentloaded` or `load`).
359
+ - `timeout`: Maximum time to wait for navigation in milliseconds.
360
+
361
+ #### `async click(selector: string): Promise<void>`
362
+ Simulates a click on an element identified by the selector.
363
+
364
+ - **Parameters**:
365
+ - `selector` (string): CSS selector of the element.
366
+
367
+ #### `async type(selector: string, text: string, options?: { delay?: number }): Promise<void>`
368
+ Types text into an input field.
369
+
370
+ - **Parameters**:
371
+ - `selector` (string): CSS selector of the element.
372
+ - `text` (string): Text to type.
373
+ - `options` (object): Options for typing:
374
+ - `delay`: Time in milliseconds between key presses.
375
+
376
+ #### `async screenshot(options?: { path?: string; fullPage?: boolean; format?: 'png' | 'jpeg' }): Promise<Buffer>`
377
+ Takes a screenshot of the page, with support for PNG and JPEG formats.
378
+
379
+ - **Parameters**:
380
+ - `options` (object): Screenshot options:
381
+ - `path`: File path to save the screenshot.
382
+ - `fullPage`: Capture the entire page.
383
+ - `format`: Image format, either `'png'` (default) or `'jpeg'`.
384
+
385
+ - **Returns**:
386
+ - `Promise<Buffer>`: The screenshot as a buffer.
387
+
388
+ #### `async content(): Promise<string>`
389
+ Gets the HTML content of the page.
390
+
391
+ - **Returns**:
392
+ - `Promise<string>`: The page's HTML.
393
+
394
+ #### `async toCheerio(): Promise<cheerio.CheerioAPI>`
395
+ Converts the current page content to a Cheerio instance for DOM manipulation.
396
+
397
+ - **Returns**:
398
+ - `Promise<cheerio.CheerioAPI>`: A Cheerio API instance.
399
+
400
+ #### `async setViewport(viewport: Viewport): Promise<void>`
401
+ Sets the page's viewport dimensions.
402
+
403
+ - **Parameters**:
404
+ - `viewport` (Viewport): Object with `width` and `height` properties.
405
+
406
+ #### `async setUserAgent(userAgent: string): Promise<void>`
407
+ Overrides the user-agent string.
408
+
409
+ - **Parameters**:
410
+ - `userAgent` (string): The new user-agent string.
411
+
412
+ #### `async setExtraHTTPHeaders(headers: Record<string, string>): Promise<void>`
413
+ Sets additional HTTP headers for requests.
414
+
415
+ - **Parameters**:
416
+ - `headers` (Record<string, string>): Key-value pairs of headers.
417
+
418
+ #### `async waitForResponse(urlPart: string, statusCode?: number, timeout?: number): Promise<any>`
419
+ Waits for a specific network response.
420
+
421
+ - **Parameters**:
422
+ - `urlPart` (string): Part of the URL to match.
423
+ - `statusCode` (number): Expected HTTP status code.
424
+ - `timeout` (number): Maximum wait time in milliseconds.
425
+
426
+ - **Returns**:
427
+ - `Promise<any>`: The response.
428
+
429
+ #### `async setCookies(cookies: Cookie[]): Promise<void>`
430
+ Sets cookies for the page.
431
+
432
+ - **Parameters**:
433
+ - `cookies` (Cookie[]): Array of cookies to set.
434
+
435
+ #### `async getCookies(urls?: string[]): Promise<Cookie[]>`
436
+ Retrieves cookies for the given URLs or all cookies if no URLs are specified.
437
+
438
+ - **Parameters**:
439
+ - `urls` (string[]): Optional array of URLs.
440
+
441
+ - **Returns**:
442
+ - `Promise<Cookie[]>`: Array of cookies.
443
+
444
+ #### `async waitForSelector(selector: string, options?: { timeout?: number }): Promise<void>`
445
+ Waits for an element matching the selector to appear.
446
+
447
+ - **Parameters**:
448
+ - `selector` (string): CSS selector of the element.
449
+ - `options` (object): Options for waiting:
450
+ - `timeout`: Maximum wait time in milliseconds.
451
+
452
+ #### `async elementExists(selector: string): Promise<boolean>`
453
+ Checks if an element exists.
454
+
455
+ - **Parameters**:
456
+ - `selector` (string): CSS selector of the element.
457
+
458
+ - **Returns**:
459
+ - `Promise<boolean>`: `true` if the element exists, `false` otherwise.
460
+
461
+ #### `async getTextContent(selector: string): Promise<string>`
462
+ Gets the text content of an element.
463
+
464
+ - **Parameters**:
465
+ - `selector` (string): CSS selector of the element.
466
+
467
+ - **Returns**:
468
+ - `Promise<string>`: The element's text content.
469
+
470
+ #### `async getHref(selector: string): Promise<string>`
471
+ Gets the `href` attribute of an anchor element.
472
+
473
+ - **Parameters**:
474
+ - `selector` (string): CSS selector of the anchor element.
475
+
476
+ - **Returns**:
477
+ - `Promise<string>`: The `href` value.
478
+
479
+ #### `async reload(options?: GotoOptions): Promise<void>`
480
+ Reloads the current page.
481
+
482
+ - **Parameters**:
483
+ - `options` (GotoOptions): Navigation options, including:
484
+ - `waitUntil`: When to consider reload finished (`domcontentloaded` or `load`).
485
+ - `timeout`: Maximum time to wait for reload in milliseconds.
486
+
487
+ #### `async deleteInput(selector: string): Promise<void>`
488
+ Clears the value of an input field specified by the selector.
489
+
490
+ - **Parameters**:
491
+ - `selector` (string): CSS selector of the input element.
492
+
493
+ #### `async isVisible(selector: string): Promise<boolean>`
494
+ Checks if the element specified by selector is visible (not `display: none` and not `visibility: hidden`).
495
+ The selector should be the root item which can be hidden, otherwise this function could return a false positive.
496
+
497
+ - **Parameters**:
498
+ - `selector` (string): CSS selector of the element.
499
+ - **Returns**:
500
+ - `Promise<boolean>`: `true` if the element is visible, `false` otherwise.
501
+
502
+ #### `async selectOption(dropdownSelector: string, optionSelector: string | string[], options?: SelectOptionOptions): Promise<void>`
503
+ Selects one or more options from a select element or dropdown with intelligent automatic handling.
504
+
505
+ **Key Features**:
506
+ - **Automatic Detection**: Distinguishes between HTML `<select>` elements and custom dropdowns
507
+ - **Smart Trigger Discovery**: For custom dropdowns, automatically finds and clicks triggers using multiple strategies
508
+ - **Virtualized List Support**: Handles large dropdown lists with intelligent scrolling
509
+ - **No Manual Configuration**: No need to specify separate trigger and container selectors
510
+
511
+ - **Parameters**:
512
+ - `dropdownSelector` (string): CSS selector for the select element or dropdown container.
513
+ - `optionSelector` (string | string[]): CSS selector(s) for the option(s) to select. Can be a single selector or array of selectors.
514
+ - `options` (SelectOptionOptions): Optional configuration object with the following properties:
515
+ - `timeout` (number): Maximum wait time in milliseconds. Default: 30000.
516
+ - `force` (boolean): Bypass visibility and disabled checks. Default: false.
517
+ - `waitForOptions` (boolean): Wait for dropdown options to load. Default: true.
518
+ - `maxScrollAttempts` (number): Maximum scroll attempts for virtualized dropdowns. Default: 10.
519
+
520
+ #### `async waitForElementPositionToStabilize(selector: string, timeout?: number, checkInterval?: number, stabilityThreshold?: number, tolerance?: number): Promise<void>`
521
+ Waits for an element's position to stabilize by polling its bounding box. Useful before interactions after scrolling/animations.
522
+
523
+ - **Parameters**:
524
+ - `selector` (string): Target element selector
525
+ - `timeout` (number): Max time to wait. Default: 2000
526
+ - `checkInterval` (number): Polling interval. Default: 100
527
+ - `stabilityThreshold` (number): Consecutive stable checks required. Default: 3
528
+ - `tolerance` (number): Max pixel delta to consider stable. Default: 1
529
+
530
+ - **Usage Examples for `selectOption`**:
531
+ ```typescript
532
+ // Regular HTML select element - works directly
533
+ await page.selectOption('select#country', 'option[value="us"]');
534
+
535
+ // Multiple selection in HTML select
536
+ await page.selectOption('select#languages', ['option[value="en"]', 'option[value="es"]']);
537
+
538
+ // Custom dropdown - automatically finds and clicks trigger
539
+ await page.selectOption('#dropdown-menu', '[data-value="premium"]');
540
+
541
+ // Virtualized dropdown - automatically scrolls to find option
542
+ await page.selectOption('#large-dropdown', '[data-item="item-500"]');
543
+
544
+ // With custom configuration
545
+ await page.selectOption(
546
+ '#complex-dropdown',
547
+ '.option[data-category="business"]',
548
+ {
549
+ timeout: 10000,
550
+ maxScrollAttempts: 15
551
+ }
552
+ );
553
+
554
+ // Bootstrap/Material-UI dropdowns work automatically
555
+ await page.selectOption('.MuiSelect-menu', '[data-value="option1"]');
556
+ await page.selectOption('.dropdown-menu', '.dropdown-item[data-value="choice2"]');
557
+ ```
558
+
559
+ - **How `selectOption` Trigger Detection Works**:
560
+ The method automatically detects dropdown triggers using multiple strategies:
561
+ 1. **Accessibility patterns**: `[aria-haspopup]`, `[role="button"]`
562
+ 2. **Common class names**: `.dropdown-trigger`, `.select-trigger`
563
+ 3. **Sibling elements**: Previous sibling of the dropdown container
564
+ 4. **ID pattern matching**: `#menu-id` → `#trigger-id`, `#dropdown-menu` → `#dropdown-trigger`
565
+
566
+ - **Migration from the Previous `selectOption` API**:
567
+ ```typescript
568
+ // OLD - Complex API with manual configuration
569
+ await page.selectOption('#trigger', '[data-value="item"]', {
570
+ dropdownSelector: '#menu',
571
+ optionSelector: '.dropdown-item'
572
+ });
573
+
574
+ // NEW - Simplified API with automatic detection
575
+ await page.selectOption('#menu', '[data-value="item"]');
576
+ ```
577
+
578
+ ---
579
+
580
+ ## Utility Functions
581
+
582
+ ### `createCDPRouter`
583
+
584
+ #### `export function createCDPRouter<Context extends CDPCrawlingContext = CDPCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): Router<Context>`
585
+
586
+ Creates a custom router for handling crawling routes using CDP.
587
+
588
+ - **Parameters**:
589
+ - `routes` (RouterRoutes<Context, UserData>): Optional routes for defining crawl logic.
590
+
591
+ - **Returns**:
592
+ - `Router<Context>`: A configured router instance.
593
+
594
+ ---
@@ -1 +1 @@
1
- {"version":3,"file":"browser-controller.d.ts","sourceRoot":"","sources":["../src/browser-controller.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAc,MAAM,SAAS,CAAC;AAGxD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAGhE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,OAAO,MAAM,WAAW,CAAC;AAEhC,MAAM,MAAM,aAAa,GAAG,OAAO,CAAC;AAEpC,MAAM,CAAC,OAAO,OAAO,oBAAqB,SAAQ,iBAAiB,CAC/D,WAAW,EAAE,6CAA6C;AAC1D,oBAAoB,EAAE,gBAAgB;AACtC,YAAY,EAAE,4BAA4B;AAC1C,EAAE,EAAE,0DAA0D;AAC9D,aAAa,CAChB;IACG,OAAO,CAAC,MAAM,CAAC,CAAa;IAC5B,OAAO,CAAC,iBAAiB,CAAK;IAC9B,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,SAAS,CAAS;IAEpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IA4BjC;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IAiBjC;;;OAGG;YACW,iBAAiB;IAqC/B;;;OAGG;cACa,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAiCvC;;;OAGG;cACa,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiBtC;;;OAGG;cACa,QAAQ,CAAC,UAAU,SAAI,GAAG,OAAO,CAAC,aAAa,CAAC;YAqDlD,yBAAyB;IA8JvC;;;OAGG;cACa,WAAW,CAAC,IAAI,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAQlF;;;OAGG;cACa,WAAW,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAQ1D,qBAAqB,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,EAAE,YAAY,EAAE,GAAG,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;CAI5G"}
1
+ {"version":3,"file":"browser-controller.d.ts","sourceRoot":"","sources":["../src/browser-controller.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,iBAAiB,EAAc,MAAM,SAAS,CAAC;AAGxD,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,qBAAqB,CAAC;AAGhE,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACpE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AACxC,OAAO,OAAO,MAAM,WAAW,CAAC;AAEhC,MAAM,MAAM,aAAa,GAAG,OAAO,CAAC;AAEpC,MAAM,CAAC,OAAO,OAAO,oBAAqB,SAAQ,iBAAiB,CAC/D,WAAW,EAAE,6CAA6C;AAC1D,oBAAoB,EAAE,gBAAgB;AACtC,YAAY,EAAE,4BAA4B;AAC1C,EAAE,EAAE,0DAA0D;AAC9D,aAAa,CAChB;IACG,OAAO,CAAC,MAAM,CAAC,CAAa;IAC5B,OAAO,CAAC,iBAAiB,CAAK;IAC9B,OAAO,CAAC,qBAAqB,CAAS;IACtC,OAAO,CAAC,SAAS,CAAS;IAEpB,UAAU,IAAI,OAAO,CAAC,IAAI,CAAC;IA4BjC;;;OAGG;IACH,OAAO,CAAC,yBAAyB;IAiBjC;;;OAGG;YACW,iBAAiB;IAqC/B;;;OAGG;cACa,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAiCvC;;;OAGG;cACa,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAiBtC;;;OAGG;cACa,QAAQ,CAAC,UAAU,SAAI,GAAG,OAAO,CAAC,aAAa,CAAC;YAqDlD,yBAAyB;IAkKvC;;;OAGG;cACa,WAAW,CAAC,IAAI,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAQlF;;;OAGG;cACa,WAAW,CAAC,IAAI,EAAE,aAAa,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAQ1D,qBAAqB,CAAC,SAAS,EAAE,MAAM,GAAG,SAAS,EAAE,YAAY,EAAE,GAAG,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;CAI5G"}
@@ -219,7 +219,8 @@ export default class CDPBrowserController extends BrowserController {
219
219
  // Get fingerprint generator options from launch context (passed from browserPoolOptions)
220
220
  const fingerprintGeneratorOptions = this.launchContext.launchOptions?.fingerprintGeneratorOptions;
221
221
  // Check if we need to regenerate the fingerprint based on OS mismatch
222
- let fingerprint = this.launchContext.fingerprint;
222
+ const { fingerprint: launchFingerprint } = this.launchContext;
223
+ let fingerprint = launchFingerprint;
223
224
  let fp = fingerprint.fingerprint;
224
225
  if (fingerprintGeneratorOptions?.operatingSystems?.length > 0) {
225
226
  const requestedOS = fingerprintGeneratorOptions.operatingSystems[0]?.toLowerCase();
@@ -253,7 +254,7 @@ export default class CDPBrowserController extends BrowserController {
253
254
  this.launchContext.fingerprint = fingerprint;
254
255
  log.info('Fingerprint regenerated successfully', {
255
256
  newPlatform: fp?.navigator?.platform,
256
- newUserAgent: fp?.navigator?.userAgent?.substring(0, 50) + '...',
257
+ newUserAgent: `${(fp?.navigator?.userAgent || '').substring(0, 50)}...`,
257
258
  });
258
259
  }
259
260
  catch (regenerateError) {
@@ -269,7 +270,7 @@ export default class CDPBrowserController extends BrowserController {
269
270
  hasFingerprint: !!fp,
270
271
  hasHeaders: !!fingerprint.headers,
271
272
  navigatorPlatform: fp?.navigator?.platform,
272
- navigatorUserAgent: fp?.navigator?.userAgent?.substring(0, 50) + '...',
273
+ navigatorUserAgent: `${(fp?.navigator?.userAgent || '').substring(0, 50)}...`,
273
274
  screenWidth: fp?.screen?.width,
274
275
  screenHeight: fp?.screen?.height,
275
276
  webglVendor: webgl?.vendorUnmasked?.substring(0, 30),
@@ -302,11 +303,12 @@ export default class CDPBrowserController extends BrowserController {
302
303
  spoofStorage: true,
303
304
  spoofTiming: true,
304
305
  enableDataDomeBypass: false,
305
- // Humanization disabled by default
306
+ webRtcPolicy: 'spoof',
307
+ // Safe defaults for humanized interaction patterns
306
308
  humanization: {
307
- mouse: false,
308
- keyboard: false,
309
- scroll: false,
309
+ mouse: true,
310
+ keyboard: true,
311
+ scroll: true,
310
312
  },
311
313
  };
312
314
  // Determine effective useFingerprintDefaults value
@@ -331,6 +333,7 @@ export default class CDPBrowserController extends BrowserController {
331
333
  },
332
334
  };
333
335
  await CdpFingerprintInjector.injectFingerprint(page, fingerprint, mergedOptions);
336
+ page.setHumanizationOptions(mergedOptions.humanization);
334
337
  log.info('Fingerprint injected successfully', {
335
338
  platform: mergedOptions.platform,
336
339
  isApify,