jumpy-lion 0.1.6-beta.3 → 0.1.6-beta.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197) hide show
  1. package/README.md +1 -607
  2. package/browser/fonts/macos-bundle/40-macos-aliases.conf +497 -0
  3. package/browser/fonts/macos-bundle/README.md +24 -0
  4. package/browser/fonts/macos-bundle/install.sh +83 -0
  5. package/dist/browser-controller.d.ts +21 -3
  6. package/dist/browser-controller.d.ts.map +1 -1
  7. package/dist/browser-controller.js +75 -231
  8. package/dist/browser-controller.js.map +1 -1
  9. package/dist/browser-plugin.d.ts +75 -104
  10. package/dist/browser-plugin.d.ts.map +1 -1
  11. package/dist/browser-plugin.js +241 -280
  12. package/dist/browser-plugin.js.map +1 -1
  13. package/dist/browser-process/align-fingerprint-config.d.ts +40 -0
  14. package/dist/browser-process/align-fingerprint-config.d.ts.map +1 -0
  15. package/dist/browser-process/align-fingerprint-config.js +79 -0
  16. package/dist/browser-process/align-fingerprint-config.js.map +1 -0
  17. package/dist/browser-process/anti-detect-config.d.ts +130 -1
  18. package/dist/browser-process/anti-detect-config.d.ts.map +1 -1
  19. package/dist/browser-process/anti-detect-config.js +353 -110
  20. package/dist/browser-process/anti-detect-config.js.map +1 -1
  21. package/dist/browser-process/browser.d.ts +37 -89
  22. package/dist/browser-process/browser.d.ts.map +1 -1
  23. package/dist/browser-process/browser.js +389 -85
  24. package/dist/browser-process/browser.js.map +1 -1
  25. package/dist/browser-process/fingerprint-config.d.ts +103 -0
  26. package/dist/browser-process/fingerprint-config.d.ts.map +1 -0
  27. package/dist/browser-process/fingerprint-config.js +245 -0
  28. package/dist/browser-process/fingerprint-config.js.map +1 -0
  29. package/dist/browser-process/get-chrome-executable.d.ts +6 -0
  30. package/dist/browser-process/get-chrome-executable.d.ts.map +1 -1
  31. package/dist/browser-process/get-chrome-executable.js +23 -0
  32. package/dist/browser-process/get-chrome-executable.js.map +1 -1
  33. package/dist/browser-process/gpu-family-profiles.d.ts +53 -0
  34. package/dist/browser-process/gpu-family-profiles.d.ts.map +1 -0
  35. package/dist/browser-process/gpu-family-profiles.js +395 -0
  36. package/dist/browser-process/gpu-family-profiles.js.map +1 -0
  37. package/dist/browser-process/index.d.ts +1 -1
  38. package/dist/browser-process/index.d.ts.map +1 -1
  39. package/dist/browser-process/index.js +1 -1
  40. package/dist/browser-process/index.js.map +1 -1
  41. package/dist/browser-process/launch-options.d.ts +195 -0
  42. package/dist/browser-process/launch-options.d.ts.map +1 -0
  43. package/dist/browser-process/launch-options.js +2 -0
  44. package/dist/browser-process/launch-options.js.map +1 -0
  45. package/dist/browser-process/port-allocator.d.ts +46 -0
  46. package/dist/browser-process/port-allocator.d.ts.map +1 -0
  47. package/dist/browser-process/port-allocator.js +171 -0
  48. package/dist/browser-process/port-allocator.js.map +1 -0
  49. package/dist/browser-process/process.d.ts +35 -0
  50. package/dist/browser-process/process.d.ts.map +1 -1
  51. package/dist/browser-process/process.js +110 -0
  52. package/dist/browser-process/process.js.map +1 -1
  53. package/dist/browser-process/signal-cleanup.d.ts +37 -0
  54. package/dist/browser-process/signal-cleanup.d.ts.map +1 -0
  55. package/dist/browser-process/signal-cleanup.js +93 -0
  56. package/dist/browser-process/signal-cleanup.js.map +1 -0
  57. package/dist/connection/cdp-reconnection.d.ts +61 -0
  58. package/dist/connection/cdp-reconnection.d.ts.map +1 -0
  59. package/dist/connection/cdp-reconnection.js +98 -0
  60. package/dist/connection/cdp-reconnection.js.map +1 -0
  61. package/dist/connection/page-session.d.ts +109 -0
  62. package/dist/connection/page-session.d.ts.map +1 -0
  63. package/dist/connection/page-session.js +257 -0
  64. package/dist/connection/page-session.js.map +1 -0
  65. package/dist/crawler.d.ts +30 -1
  66. package/dist/crawler.d.ts.map +1 -1
  67. package/dist/crawler.js +97 -7
  68. package/dist/crawler.js.map +1 -1
  69. package/dist/fingerprinting/fingerprint-injector.d.ts +6 -116
  70. package/dist/fingerprinting/fingerprint-injector.d.ts.map +1 -1
  71. package/dist/fingerprinting/fingerprint-injector.js +82 -531
  72. package/dist/fingerprinting/fingerprint-injector.js.map +1 -1
  73. package/dist/fingerprinting/fingerprint-overrides/index.d.ts +6 -2
  74. package/dist/fingerprinting/fingerprint-overrides/index.d.ts.map +1 -1
  75. package/dist/fingerprinting/fingerprint-overrides/index.js +6 -2
  76. package/dist/fingerprinting/fingerprint-overrides/index.js.map +1 -1
  77. package/dist/fingerprinting/fingerprint-overrides/navigator-override.d.ts +18 -0
  78. package/dist/fingerprinting/fingerprint-overrides/navigator-override.d.ts.map +1 -0
  79. package/dist/fingerprinting/fingerprint-overrides/navigator-override.js +136 -0
  80. package/dist/fingerprinting/fingerprint-overrides/navigator-override.js.map +1 -0
  81. package/dist/fingerprinting/fingerprint-overrides/override.d.ts +137 -0
  82. package/dist/fingerprinting/fingerprint-overrides/override.d.ts.map +1 -0
  83. package/dist/fingerprinting/fingerprint-overrides/override.js +14 -0
  84. package/dist/fingerprinting/fingerprint-overrides/override.js.map +1 -0
  85. package/dist/fingerprinting/fingerprint-overrides/platform-consistency.d.ts.map +1 -1
  86. package/dist/fingerprinting/fingerprint-overrides/platform-consistency.js +42 -11
  87. package/dist/fingerprinting/fingerprint-overrides/platform-consistency.js.map +1 -1
  88. package/dist/fingerprinting/fingerprint-overrides/registry.d.ts +27 -0
  89. package/dist/fingerprinting/fingerprint-overrides/registry.d.ts.map +1 -0
  90. package/dist/fingerprinting/fingerprint-overrides/registry.js +285 -0
  91. package/dist/fingerprinting/fingerprint-overrides/registry.js.map +1 -0
  92. package/dist/fingerprinting/fingerprint-overrides/screen-override.d.ts +16 -0
  93. package/dist/fingerprinting/fingerprint-overrides/screen-override.d.ts.map +1 -0
  94. package/dist/fingerprinting/fingerprint-overrides/screen-override.js +175 -0
  95. package/dist/fingerprinting/fingerprint-overrides/screen-override.js.map +1 -0
  96. package/dist/fingerprinting/fingerprint-overrides/stealth-script.d.ts.map +1 -1
  97. package/dist/fingerprinting/fingerprint-overrides/stealth-script.js +72 -6
  98. package/dist/fingerprinting/fingerprint-overrides/stealth-script.js.map +1 -1
  99. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.d.ts.map +1 -1
  100. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.js +23 -4
  101. package/dist/fingerprinting/fingerprint-overrides/webgl-spoofing.js.map +1 -1
  102. package/dist/fingerprinting/injection-planner.d.ts +178 -0
  103. package/dist/fingerprinting/injection-planner.d.ts.map +1 -0
  104. package/dist/fingerprinting/injection-planner.js +376 -0
  105. package/dist/fingerprinting/injection-planner.js.map +1 -0
  106. package/dist/fingerprinting/locale-resolver.d.ts +7 -0
  107. package/dist/fingerprinting/locale-resolver.d.ts.map +1 -1
  108. package/dist/fingerprinting/locale-resolver.js +24 -1
  109. package/dist/fingerprinting/locale-resolver.js.map +1 -1
  110. package/dist/fingerprinting/non-apify-fingerprint-generator.d.ts +23 -11
  111. package/dist/fingerprinting/non-apify-fingerprint-generator.d.ts.map +1 -1
  112. package/dist/fingerprinting/non-apify-fingerprint-generator.js +53 -15
  113. package/dist/fingerprinting/non-apify-fingerprint-generator.js.map +1 -1
  114. package/dist/fingerprinting/non-apify-profiles.json +52186 -0
  115. package/dist/fingerprinting/os-consistency.d.ts +31 -0
  116. package/dist/fingerprinting/os-consistency.d.ts.map +1 -0
  117. package/dist/fingerprinting/os-consistency.js +50 -0
  118. package/dist/fingerprinting/os-consistency.js.map +1 -0
  119. package/dist/fingerprinting/profile-quality.d.ts +24 -0
  120. package/dist/fingerprinting/profile-quality.d.ts.map +1 -0
  121. package/dist/fingerprinting/profile-quality.js +165 -0
  122. package/dist/fingerprinting/profile-quality.js.map +1 -0
  123. package/dist/fingerprinting/profile-selector.d.ts +101 -0
  124. package/dist/fingerprinting/profile-selector.d.ts.map +1 -0
  125. package/dist/fingerprinting/profile-selector.js +156 -0
  126. package/dist/fingerprinting/profile-selector.js.map +1 -0
  127. package/dist/fingerprinting/ua-alignment.d.ts +51 -0
  128. package/dist/fingerprinting/ua-alignment.d.ts.map +1 -0
  129. package/dist/fingerprinting/ua-alignment.js +146 -0
  130. package/dist/fingerprinting/ua-alignment.js.map +1 -0
  131. package/dist/index.d.ts +5 -0
  132. package/dist/index.d.ts.map +1 -1
  133. package/dist/index.js +6 -0
  134. package/dist/index.js.map +1 -1
  135. package/dist/input/dropdown-selector.d.ts +74 -0
  136. package/dist/input/dropdown-selector.d.ts.map +1 -0
  137. package/dist/input/dropdown-selector.js +306 -0
  138. package/dist/input/dropdown-selector.js.map +1 -0
  139. package/dist/input/element-target.d.ts +117 -0
  140. package/dist/input/element-target.d.ts.map +1 -0
  141. package/dist/input/element-target.js +383 -0
  142. package/dist/input/element-target.js.map +1 -0
  143. package/dist/input/input-emulator.d.ts +85 -0
  144. package/dist/input/input-emulator.d.ts.map +1 -0
  145. package/dist/input/input-emulator.js +319 -0
  146. package/dist/input/input-emulator.js.map +1 -0
  147. package/dist/input/input-transport.d.ts +60 -0
  148. package/dist/input/input-transport.d.ts.map +1 -0
  149. package/dist/input/input-transport.js +28 -0
  150. package/dist/input/input-transport.js.map +1 -0
  151. package/dist/input/recording-transport.d.ts +32 -0
  152. package/dist/input/recording-transport.d.ts.map +1 -0
  153. package/dist/input/recording-transport.js +43 -0
  154. package/dist/input/recording-transport.js.map +1 -0
  155. package/dist/navigation/page-navigation.d.ts +67 -0
  156. package/dist/navigation/page-navigation.d.ts.map +1 -0
  157. package/dist/navigation/page-navigation.js +107 -0
  158. package/dist/navigation/page-navigation.js.map +1 -0
  159. package/dist/network/network-watch.d.ts +72 -0
  160. package/dist/network/network-watch.d.ts.map +1 -0
  161. package/dist/network/network-watch.js +143 -0
  162. package/dist/network/network-watch.js.map +1 -0
  163. package/dist/page.d.ts +90 -103
  164. package/dist/page.d.ts.map +1 -1
  165. package/dist/page.js +184 -1117
  166. package/dist/page.js.map +1 -1
  167. package/dist/session/inject-hook.d.ts +24 -0
  168. package/dist/session/inject-hook.d.ts.map +1 -0
  169. package/dist/session/inject-hook.js +80 -0
  170. package/dist/session/inject-hook.js.map +1 -0
  171. package/dist/session/save.d.ts +21 -0
  172. package/dist/session/save.d.ts.map +1 -0
  173. package/dist/session/save.js +163 -0
  174. package/dist/session/save.js.map +1 -0
  175. package/dist/session/tar-data-dir.d.ts +13 -0
  176. package/dist/session/tar-data-dir.d.ts.map +1 -0
  177. package/dist/session/tar-data-dir.js +107 -0
  178. package/dist/session/tar-data-dir.js.map +1 -0
  179. package/dist/session/types.d.ts +80 -0
  180. package/dist/session/types.d.ts.map +1 -0
  181. package/dist/session/types.js +29 -0
  182. package/dist/session/types.js.map +1 -0
  183. package/dist/session-profile.d.ts +79 -0
  184. package/dist/session-profile.d.ts.map +1 -0
  185. package/dist/session-profile.js +124 -0
  186. package/dist/session-profile.js.map +1 -0
  187. package/dist/tsconfig.build.tsbuildinfo +1 -1
  188. package/package.json +16 -9
  189. package/scripts/postinstall.cjs +58 -0
  190. package/dist/fingerprinting/custom-fingerprint-injector.d.ts +0 -87
  191. package/dist/fingerprinting/custom-fingerprint-injector.d.ts.map +0 -1
  192. package/dist/fingerprinting/custom-fingerprint-injector.js +0 -342
  193. package/dist/fingerprinting/custom-fingerprint-injector.js.map +0 -1
  194. package/dist/launcher-wrap.d.ts +0 -10
  195. package/dist/launcher-wrap.d.ts.map +0 -1
  196. package/dist/launcher-wrap.js +0 -11
  197. package/dist/launcher-wrap.js.map +0 -1
package/README.md CHANGED
@@ -1,607 +1 @@
1
- # Crawler Documentation
2
-
3
- ## Table of Contents
4
-
5
- - [Overview](#overview)
6
- - [NPM Package](#npm-package)
7
- - [Usage](#usage)
8
- - [Example Project](#example-project)
9
- - [Internal Guide](#internal-guide)
10
- - [Examples and Configuration](#examples-and-configuration)
11
- - [Advanced Fingerprints Usage](#advanced-fingerprints-usage)
12
- - [Syncing BrowserPool and launchOptions fingerprints](#syncing-browserpool-and-launchoptions-fingerprints)
13
- - [Stealth Consistency and Network Policies](#stealth-consistency-and-network-policies)
14
- - [Configurable Fingerprint Options](#configurable-fingerprint-options)
15
- - [Usage](#usage-1)
16
- - [Available Options](#available-options)
17
- - [Core Stealth Options](#core-stealth-options)
18
- - [Fingerprint Spoofing](#fingerprint-spoofing)
19
- - [Platform Configuration](#platform-configuration)
20
- - [Additional Features](#additional-features)
21
- - [Default Behavior](#default-behavior)
22
- - [Best Practices](#best-practices)
23
- - [Performance Considerations](#performance-considerations)
24
- - [Launch Options for Network and Persistence](#launch-options-for-network-and-persistence)
25
- - [Crawler Class Documentation](#crawler-class-documentation)
26
- - [Constructor](#constructor)
27
- - [CdpPage Class Documentation](#cdppage-class-documentation)
28
- - [Constructor](#constructor-1)
29
- - [Static Methods](#static-methods)
30
- - [Public Methods](#public-methods)
31
- - [Utility Functions](#utility-functions)
32
- - [createCDPRouter](#createcdprouter)
33
-
34
- ## Overview
35
-
36
- The `Crawler` class is a custom implementation of the `BrowserCrawler` from Crawlee, designed to utilize the Chrome DevTools Protocol (CDP) for advanced antiblocking capabilities.
37
-
38
- ## NPM Package
39
-
40
- `jumpy-lion` is the official CDP crawler package. See it [here](https://www.npmjs.com/package/jumpy-lion).
41
-
42
- ### Installation
43
-
44
- ```bash
45
- npm install jumpy-lion
46
- ```
47
-
48
- A postinstall hook pulls the matching anti-detect Chromium build from a public
49
- Apify key-value store — no GitHub token, no Apify token. Works the same on
50
- your workstation, CI, and the Apify platform. If the download fails for any
51
- reason (offline, proxy, etc.) the install still succeeds; the crawler falls
52
- back to the system Chrome on `PATH`. See `browser/BUILD.md` for env-var
53
- overrides and the maintainer release flow.
54
-
55
- ---
56
- ---
57
-
58
- ## Usage
59
-
60
- ### Example Project
61
-
62
- Refer to this [GitHub repository](https://github.com/apify-projects/cdp-crawler-example) for a complete example of using the `Crawler` class.
63
-
64
- ### Internal Guide
65
-
66
- Check out the [CDP Crawler internal guide](https://www.notion.so/apify/CDP-Crawler-internal-guide-183f39950a2280be81d7c86dc048a47a?pvs=4) for a tutorial.
67
-
68
- ### Examples and Configuration
69
-
70
- For detailed examples and configuration patterns, see the [Examples README](./examples/README.md). The examples include:
71
-
72
- - **Basic Configuration**: Simple fingerprint setup for common use cases
73
- - **Comprehensive Configuration**: Full feature setup with all spoofing options
74
- - **Platform-Specific Configurations**: macOS, Windows, and Linux targeting
75
- - **Performance-Focused Configuration**: Optimized settings for speed
76
- - **Minimal Configuration**: Using intelligent defaults
77
-
78
- The examples demonstrate real-world usage patterns and best practices for different scenarios.
79
-
80
- ### Advanced Fingerprints Usage
81
-
82
- To use advanced fingerprints, you need to set the `useExperimentalFingerprints` option to `true` in the `launchContext.launchOptions` of the `Crawler` constructor.
83
-
84
- ```typescript
85
- const crawler = new Crawler({
86
- launchContext: {
87
- launchOptions: {
88
- useExperimentalFingerprints: true,
89
- }
90
- },
91
- });
92
- ```
93
-
94
- ---
95
-
96
- ### Syncing BrowserPool and launchOptions fingerprints
97
-
98
- **Always keep the operating system in sync between BrowserPool fingerprints and `launchOptions.fingerprintOptions`.** A mismatch can lead to inconsistent signals (for example `navigator.platform`, User-Agent, WebGL, fonts) and reduce antibot effectiveness.
99
-
100
- - **launchOptions side**: Set `launchContext.launchOptions.fingerprintOptions.platform` to the desired platform string.
101
- - **BrowserPool side**: When `browserPoolOptions.useFingerprints` is `true`, set `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems` to the corresponding OS.
102
-
103
- Mapping guidance:
104
- - `platform: 'Win32'` ↔ `operatingSystems: ['windows']`
105
- - `platform: 'MacIntel'` ↔ `operatingSystems: ['macos']`
106
- - `platform: 'Linux x86_64'` ↔ `operatingSystems: ['linux']`
107
-
108
- Example:
109
-
110
- ```typescript
111
- const crawler = new Crawler({
112
- launchContext: {
113
- launchOptions: {
114
- useExperimentalFingerprints: true,
115
- fingerprintOptions: {
116
- platform: 'Win32', // Keep this in sync with BrowserPool OS
117
- },
118
- },
119
- },
120
- browserPoolOptions: {
121
- useFingerprints: true,
122
- fingerprintOptions: {
123
- fingerprintGeneratorOptions: {
124
- browsers: ['chrome'],
125
- operatingSystems: ['windows'], // Matches platform: 'Win32'
126
- devices: ['desktop'],
127
- },
128
- },
129
- },
130
- });
131
- ```
132
-
133
- Note: This configuration surface will be unified later. We are currently testing our custom fingerprint injector so it works even with the BrowserPool built‑in fingerprints turned off. If you prefer, you can rely solely on the custom injector by setting `browserPoolOptions.useFingerprints: false` and keeping `launchOptions.useExperimentalFingerprints: true`.
134
-
135
- ---
136
-
137
- ### Stealth Consistency and Network Policies
138
-
139
- Recent stealth hardening adds explicit consistency and policy controls:
140
-
141
- - **UA/Binary version alignment**: the injector aligns advertised `Chrome/x.y.z.w` with the actual running Chrome binary version to reduce fingerprint drift.
142
- - **WebRTC policy control**: set `fingerprintOptions.webRtcPolicy` to:
143
- - `'spoof'` (default): redacts/normalizes WebRTC leak surfaces.
144
- - `'disable'`: removes WebRTC APIs from page context.
145
- - **DNS hardening controls**: configure DoH and secure DNS through launch options (`dnsOverHttpsServer`, `secureDnsMode`).
146
- - **WebRTC transport policy flag**: configure `webrtcIpHandlingPolicy` at browser launch level.
147
- - **Persistent profile mode**: set `userDataDir` (+ optional `keepUserDataDir`) to reuse browser state across runs.
148
-
149
- ## Configurable Fingerprint Options
150
-
151
- The CDP crawler supports configurable fingerprint options that can be passed through the crawler options. This allows you to customize the fingerprint spoofing behavior for different use cases.
152
-
153
- ### Usage
154
-
155
- You can configure fingerprint options by adding them to the `launchContext.launchOptions.fingerprintOptions` in your crawler configuration:
156
-
157
- ```typescript
158
- import { Crawler } from 'jumpy-lion';
159
-
160
- const crawler = new Crawler({
161
- launchContext: {
162
- launchOptions: {
163
- fingerprintOptions: {
164
- // Enable advanced stealth features
165
- enableAdvancedStealth: true,
166
-
167
- // Bypass Runtime.enable detection
168
- bypassRuntimeEnable: true,
169
-
170
- // Humanize mouse interactions
171
- humanizeInteractions: true,
172
-
173
- // Spoof WebGL fingerprinting
174
- spoofWebGL: true,
175
-
176
- // Spoof audio context fingerprinting
177
- spoofAudioContext: true,
178
-
179
- // Add variations to client rect measurements
180
- spoofClientRects: true,
181
-
182
- // Mask automation flags
183
- maskAutomationFlags: true,
184
-
185
- // Use hardcoded defaults instead of fingerprint-generator values (set to false to use generated values)
186
- useFingerprintDefaults: true,
187
-
188
- // Platform to spoof (defaults to Win32 for better evasion)
189
- platform: 'Win32', // 'Win32' | 'MacIntel' | 'Linux x86_64'
190
-
191
- // Spoof font measurements
192
- spoofFonts: true,
193
-
194
- // Spoof performance timing
195
- spoofPerformance: true,
196
-
197
- // Spoof locale settings
198
- spoofLocale: true,
199
-
200
- // Detect timezone from proxy (useful with residential proxies)
201
- detectTimezone: true,
202
-
203
- // WebRTC policy: 'spoof' (default) or 'disable'
204
- webRtcPolicy: 'spoof',
205
- }
206
- }
207
- },
208
- // ... other crawler options
209
- });
210
- ```
211
-
212
- ### Available Options
213
-
214
- #### Core Stealth Options
215
-
216
- - **`enableAdvancedStealth`** (boolean): Enables advanced stealth features including WebGPU spoofing and platform consistency
217
- - **`bypassRuntimeEnable`** (boolean): Prevents CDP detection through Runtime.enable bypass techniques
218
- - **`humanizeInteractions`** (boolean): Generates human-like mouse movements using bezier curves
219
-
220
- #### Fingerprint Spoofing
221
-
222
- - **`spoofWebGL`** (boolean): Spoofs WebGL fingerprinting by modifying GPU adapter information
223
- - **`spoofAudioContext`** (boolean): Adds noise to audio processing to prevent audio fingerprinting
224
- - **`spoofClientRects`** (boolean): Adds small variations to getBoundingClientRect results
225
- - **`spoofFonts`** (boolean): Hides platform-specific fonts and adds font measurement variations
226
- - **`spoofPerformance`** (boolean): Modifies timing characteristics to match the target platform
227
- - **`spoofLocale`** (boolean): Ensures consistent locale formatting across all browser properties
228
-
229
- #### Platform Configuration
230
-
231
- - **`platform`** (string): Target platform to spoof. Options: `'Win32'`, `'MacIntel'`, `'Linux x86_64'`
232
- - **`useFingerprintDefaults`** (boolean): Use hardcoded defaults instead of fingerprint-generator values. When `false`, uses generated fingerprint values; when `true` (default), uses hardcoded defaults
233
-
234
- #### Additional Features
235
-
236
- - **`maskAutomationFlags`** (boolean): Masks automation-related flags in the browser
237
- - **`detectTimezone`** (boolean): Automatically detect timezone from proxy IP (useful with residential proxies)
238
- - **`webRtcPolicy`** (`'spoof' | 'disable'`): Controls whether WebRTC is spoofed or fully removed from page APIs
239
-
240
- ### Default Behavior
241
-
242
- When no fingerprint options are provided, the crawler uses intelligent defaults:
243
-
244
- - **On Apify**: Uses Apify-recommended settings optimized for the Apify environment
245
- - **On other platforms**: Uses a comprehensive set of stealth features with Windows platform spoofing
246
- - **Humanization defaults**: mouse, keyboard, and scroll humanization are enabled with safe defaults
247
- - **UA consistency**: claimed UA Chrome version is automatically aligned to the running Chrome binary
248
-
249
- ### Best Practices
250
-
251
- 1. **Use `platform: 'Win32'`** for better evasion on Linux servers (like Apify)
252
- 2. **Enable `detectTimezone: true`** when using residential proxies
253
- 3. **Use `useFingerprintDefaults: false`** to leverage fingerprint-generator's realistic values
254
- 4. **Enable `bypassRuntimeEnable: true`** for sites that detect automation
255
- 5. **Use `enableAdvancedStealth: true`** for maximum protection against fingerprinting
256
- 6. **Keep OS settings in sync** between `launchOptions.fingerprintOptions.platform` and `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems`
257
- 7. **Use `webRtcPolicy: 'disable'`** for strictest leak prevention, or `'spoof'` for compatibility-sensitive targets
258
-
259
- ### Performance Considerations
260
-
261
- - More fingerprint options enabled = slightly higher CPU usage
262
- - WebGPU spoofing may add a small delay to page loads
263
- - Humanized interactions add realistic delays to mouse movements
264
-
265
- The fingerprint options are designed to provide maximum protection while maintaining good performance for web scraping tasks.
266
-
267
- For more configuration examples and patterns, see the [Examples README](./examples/README.md).
268
-
269
- ---
270
-
271
- ## Launch Options for Network and Persistence
272
-
273
- The following options are configured in `launchContext.launchOptions`:
274
-
275
- - **`dnsOverHttpsServer`** (string): DoH endpoint template, for example `https://cloudflare-dns.com/dns-query`
276
- - **`secureDnsMode`** (`'off' | 'automatic' | 'secure'`): Chromium secure DNS mode
277
- - **`webrtcIpHandlingPolicy`** (`'default' | 'default_public_interface_only' | 'default_public_and_private_interfaces' | 'disable_non_proxied_udp'`): Browser-level WebRTC IP handling policy
278
- - **`userDataDir`** (string): Reuse a specific Chrome profile directory across runs
279
- - **`keepUserDataDir`** (boolean): Whether to keep the profile directory on close. By default, a custom `userDataDir` is kept and a temporary directory is cleaned up
280
-
281
- Example:
282
-
283
- ```typescript
284
- const crawler = new Crawler({
285
- launchContext: {
286
- launchOptions: {
287
- dnsOverHttpsServer: 'https://cloudflare-dns.com/dns-query',
288
- secureDnsMode: 'secure',
289
- webrtcIpHandlingPolicy: 'disable_non_proxied_udp',
290
- userDataDir: './state/chrome-profile',
291
- keepUserDataDir: true,
292
- fingerprintOptions: {
293
- webRtcPolicy: 'disable',
294
- },
295
- },
296
- },
297
- });
298
- ```
299
-
300
- ---
301
-
302
- ## `Crawler` Class Documentation
303
-
304
- ### Constructor
305
-
306
- #### `constructor(options: BrowserCrawlerOptions = {}, override readonly config = Configuration.getGlobalConfig())`
307
-
308
- Initializes the `Crawler` instance with default and provided options.
309
-
310
- - **Parameters**:
311
-
312
- - `options` (BrowserCrawlerOptions): Configuration options for the crawler.
313
- - `launchContext`: Specifies browser launch parameters.
314
- - Default: `{}`
315
- - `headless`: Runs the browser in headless mode.
316
- - Default: `false`
317
- - `browserPoolOptions`: Configuration for managing browser instances.
318
- - `config` (Configuration): Global Crawlee configuration.
319
- - Default: `Configuration.getGlobalConfig()`
320
-
321
- - **Default Behavior**:
322
- - Throws an error if `launchContext.proxyUrl` is provided. Use `proxyConfiguration` instead.
323
- - Throws an error if `browserPoolOptions.browserPlugins` is set. Use `launchContext.launcher` instead.
324
-
325
- ---
326
-
327
- ## `CdpPage` Class Documentation
328
-
329
- ### Constructor
330
-
331
- #### `constructor(client: CDP.Client)`
332
-
333
- Initializes the `CdpPage` instance with a CDP client.
334
-
335
- - **Parameters**:
336
-
337
- - `client` (CDP.Client): The Chrome DevTools Protocol client.
338
-
339
- - **Emitted Events**:
340
- - `PAGE_CREATED`: Triggered upon the creation of the page.
341
-
342
- ### Static Methods
343
-
344
- #### `static async create(client: CDP.Client): Promise<CdpPage>`
345
-
346
- Creates and initializes a new `CdpPage` instance.
347
-
348
- - **Parameters**:
349
-
350
- - `client` (CDP.Client): The CDP client.
351
-
352
- - **Returns**:
353
- - `Promise<CdpPage>`: A promise resolving to the new `CdpPage` instance.
354
-
355
- ---
356
-
357
- ### Public Methods
358
-
359
- #### `async url(): Promise<string>`
360
- Gets the current URL of the page.
361
-
362
- - **Returns**:
363
- - `Promise<string>`: The current URL.
364
-
365
- #### `async goto(url: string, options?: GotoOptions): Promise<void>`
366
- Navigates to a specified URL.
367
-
368
- - **Parameters**:
369
- - `url` (string): The URL to navigate to.
370
- - `options` (GotoOptions): Navigation options, including:
371
- - `waitUntil`: When to consider navigation finished (`domcontentloaded` or `load`).
372
- - `timeout`: Maximum time to wait for navigation in milliseconds.
373
-
374
- #### `async click(selector: string): Promise<void>`
375
- Simulates a click on an element identified by the selector.
376
-
377
- - **Parameters**:
378
- - `selector` (string): CSS selector of the element.
379
-
380
- #### `async type(selector: string, text: string, options?: { delay?: number }): Promise<void>`
381
- Types text into an input field.
382
-
383
- - **Parameters**:
384
- - `selector` (string): CSS selector of the element.
385
- - `text` (string): Text to type.
386
- - `options` (object): Options for typing:
387
- - `delay`: Time in milliseconds between key presses.
388
-
389
- #### `async screenshot(options?: { path?: string; fullPage?: boolean; format?: 'png' | 'jpeg' }): Promise<Buffer>`
390
- Takes a screenshot of the page, with support for PNG and JPEG formats.
391
-
392
- - **Parameters**:
393
- - `options` (object): Screenshot options:
394
- - `path`: File path to save the screenshot.
395
- - `fullPage`: Capture the entire page.
396
- - `format`: Image format, either `'png'` (default) or `'jpeg'`.
397
-
398
- - **Returns**:
399
- - `Promise<Buffer>`: The screenshot as a buffer.
400
-
401
- #### `async content(): Promise<string>`
402
- Gets the HTML content of the page.
403
-
404
- - **Returns**:
405
- - `Promise<string>`: The page's HTML.
406
-
407
- #### `async toCheerio(): Promise<cheerio.CheerioAPI>`
408
- Converts the current page content to a Cheerio instance for DOM manipulation.
409
-
410
- - **Returns**:
411
- - `Promise<cheerio.CheerioAPI>`: A Cheerio API instance.
412
-
413
- #### `async setViewport(viewport: Viewport): Promise<void>`
414
- Sets the page's viewport dimensions.
415
-
416
- - **Parameters**:
417
- - `viewport` (Viewport): Object with `width` and `height` properties.
418
-
419
- #### `async setUserAgent(userAgent: string): Promise<void>`
420
- Overrides the user-agent string.
421
-
422
- - **Parameters**:
423
- - `userAgent` (string): The new user-agent string.
424
-
425
- #### `async setExtraHTTPHeaders(headers: Record<string, string>): Promise<void>`
426
- Sets additional HTTP headers for requests.
427
-
428
- - **Parameters**:
429
- - `headers` (Record<string, string>): Key-value pairs of headers.
430
-
431
- #### `async waitForResponse(urlPart: string, statusCode?: number, timeout?: number): Promise<any>`
432
- Waits for a specific network response.
433
-
434
- - **Parameters**:
435
- - `urlPart` (string): Part of the URL to match.
436
- - `statusCode` (number): Expected HTTP status code.
437
- - `timeout` (number): Maximum wait time in milliseconds.
438
-
439
- - **Returns**:
440
- - `Promise<any>`: The response.
441
-
442
- #### `async setCookies(cookies: Cookie[]): Promise<void>`
443
- Sets cookies for the page.
444
-
445
- - **Parameters**:
446
- - `cookies` (Cookie[]): Array of cookies to set.
447
-
448
- #### `async getCookies(urls?: string[]): Promise<Cookie[]>`
449
- Retrieves cookies for the given URLs or all cookies if no URLs are specified.
450
-
451
- - **Parameters**:
452
- - `urls` (string[]): Optional array of URLs.
453
-
454
- - **Returns**:
455
- - `Promise<Cookie[]>`: Array of cookies.
456
-
457
- #### `async waitForSelector(selector: string, options?: { timeout?: number }): Promise<void>`
458
- Waits for an element matching the selector to appear.
459
-
460
- - **Parameters**:
461
- - `selector` (string): CSS selector of the element.
462
- - `options` (object): Options for waiting:
463
- - `timeout`: Maximum wait time in milliseconds.
464
-
465
- #### `async elementExists(selector: string): Promise<boolean>`
466
- Checks if an element exists.
467
-
468
- - **Parameters**:
469
- - `selector` (string): CSS selector of the element.
470
-
471
- - **Returns**:
472
- - `Promise<boolean>`: `true` if the element exists, `false` otherwise.
473
-
474
- #### `async getTextContent(selector: string): Promise<string>`
475
- Gets the text content of an element.
476
-
477
- - **Parameters**:
478
- - `selector` (string): CSS selector of the element.
479
-
480
- - **Returns**:
481
- - `Promise<string>`: The element's text content.
482
-
483
- #### `async getHref(selector: string): Promise<string>`
484
- Gets the `href` attribute of an anchor element.
485
-
486
- - **Parameters**:
487
- - `selector` (string): CSS selector of the anchor element.
488
-
489
- - **Returns**:
490
- - `Promise<string>`: The `href` value.
491
-
492
- #### `async reload(options?: GotoOptions): Promise<void>`
493
- Reloads the current page.
494
-
495
- - **Parameters**:
496
- - `options` (GotoOptions): Navigation options, including:
497
- - `waitUntil`: When to consider reload finished (`domcontentloaded` or `load`).
498
- - `timeout`: Maximum time to wait for reload in milliseconds.
499
-
500
- #### `async deleteInput(selector: string): Promise<void>`
501
- Clears the value of an input field specified by the selector.
502
-
503
- - **Parameters**:
504
- - `selector` (string): CSS selector of the input element.
505
-
506
- #### `async isVisible(selector: string): Promise<boolean>`
507
- Checks if the element specified by selector is visible (not `display: none` and not `visibility: hidden`).
508
- The selector should target the root element that can be hidden; otherwise, this function could return a false positive.
509
-
510
- - **Parameters**:
511
- - `selector` (string): CSS selector of the element.
512
- - **Returns**:
513
- - `Promise<boolean>`: `true` if the element is visible, `false` otherwise.
514
-
515
- #### `async selectOption(dropdownSelector: string, optionSelector: string | string[], options?: SelectOptionOptions): Promise<void>`
516
- Selects one or more options from a select element or dropdown with intelligent automatic handling.
517
-
518
- **Key Features**:
519
- - **Automatic Detection**: Distinguishes between HTML `<select>` elements and custom dropdowns
520
- - **Smart Trigger Discovery**: For custom dropdowns, automatically finds and clicks triggers using multiple strategies
521
- - **Virtualized List Support**: Handles large dropdown lists with intelligent scrolling
522
- - **No Manual Configuration**: No need to specify separate trigger and container selectors
523
-
524
- - **Parameters**:
525
- - `dropdownSelector` (string): CSS selector for the select element or dropdown container.
526
- - `optionSelector` (string | string[]): CSS selector(s) for the option(s) to select. Can be a single selector or array of selectors.
527
- - `options` (SelectOptionOptions): Optional configuration object with the following properties:
528
- - `timeout` (number): Maximum wait time in milliseconds. Default: 30000.
529
- - `force` (boolean): Bypass visibility and disabled checks. Default: false.
530
- - `waitForOptions` (boolean): Wait for dropdown options to load. Default: true.
531
- - `maxScrollAttempts` (number): Maximum scroll attempts for virtualized dropdowns. Default: 10.
532
-
533
- #### `async waitForElementPositionToStabilize(selector: string, timeout?: number, checkInterval?: number, stabilityThreshold?: number, tolerance?: number): Promise<void>`
534
- Waits for an element's position to stabilize by polling its bounding box. Useful before interactions after scrolling/animations.
535
-
536
- - **Parameters**:
537
- - `selector` (string): Target element selector
538
- - `timeout` (number): Max time to wait in milliseconds. Default: 2000
539
- - `checkInterval` (number): Polling interval in milliseconds. Default: 100
540
- - `stabilityThreshold` (number): Consecutive stable checks required. Default: 3
541
- - `tolerance` (number): Max pixel delta to consider stable. Default: 1
542
-
543
- - **Usage Examples** (`selectOption`):
544
- ```typescript
545
- // Regular HTML select element - works directly
546
- await page.selectOption('select#country', 'option[value="us"]');
547
-
548
- // Multiple selection in HTML select
549
- await page.selectOption('select#languages', ['option[value="en"]', 'option[value="es"]']);
550
-
551
- // Custom dropdown - automatically finds and clicks trigger
552
- await page.selectOption('#dropdown-menu', '[data-value="premium"]');
553
-
554
- // Virtualized dropdown - automatically scrolls to find option
555
- await page.selectOption('#large-dropdown', '[data-item="item-500"]');
556
-
557
- // With custom configuration
558
- await page.selectOption(
559
- '#complex-dropdown',
560
- '.option[data-category="business"]',
561
- {
562
- timeout: 10000,
563
- maxScrollAttempts: 15
564
- }
565
- );
566
-
567
- // Bootstrap/Material-UI dropdowns work automatically
568
- await page.selectOption('.MuiSelect-menu', '[data-value="option1"]');
569
- await page.selectOption('.dropdown-menu', '.dropdown-item[data-value="choice2"]');
570
- ```
571
-
572
- - **How Trigger Detection Works**:
573
- The method automatically detects dropdown triggers using multiple strategies:
574
- 1. **Accessibility patterns**: `[aria-haspopup]`, `[role="button"]`
575
- 2. **Common class names**: `.dropdown-trigger`, `.select-trigger`
576
- 3. **Sibling elements**: Previous sibling of the dropdown container
577
- 4. **ID pattern matching**: `#menu-id` → `#trigger-id`, `#dropdown-menu` → `#dropdown-trigger`
578
-
579
- - **Migration from Previous API**:
580
- ```typescript
581
- // OLD - Complex API with manual configuration
582
- await page.selectOption('#trigger', '[data-value="item"]', {
583
- dropdownSelector: '#menu',
584
- optionSelector: '.dropdown-item'
585
- });
586
-
587
- // NEW - Simplified API with automatic detection
588
- await page.selectOption('#menu', '[data-value="item"]');
589
- ```
590
-
591
- ---
592
-
593
- ## Utility Functions
594
-
595
- ### `createCDPRouter`
596
-
597
- #### `export function createCDPRouter<Context extends CDPCrawlingContext = CDPCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): Router<Context>`
598
-
599
- Creates a custom router for handling crawling routes using CDP.
600
-
601
- - **Parameters**:
602
- - `routes` (RouterRoutes<Context, UserData>): Optional routes for defining crawl logic.
603
-
604
- - **Returns**:
605
- - `Router<Context>`: A configured router instance.
606
-
607
- ---
1
+ # jumpy-lion