jumpy-lion 0.1.6-beta.30 → 0.1.6-beta.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -795
- package/browser/fonts/macos-bundle/README.md +9 -61
- package/dist/browser-controller.d.ts +8 -3
- package/dist/browser-controller.d.ts.map +1 -1
- package/dist/browser-controller.js +66 -266
- package/dist/browser-controller.js.map +1 -1
- package/dist/browser-plugin.d.ts +68 -106
- package/dist/browser-plugin.d.ts.map +1 -1
- package/dist/browser-plugin.js +210 -705
- package/dist/browser-plugin.js.map +1 -1
- package/dist/browser-process/align-fingerprint-config.d.ts +40 -0
- package/dist/browser-process/align-fingerprint-config.d.ts.map +1 -0
- package/dist/browser-process/align-fingerprint-config.js +79 -0
- package/dist/browser-process/align-fingerprint-config.js.map +1 -0
- package/dist/browser-process/anti-detect-config.d.ts +5 -43
- package/dist/browser-process/anti-detect-config.d.ts.map +1 -1
- package/dist/browser-process/anti-detect-config.js +212 -554
- package/dist/browser-process/anti-detect-config.js.map +1 -1
- package/dist/browser-process/browser.d.ts +19 -137
- package/dist/browser-process/browser.d.ts.map +1 -1
- package/dist/browser-process/browser.js +49 -10
- package/dist/browser-process/browser.js.map +1 -1
- package/dist/browser-process/fingerprint-config.d.ts +103 -0
- package/dist/browser-process/fingerprint-config.d.ts.map +1 -0
- package/dist/browser-process/fingerprint-config.js +245 -0
- package/dist/browser-process/fingerprint-config.js.map +1 -0
- package/dist/browser-process/gpu-family-profiles.d.ts +53 -0
- package/dist/browser-process/gpu-family-profiles.d.ts.map +1 -0
- package/dist/browser-process/gpu-family-profiles.js +395 -0
- package/dist/browser-process/gpu-family-profiles.js.map +1 -0
- package/dist/browser-process/launch-options.d.ts +195 -0
- package/dist/browser-process/launch-options.d.ts.map +1 -0
- package/dist/browser-process/launch-options.js +2 -0
- package/dist/browser-process/launch-options.js.map +1 -0
- package/dist/browser-process/port-allocator.d.ts +46 -0
- package/dist/browser-process/port-allocator.d.ts.map +1 -0
- package/dist/browser-process/port-allocator.js +171 -0
- package/dist/browser-process/port-allocator.js.map +1 -0
- package/dist/browser-process/process.d.ts +54 -0
- package/dist/browser-process/process.d.ts.map +1 -1
- package/dist/browser-process/process.js +164 -19
- package/dist/browser-process/process.js.map +1 -1
- package/dist/browser-process/signal-cleanup.d.ts +37 -0
- package/dist/browser-process/signal-cleanup.d.ts.map +1 -0
- package/dist/browser-process/signal-cleanup.js +93 -0
- package/dist/browser-process/signal-cleanup.js.map +1 -0
- package/dist/connection/cdp-reconnection.d.ts +61 -0
- package/dist/connection/cdp-reconnection.d.ts.map +1 -0
- package/dist/connection/cdp-reconnection.js +98 -0
- package/dist/connection/cdp-reconnection.js.map +1 -0
- package/dist/connection/page-session.d.ts +109 -0
- package/dist/connection/page-session.d.ts.map +1 -0
- package/dist/connection/page-session.js +257 -0
- package/dist/connection/page-session.js.map +1 -0
- package/dist/crawler.d.ts +1 -1
- package/dist/crawler.d.ts.map +1 -1
- package/dist/crawler.js +1 -1
- package/dist/crawler.js.map +1 -1
- package/dist/fingerprinting/fingerprint-injector.d.ts +6 -116
- package/dist/fingerprinting/fingerprint-injector.d.ts.map +1 -1
- package/dist/fingerprinting/fingerprint-injector.js +50 -535
- package/dist/fingerprinting/fingerprint-injector.js.map +1 -1
- package/dist/fingerprinting/fingerprint-overrides/index.d.ts +6 -2
- package/dist/fingerprinting/fingerprint-overrides/index.d.ts.map +1 -1
- package/dist/fingerprinting/fingerprint-overrides/index.js +6 -2
- package/dist/fingerprinting/fingerprint-overrides/index.js.map +1 -1
- package/dist/fingerprinting/fingerprint-overrides/navigator-override.d.ts +18 -0
- package/dist/fingerprinting/fingerprint-overrides/navigator-override.d.ts.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/navigator-override.js +136 -0
- package/dist/fingerprinting/fingerprint-overrides/navigator-override.js.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/override.d.ts +137 -0
- package/dist/fingerprinting/fingerprint-overrides/override.d.ts.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/override.js +14 -0
- package/dist/fingerprinting/fingerprint-overrides/override.js.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/registry.d.ts +27 -0
- package/dist/fingerprinting/fingerprint-overrides/registry.d.ts.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/registry.js +285 -0
- package/dist/fingerprinting/fingerprint-overrides/registry.js.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/screen-override.d.ts +16 -0
- package/dist/fingerprinting/fingerprint-overrides/screen-override.d.ts.map +1 -0
- package/dist/fingerprinting/fingerprint-overrides/screen-override.js +175 -0
- package/dist/fingerprinting/fingerprint-overrides/screen-override.js.map +1 -0
- package/dist/fingerprinting/injection-planner.d.ts +178 -0
- package/dist/fingerprinting/injection-planner.d.ts.map +1 -0
- package/dist/fingerprinting/injection-planner.js +376 -0
- package/dist/fingerprinting/injection-planner.js.map +1 -0
- package/dist/fingerprinting/profile-quality.d.ts +24 -0
- package/dist/fingerprinting/profile-quality.d.ts.map +1 -0
- package/dist/fingerprinting/profile-quality.js +165 -0
- package/dist/fingerprinting/profile-quality.js.map +1 -0
- package/dist/fingerprinting/profile-selector.d.ts +101 -0
- package/dist/fingerprinting/profile-selector.d.ts.map +1 -0
- package/dist/fingerprinting/profile-selector.js +156 -0
- package/dist/fingerprinting/profile-selector.js.map +1 -0
- package/dist/fingerprinting/ua-alignment.d.ts +51 -0
- package/dist/fingerprinting/ua-alignment.d.ts.map +1 -0
- package/dist/fingerprinting/ua-alignment.js +146 -0
- package/dist/fingerprinting/ua-alignment.js.map +1 -0
- package/dist/input/dropdown-selector.d.ts +74 -0
- package/dist/input/dropdown-selector.d.ts.map +1 -0
- package/dist/input/dropdown-selector.js +306 -0
- package/dist/input/dropdown-selector.js.map +1 -0
- package/dist/input/element-target.d.ts +117 -0
- package/dist/input/element-target.d.ts.map +1 -0
- package/dist/input/element-target.js +383 -0
- package/dist/input/element-target.js.map +1 -0
- package/dist/input/input-emulator.d.ts +85 -0
- package/dist/input/input-emulator.d.ts.map +1 -0
- package/dist/input/input-emulator.js +319 -0
- package/dist/input/input-emulator.js.map +1 -0
- package/dist/input/input-transport.d.ts +60 -0
- package/dist/input/input-transport.d.ts.map +1 -0
- package/dist/input/input-transport.js +28 -0
- package/dist/input/input-transport.js.map +1 -0
- package/dist/input/recording-transport.d.ts +32 -0
- package/dist/input/recording-transport.d.ts.map +1 -0
- package/dist/input/recording-transport.js +43 -0
- package/dist/input/recording-transport.js.map +1 -0
- package/dist/navigation/page-navigation.d.ts +67 -0
- package/dist/navigation/page-navigation.d.ts.map +1 -0
- package/dist/navigation/page-navigation.js +107 -0
- package/dist/navigation/page-navigation.js.map +1 -0
- package/dist/network/network-watch.d.ts +72 -0
- package/dist/network/network-watch.d.ts.map +1 -0
- package/dist/network/network-watch.js +143 -0
- package/dist/network/network-watch.js.map +1 -0
- package/dist/page.d.ts +59 -117
- package/dist/page.d.ts.map +1 -1
- package/dist/page.js +169 -1304
- package/dist/page.js.map +1 -1
- package/dist/session-profile.d.ts +79 -0
- package/dist/session-profile.d.ts.map +1 -0
- package/dist/session-profile.js +124 -0
- package/dist/session-profile.js.map +1 -0
- package/dist/tsconfig.build.tsbuildinfo +1 -1
- package/package.json +6 -4
- package/dist/fingerprinting/custom-fingerprint-injector.d.ts +0 -87
- package/dist/fingerprinting/custom-fingerprint-injector.d.ts.map +0 -1
- package/dist/fingerprinting/custom-fingerprint-injector.js +0 -342
- package/dist/fingerprinting/custom-fingerprint-injector.js.map +0 -1
- package/dist/launcher-wrap.d.ts +0 -10
- package/dist/launcher-wrap.d.ts.map +0 -1
- package/dist/launcher-wrap.js +0 -11
- package/dist/launcher-wrap.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,795 +1 @@
|
|
|
1
|
-
#
|
|
2
|
-
|
|
3
|
-
## Table of Contents
|
|
4
|
-
|
|
5
|
-
- [Overview](#overview)
|
|
6
|
-
- [NPM Package](#npm-package)
|
|
7
|
-
- [Usage](#usage)
|
|
8
|
-
- [Example Project](#example-project)
|
|
9
|
-
- [Internal Guide](#internal-guide)
|
|
10
|
-
- [Examples and Configuration](#examples-and-configuration)
|
|
11
|
-
- [Advanced Fingerprints Usage](#advanced-fingerprints-usage)
|
|
12
|
-
- [Syncing BrowserPool and launchOptions fingerprints](#syncing-browserpool-and-launchoptions-fingerprints)
|
|
13
|
-
- [Stealth Consistency and Network Policies](#stealth-consistency-and-network-policies)
|
|
14
|
-
- [Configurable Fingerprint Options](#configurable-fingerprint-options)
|
|
15
|
-
- [Usage](#usage-1)
|
|
16
|
-
- [Available Options](#available-options)
|
|
17
|
-
- [Core Stealth Options](#core-stealth-options)
|
|
18
|
-
- [Fingerprint Spoofing](#fingerprint-spoofing)
|
|
19
|
-
- [Platform Configuration](#platform-configuration)
|
|
20
|
-
- [Additional Features](#additional-features)
|
|
21
|
-
- [Default Behavior](#default-behavior)
|
|
22
|
-
- [Best Practices](#best-practices)
|
|
23
|
-
- [Performance Considerations](#performance-considerations)
|
|
24
|
-
- [Launch Options for Network and Persistence](#launch-options-for-network-and-persistence)
|
|
25
|
-
- [Session Bundle (save & restore browser state)](#session-bundle-save--restore-browser-state)
|
|
26
|
-
- [When to use it](#when-to-use-it)
|
|
27
|
-
- [Producer: capturing a bundle](#producer-capturing-a-bundle)
|
|
28
|
-
- [Consumer: rehydrating a bundle](#consumer-rehydrating-a-bundle)
|
|
29
|
-
- [What's inside a bundle](#whats-inside-a-bundle)
|
|
30
|
-
- [`saveSession()` options](#savesession-options)
|
|
31
|
-
- [`restoreSession()`](#restoresession)
|
|
32
|
-
- [Caveats](#caveats)
|
|
33
|
-
- [Crawler Class Documentation](#crawler-class-documentation)
|
|
34
|
-
- [Constructor](#constructor)
|
|
35
|
-
- [CdpPage Class Documentation](#cdppage-class-documentation)
|
|
36
|
-
- [Constructor](#constructor-1)
|
|
37
|
-
- [Static Methods](#static-methods)
|
|
38
|
-
- [Public Methods](#public-methods)
|
|
39
|
-
- [Utility Functions](#utility-functions)
|
|
40
|
-
- [createCDPRouter](#createcdprouter)
|
|
41
|
-
- [saveSession](#savesession)
|
|
42
|
-
- [restoreSession](#restoresession-1)
|
|
43
|
-
|
|
44
|
-
## Overview
|
|
45
|
-
|
|
46
|
-
The `Crawler` class is a custom implementation of the `BrowserCrawler` from Crawlee, designed to utilize the Chrome DevTools Protocol (CDP) for advanced antiblocking capabilities.
|
|
47
|
-
|
|
48
|
-
## NPM Package
|
|
49
|
-
|
|
50
|
-
`jumpy-lion` is the official CDP crawler package. See it [here](https://www.npmjs.com/package/jumpy-lion).
|
|
51
|
-
|
|
52
|
-
### Installation
|
|
53
|
-
|
|
54
|
-
```bash
|
|
55
|
-
npm install jumpy-lion
|
|
56
|
-
```
|
|
57
|
-
|
|
58
|
-
A postinstall hook pulls the matching anti-detect Chromium build from a public
|
|
59
|
-
Apify key-value store — no GitHub token, no Apify token. Works the same on
|
|
60
|
-
your workstation, CI, and the Apify platform. If the download fails for any
|
|
61
|
-
reason (offline, proxy, etc.) the install still succeeds; the crawler falls
|
|
62
|
-
back to the system Chrome on `PATH`. See `browser/BUILD.md` for env-var
|
|
63
|
-
overrides and the maintainer release flow.
|
|
64
|
-
|
|
65
|
-
---
|
|
66
|
-
---
|
|
67
|
-
|
|
68
|
-
## Usage
|
|
69
|
-
|
|
70
|
-
### Example Project
|
|
71
|
-
|
|
72
|
-
Refer to this [GitHub repository](https://github.com/apify-projects/cdp-crawler-example) for a complete example of using the `Crawler` class.
|
|
73
|
-
|
|
74
|
-
### Internal Guide
|
|
75
|
-
|
|
76
|
-
Check out the [CDP Crawler internal guide](https://www.notion.so/apify/CDP-Crawler-internal-guide-183f39950a2280be81d7c86dc048a47a?pvs=4) for a tutorial.
|
|
77
|
-
|
|
78
|
-
### Examples and Configuration
|
|
79
|
-
|
|
80
|
-
For detailed examples and configuration patterns, see the [Examples README](./examples/README.md). The examples include:
|
|
81
|
-
|
|
82
|
-
- **Basic Configuration**: Simple fingerprint setup for common use cases
|
|
83
|
-
- **Comprehensive Configuration**: Full feature setup with all spoofing options
|
|
84
|
-
- **Platform-Specific Configurations**: macOS, Windows, and Linux targeting
|
|
85
|
-
- **Performance-Focused Configuration**: Optimized settings for speed
|
|
86
|
-
- **Minimal Configuration**: Using intelligent defaults
|
|
87
|
-
|
|
88
|
-
The examples demonstrate real-world usage patterns and best practices for different scenarios.
|
|
89
|
-
|
|
90
|
-
### Advanced Fingerprints Usage
|
|
91
|
-
|
|
92
|
-
To use advanced fingerprints, you need to set the `useExperimentalFingerprints` option to `true` in the `launchContext.launchOptions` of the `Crawler` constructor.
|
|
93
|
-
|
|
94
|
-
```typescript
|
|
95
|
-
const crawler = new Crawler({
|
|
96
|
-
launchContext: {
|
|
97
|
-
launchOptions: {
|
|
98
|
-
useExperimentalFingerprints: true,
|
|
99
|
-
}
|
|
100
|
-
},
|
|
101
|
-
});
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
---
|
|
105
|
-
|
|
106
|
-
### Syncing BrowserPool and launchOptions fingerprints
|
|
107
|
-
|
|
108
|
-
**Always keep the operating system in sync between BrowserPool fingerprints and `launchOptions.fingerprintOptions`.** A mismatch can lead to inconsistent signals (for example `navigator.platform`, User-Agent, WebGL, fonts) and reduce antibot effectiveness.
|
|
109
|
-
|
|
110
|
-
- **launchOptions side**: Set `launchContext.launchOptions.fingerprintOptions.platform` to the desired platform string.
|
|
111
|
-
- **BrowserPool side**: When `browserPoolOptions.useFingerprints` is `true`, set `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems` to the corresponding OS.
|
|
112
|
-
|
|
113
|
-
Mapping guidance:
|
|
114
|
-
- `platform: 'Win32'` ↔ `operatingSystems: ['windows']`
|
|
115
|
-
- `platform: 'MacIntel'` ↔ `operatingSystems: ['macos']`
|
|
116
|
-
- `platform: 'Linux x86_64'` ↔ `operatingSystems: ['linux']`
|
|
117
|
-
|
|
118
|
-
Example:
|
|
119
|
-
|
|
120
|
-
```typescript
|
|
121
|
-
const crawler = new Crawler({
|
|
122
|
-
launchContext: {
|
|
123
|
-
launchOptions: {
|
|
124
|
-
useExperimentalFingerprints: true,
|
|
125
|
-
fingerprintOptions: {
|
|
126
|
-
platform: 'Win32', // Keep this in sync with BrowserPool OS
|
|
127
|
-
},
|
|
128
|
-
},
|
|
129
|
-
},
|
|
130
|
-
browserPoolOptions: {
|
|
131
|
-
useFingerprints: true,
|
|
132
|
-
fingerprintOptions: {
|
|
133
|
-
fingerprintGeneratorOptions: {
|
|
134
|
-
browsers: ['chrome'],
|
|
135
|
-
operatingSystems: ['windows'], // Matches platform: 'Win32'
|
|
136
|
-
devices: ['desktop'],
|
|
137
|
-
},
|
|
138
|
-
},
|
|
139
|
-
},
|
|
140
|
-
});
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
Note: This configuration surface will be unified later. We are currently testing our custom fingerprint injector so it works even with the BrowserPool built‑in fingerprints turned off. If you prefer, you can rely solely on the custom injector by setting `browserPoolOptions.useFingerprints: false` and keeping `launchOptions.useExperimentalFingerprints: true`.
|
|
144
|
-
|
|
145
|
-
---
|
|
146
|
-
|
|
147
|
-
### Stealth Consistency and Network Policies
|
|
148
|
-
|
|
149
|
-
Recent stealth hardening adds explicit consistency and policy controls:
|
|
150
|
-
|
|
151
|
-
- **UA/Binary version alignment**: the injector aligns advertised `Chrome/x.y.z.w` with the actual running Chrome binary version to reduce fingerprint drift.
|
|
152
|
-
- **WebRTC policy control**: set `fingerprintOptions.webRtcPolicy` to:
|
|
153
|
-
- `'spoof'` (default): redacts/normalizes WebRTC leak surfaces.
|
|
154
|
-
- `'disable'`: removes WebRTC APIs from page context.
|
|
155
|
-
- **DNS hardening controls**: configure DoH and secure DNS through launch options (`dnsOverHttpsServer`, `secureDnsMode`).
|
|
156
|
-
- **WebRTC transport policy flag**: configure `webrtcIpHandlingPolicy` at browser launch level.
|
|
157
|
-
- **Persistent profile mode**: set `userDataDir` (+ optional `keepUserDataDir`) to reuse browser state across runs.
|
|
158
|
-
|
|
159
|
-
## Configurable Fingerprint Options
|
|
160
|
-
|
|
161
|
-
The CDP crawler supports configurable fingerprint options that can be passed through the crawler options. This allows you to customize the fingerprint spoofing behavior for different use cases.
|
|
162
|
-
|
|
163
|
-
### Usage
|
|
164
|
-
|
|
165
|
-
You can configure fingerprint options by adding them to the `launchContext.launchOptions.fingerprintOptions` in your crawler configuration:
|
|
166
|
-
|
|
167
|
-
```typescript
|
|
168
|
-
import { Crawler } from 'cdp-crawler';
|
|
169
|
-
|
|
170
|
-
const crawler = new Crawler({
|
|
171
|
-
launchContext: {
|
|
172
|
-
launchOptions: {
|
|
173
|
-
fingerprintOptions: {
|
|
174
|
-
// Enable advanced stealth features
|
|
175
|
-
enableAdvancedStealth: true,
|
|
176
|
-
|
|
177
|
-
// Bypass Runtime.enable detection
|
|
178
|
-
bypassRuntimeEnable: true,
|
|
179
|
-
|
|
180
|
-
// Humanize mouse interactions
|
|
181
|
-
humanizeInteractions: true,
|
|
182
|
-
|
|
183
|
-
// Spoof WebGL fingerprinting
|
|
184
|
-
spoofWebGL: true,
|
|
185
|
-
|
|
186
|
-
// Spoof audio context fingerprinting
|
|
187
|
-
spoofAudioContext: true,
|
|
188
|
-
|
|
189
|
-
// Add variations to client rect measurements
|
|
190
|
-
spoofClientRects: true,
|
|
191
|
-
|
|
192
|
-
// Mask automation flags
|
|
193
|
-
maskAutomationFlags: true,
|
|
194
|
-
|
|
195
|
-
// Use hardcoded defaults instead of fingerprint-generator values (set to false to use generated values)
|
|
196
|
-
useFingerprintDefaults: true,
|
|
197
|
-
|
|
198
|
-
// Platform to spoof (defaults to Win32 for better evasion)
|
|
199
|
-
platform: 'Win32', // 'Win32' | 'MacIntel' | 'Linux x86_64'
|
|
200
|
-
|
|
201
|
-
// Spoof font measurements
|
|
202
|
-
spoofFonts: true,
|
|
203
|
-
|
|
204
|
-
// Spoof performance timing
|
|
205
|
-
spoofPerformance: true,
|
|
206
|
-
|
|
207
|
-
// Spoof locale settings
|
|
208
|
-
spoofLocale: true,
|
|
209
|
-
|
|
210
|
-
// Detect timezone from proxy (useful with residential proxies)
|
|
211
|
-
detectTimezone: true,
|
|
212
|
-
|
|
213
|
-
// WebRTC policy: 'spoof' (default) or 'disable'
|
|
214
|
-
webRtcPolicy: 'spoof',
|
|
215
|
-
}
|
|
216
|
-
}
|
|
217
|
-
},
|
|
218
|
-
// ... other crawler options
|
|
219
|
-
});
|
|
220
|
-
```
|
|
221
|
-
|
|
222
|
-
### Available Options
|
|
223
|
-
|
|
224
|
-
#### Core Stealth Options
|
|
225
|
-
|
|
226
|
-
- **`enableAdvancedStealth`** (boolean): Enables advanced stealth features including WebGPU spoofing and platform consistency
|
|
227
|
-
- **`bypassRuntimeEnable`** (boolean): Prevents CDP detection through Runtime.enable bypass techniques
|
|
228
|
-
- **`humanizeInteractions`** (boolean): Generates human-like mouse movements using bezier curves
|
|
229
|
-
|
|
230
|
-
#### Fingerprint Spoofing
|
|
231
|
-
|
|
232
|
-
- **`spoofWebGL`** (boolean): Spoofs WebGL fingerprinting by modifying GPU adapter information
|
|
233
|
-
- **`spoofAudioContext`** (boolean): Adds noise to audio processing to prevent audio fingerprinting
|
|
234
|
-
- **`spoofClientRects`** (boolean): Adds small variations to getBoundingClientRect results
|
|
235
|
-
- **`spoofFonts`** (boolean): Hides platform-specific fonts and adds font measurement variations
|
|
236
|
-
- **`spoofPerformance`** (boolean): Modifies timing characteristics to match the target platform
|
|
237
|
-
- **`spoofLocale`** (boolean): Ensures consistent locale formatting across all browser properties
|
|
238
|
-
|
|
239
|
-
#### Platform Configuration
|
|
240
|
-
|
|
241
|
-
- **`platform`** (string): Target platform to spoof. Options: `'Win32'`, `'MacIntel'`, `'Linux x86_64'`
|
|
242
|
-
- **`useFingerprintDefaults`** (boolean): Use hardcoded defaults instead of fingerprint-generator values. When `false`, uses generated fingerprint values; when `true` (default), uses hardcoded defaults
|
|
243
|
-
|
|
244
|
-
#### Additional Features
|
|
245
|
-
|
|
246
|
-
- **`maskAutomationFlags`** (boolean): Masks automation-related flags in the browser
|
|
247
|
-
- **`detectTimezone`** (boolean): Automatically detect timezone from proxy IP (useful with residential proxies)
|
|
248
|
-
- **`webRtcPolicy`** (`'spoof' | 'disable'`): Controls whether WebRTC is spoofed or fully removed from page APIs
|
|
249
|
-
|
|
250
|
-
### Default Behavior
|
|
251
|
-
|
|
252
|
-
When no fingerprint options are provided, the crawler uses intelligent defaults:
|
|
253
|
-
|
|
254
|
-
- **On Apify**: Uses Apify-recommended settings optimized for the Apify environment
|
|
255
|
-
- **On other platforms**: Uses a comprehensive set of stealth features with Windows platform spoofing
|
|
256
|
-
- **Humanization defaults**: mouse, keyboard, and scroll humanization are enabled with safe defaults
|
|
257
|
-
- **UA consistency**: claimed UA Chrome version is automatically aligned to the running Chrome binary
|
|
258
|
-
|
|
259
|
-
### Best Practices
|
|
260
|
-
|
|
261
|
-
1. **Use `platform: 'Win32'`** for better evasion on Linux servers (like Apify)
|
|
262
|
-
2. **Enable `detectTimezone: true`** when using residential proxies
|
|
263
|
-
3. **Use `useFingerprintDefaults: false`** to leverage fingerprint-generator's realistic values
|
|
264
|
-
4. **Enable `bypassRuntimeEnable: true`** for sites that detect automation
|
|
265
|
-
5. **Use `enableAdvancedStealth: true`** for maximum protection against fingerprinting
|
|
266
|
-
6. **Keep OS settings in sync** between `launchOptions.fingerprintOptions.platform` and `browserPoolOptions.fingerprintOptions.fingerprintGeneratorOptions.operatingSystems`
|
|
267
|
-
7. **Use `webRtcPolicy: 'disable'`** for strictest leak prevention, or `'spoof'` for compatibility-sensitive targets
|
|
268
|
-
|
|
269
|
-
### Performance Considerations
|
|
270
|
-
|
|
271
|
-
- Enabling more fingerprint options slightly increases CPU usage
|
|
272
|
-
- WebGPU spoofing may add a small delay to page loads
|
|
273
|
-
- Humanized interactions add realistic delays to mouse movements
|
|
274
|
-
|
|
275
|
-
The fingerprint options are designed to provide maximum protection while maintaining good performance for web scraping tasks.
|
|
276
|
-
|
|
277
|
-
For more configuration examples and patterns, see the [Examples README](./examples/README.md).
|
|
278
|
-
|
|
279
|
-
---
|
|
280
|
-
|
|
281
|
-
## Launch Options for Network and Persistence
|
|
282
|
-
|
|
283
|
-
The following options are configured in `launchContext.launchOptions`:
|
|
284
|
-
|
|
285
|
-
- **`dnsOverHttpsServer`** (string): DoH endpoint template, for example `https://cloudflare-dns.com/dns-query`
|
|
286
|
-
- **`secureDnsMode`** (`'off' | 'automatic' | 'secure'`): Chromium secure DNS mode
|
|
287
|
-
- **`webrtcIpHandlingPolicy`** (`'default' | 'default_public_interface_only' | 'default_public_and_private_interfaces' | 'disable_non_proxied_udp'`): Browser-level WebRTC IP handling policy
|
|
288
|
-
- **`userDataDir`** (string): Reuse a specific Chrome profile directory across runs
|
|
289
|
-
- **`keepUserDataDir`** (boolean): Whether to keep or clean up the profile directory on close (by default, a custom directory is kept and a temporary directory is cleaned up)
|
|
290
|
-
|
|
291
|
-
Example:
|
|
292
|
-
|
|
293
|
-
```typescript
|
|
294
|
-
const crawler = new Crawler({
|
|
295
|
-
launchContext: {
|
|
296
|
-
launchOptions: {
|
|
297
|
-
dnsOverHttpsServer: 'https://cloudflare-dns.com/dns-query',
|
|
298
|
-
secureDnsMode: 'secure',
|
|
299
|
-
webrtcIpHandlingPolicy: 'disable_non_proxied_udp',
|
|
300
|
-
userDataDir: './state/chrome-profile',
|
|
301
|
-
keepUserDataDir: true,
|
|
302
|
-
fingerprintOptions: {
|
|
303
|
-
webRtcPolicy: 'disable',
|
|
304
|
-
},
|
|
305
|
-
},
|
|
306
|
-
},
|
|
307
|
-
});
|
|
308
|
-
```
|
|
309
|
-
|
|
310
|
-
---
|
|
311
|
-
|
|
312
|
-
## Session Bundle (save & restore browser state)
|
|
313
|
-
|
|
314
|
-
The session bundle lets one crawler **capture** the full live browser state — cookies, per-origin localStorage and sessionStorage, the Chrome user-data-dir (which transitively carries IndexedDB, Service Workers, and Cache Storage), the C++ `fingerprintConfig`, the captured `fingerprint`, and the resolved locale triple — into a single JSON-serializable blob. A second crawler can then **rehydrate** from that blob and come up byte-for-byte identical, so a session that was authenticated upstream stays authenticated downstream (matching what `naver-session-test` does, generalized into the framework).
|
|
315
|
-
|
|
316
|
-
This is mechanism, **not** persistence: nothing is written to disk for you. You stash the bundle wherever you like (Apify KV store, S3, a local file) and pass it back when you launch the next crawler.
|
|
317
|
-
|
|
318
|
-
### When to use it
|
|
319
|
-
|
|
320
|
-
- An Apify actor logs into a site, then hands the session to a second actor that does the actual scraping.
|
|
321
|
-
- A pool of long-running actors needs to checkpoint browser state between restarts.
|
|
322
|
-
- You need two crawls to look like the *exact same browser* to a bot detector (Cloudflare, DataDome, Naver) — same UA, same UA-CH, same WebGL renderer, same screen, same canvas/audio noise seed, same cookies, same localStorage, same Service Worker state.
|
|
323
|
-
|
|
324
|
-
### Producer: capturing a bundle
|
|
325
|
-
|
|
326
|
-
```typescript
|
|
327
|
-
import CDPCrawler, { saveSession, type SessionBundle } from 'cdp-crawler';
|
|
328
|
-
import { Actor } from 'apify';
|
|
329
|
-
|
|
330
|
-
let bundle: SessionBundle | undefined;
|
|
331
|
-
|
|
332
|
-
const producer = new CDPCrawler({
|
|
333
|
-
launchContext: {
|
|
334
|
-
launchOptions: {
|
|
335
|
-
useNativeStealth: true,
|
|
336
|
-
fingerprintOptions: { platform: 'MacIntel' },
|
|
337
|
-
},
|
|
338
|
-
},
|
|
339
|
-
requestHandler: async ({ page }) => {
|
|
340
|
-
await page.goto('https://target.example.com/login');
|
|
341
|
-
// … perform login, solve captcha, etc. …
|
|
342
|
-
|
|
343
|
-
bundle = await saveSession(page);
|
|
344
|
-
},
|
|
345
|
-
});
|
|
346
|
-
|
|
347
|
-
await producer.run(['https://target.example.com/login']);
|
|
348
|
-
await Actor.setValue('session', bundle); // ship to KV store for the next actor
|
|
349
|
-
```
|
|
350
|
-
|
|
351
|
-
### Consumer: rehydrating a bundle
|
|
352
|
-
|
|
353
|
-
Pass the bundle as `launchOptions.sessionBundle` on the new crawler. The plugin extracts the user-data-dir into a temp directory, feeds the captured `fingerprintConfig` straight to the C++ patches (regeneration is skipped), pins `useNonApifyFingerprints: false`, and replays cookies + per-origin storage on every new page **before** any navigation runs.
|
|
354
|
-
|
|
355
|
-
```typescript
|
|
356
|
-
import CDPCrawler, { type SessionBundle } from 'cdp-crawler';
|
|
357
|
-
import { Actor } from 'apify';
|
|
358
|
-
|
|
359
|
-
const bundle = await Actor.getValue<SessionBundle>('session');
|
|
360
|
-
if (!bundle) throw new Error('No session bundle available');
|
|
361
|
-
|
|
362
|
-
const consumer = new CDPCrawler({
|
|
363
|
-
launchContext: {
|
|
364
|
-
launchOptions: {
|
|
365
|
-
useNativeStealth: true,
|
|
366
|
-
sessionBundle: bundle, // ← the only new option
|
|
367
|
-
},
|
|
368
|
-
},
|
|
369
|
-
requestHandler: async ({ page }) => {
|
|
370
|
-
// The first page already has the producer's cookies, localStorage,
|
|
371
|
-
// sessionStorage, IndexedDB, Service Worker registrations, etc.
|
|
372
|
-
await page.goto('https://target.example.com/account'); // already logged in
|
|
373
|
-
},
|
|
374
|
-
});
|
|
375
|
-
|
|
376
|
-
await consumer.run(['https://target.example.com/account']);
|
|
377
|
-
```
|
|
378
|
-
|
|
379
|
-
### What's inside a bundle
|
|
380
|
-
|
|
381
|
-
```typescript
|
|
382
|
-
interface SessionBundle {
|
|
383
|
-
schemaVersion: 1;
|
|
384
|
-
createdAt: string;
|
|
385
|
-
createdBy?: { package: 'cdp-crawler'; version: string };
|
|
386
|
-
|
|
387
|
-
cookies: SerializedCookie[]; // Network.Cookie-shaped
|
|
388
|
-
localStorage: Record<origin, Record<key, value>>;
|
|
389
|
-
sessionStorage: Record<origin, Record<key, value>>;
|
|
390
|
-
|
|
391
|
-
userDataDir: {
|
|
392
|
-
encoding: 'base64+gzip+tar';
|
|
393
|
-
bytes: string; // the whole Chrome profile
|
|
394
|
-
sizeBytes: number;
|
|
395
|
-
capturedFiles: number;
|
|
396
|
-
} | null;
|
|
397
|
-
|
|
398
|
-
fingerprintConfig: FingerprintConfigJson; // C++ overrides JSON, byte-for-byte
|
|
399
|
-
fingerprint: BrowserFingerprintWithHeaders; // Crawlee-shaped fp object
|
|
400
|
-
fingerprintInput: { // The fingerprintOptions inputs
|
|
401
|
-
locale?: string; languages?: string; timezone?: string;
|
|
402
|
-
platform?: string; seedKey?: string;
|
|
403
|
-
useNonApifyFingerprints: false; // pinned on restore
|
|
404
|
-
[key: string]: unknown;
|
|
405
|
-
};
|
|
406
|
-
resolvedLocale: { locale: string; languages: string; timezone: string };
|
|
407
|
-
|
|
408
|
-
browserProfile: { // diagnostic snapshot
|
|
409
|
-
userAgent: string; platform: string; language: string;
|
|
410
|
-
screenWidth: number; screenHeight: number; devicePixelRatio: number;
|
|
411
|
-
webglRenderer: string; webglVendor: string;
|
|
412
|
-
};
|
|
413
|
-
|
|
414
|
-
proxyMeta?: { proxyUrl?: string; sessionId?: string; countryCode?: string };
|
|
415
|
-
}
|
|
416
|
-
```
|
|
417
|
-
|
|
418
|
-
Bundles are versioned via `schemaVersion`. Loading a bundle with an unrecognized version throws an explicit error rather than misbehaving silently. A helper `assertValidBundle(value)` is exported for callers that want to validate before passing the bundle around.
|
|
419
|
-
|
|
420
|
-
### `saveSession()` options
|
|
421
|
-
|
|
422
|
-
```typescript
|
|
423
|
-
await saveSession(page, {
|
|
424
|
-
includeUserDataDir: true, // default true; set false for a JSON-only bundle (~50 KB)
|
|
425
|
-
flushCookies: true, // default true → calls Storage.flushBrowserCookies first
|
|
426
|
-
userDataDirPath: undefined, // override; defaults to the path used at launch
|
|
427
|
-
cookieUrls: undefined, // forwarded to Network.getCookies as a fallback
|
|
428
|
-
proxyMeta: { // optional; stamped for inspection, NOT replayed
|
|
429
|
-
proxyUrl, sessionId, countryCode,
|
|
430
|
-
},
|
|
431
|
-
});
|
|
432
|
-
```
|
|
433
|
-
|
|
434
|
-
The proxy is intentionally not rebuilt on restore — pass your own `proxyUrl` to the consumer crawler so cookies stay tied to the same exit IP.
|
|
435
|
-
|
|
436
|
-
### `restoreSession()`
|
|
437
|
-
|
|
438
|
-
`restoreSession(page, bundle)` is the manual escape hatch for advanced users who open additional pages (or targets) inside a single crawl and want them to share the bundle's cookies + storage. Most users do not need it — the `launchOptions.sessionBundle` option is the canonical path.
|
|
439
|
-
|
|
440
|
-
```typescript
|
|
441
|
-
import { restoreSession } from 'cdp-crawler';
|
|
442
|
-
|
|
443
|
-
await restoreSession(page, bundle); // sets cookies + registers per-origin storage replay
|
|
444
|
-
```
|
|
445
|
-
|
|
446
|
-
Note: `restoreSession` cannot swap the user-data-dir or fingerprintConfig on a running browser — those are launch-time inputs and must travel through `launchOptions.sessionBundle`.
|
|
447
|
-
|
|
448
|
-
### Caveats
|
|
449
|
-
|
|
450
|
-
- **User-data-dir size**: a real Chrome profile can be several MB. Inlined as base64-gzipped-tar inside the JSON, this can push the bundle past the Apify KV-store 9 MB record limit. Use `includeUserDataDir: false` if you only need cookies + storage and can live without IndexedDB / Service Workers / Cache Storage.
|
|
451
|
-
- **Locks**: `Singleton*` sentinels, GPU/Shader/Code caches, and Crashpad metrics are deliberately stripped from the tar — they are process-bound or freely regenerable and would otherwise break restore on a new machine.
|
|
452
|
-
- **Capture timing**: the data-dir tar is taken while Chrome is still running. To avoid half-written LevelDB files, prefer calling `saveSession` after the page has gone idle (`waitForLoadState`-style settling, or right before `crawler.teardown()`). By default (`flushCookies: true`), `saveSession` calls `Storage.flushBrowserCookies` first.
|
|
453
|
-
- **Cross-version replay**: bundles are tagged with `createdBy.version`; loading a bundle produced by a meaningfully different `cdp-crawler` version may warn or fail depending on what changed in `fingerprintConfig`'s shape. The plan is to migrate forward, not to support arbitrarily old bundles.
|
|
454
|
-
- **Multi-origin localStorage**: `saveSession` captures the current page's origin only. If you need storage from multiple origins, navigate to each one before calling `saveSession`, or call `saveSession` per page and merge the bundles client-side.
|
|
455
|
-
|
|
456
|
-
---
|
|
457
|
-
|
|
458
|
-
## `Crawler` Class Documentation
|
|
459
|
-
|
|
460
|
-
### Constructor
|
|
461
|
-
|
|
462
|
-
#### `constructor(options: BrowserCrawlerOptions = {}, override readonly config = Configuration.getGlobalConfig())`
|
|
463
|
-
|
|
464
|
-
Initializes the `Crawler` instance with default and provided options.
|
|
465
|
-
|
|
466
|
-
- **Parameters**:
|
|
467
|
-
|
|
468
|
-
- `options` (BrowserCrawlerOptions): Configuration options for the crawler.
|
|
469
|
-
- `launchContext`: Specifies browser launch parameters.
|
|
470
|
-
- Default: `{}`
|
|
471
|
-
- `headless`: Runs the browser in headless mode.
|
|
472
|
-
- Default: `false`
|
|
473
|
-
- `browserPoolOptions`: Configuration for managing browser instances.
|
|
474
|
-
- `config` (Configuration): Global Crawlee configuration.
|
|
475
|
-
- Default: `Configuration.getGlobalConfig()`
|
|
476
|
-
|
|
477
|
-
- **Default Behavior**:
|
|
478
|
-
- Throws an error if `launchContext.proxyUrl` is provided. Use `proxyConfiguration` instead.
|
|
479
|
-
- Throws an error if `browserPoolOptions.browserPlugins` is set. Use `launchContext.launcher` instead.
|
|
480
|
-
|
|
481
|
-
---
|
|
482
|
-
|
|
483
|
-
## `CdpPage` Class Documentation
|
|
484
|
-
|
|
485
|
-
### Constructor
|
|
486
|
-
|
|
487
|
-
#### `constructor(client: CDP.Client)`
|
|
488
|
-
|
|
489
|
-
Initializes the `CdpPage` instance with a CDP client.
|
|
490
|
-
|
|
491
|
-
- **Parameters**:
|
|
492
|
-
|
|
493
|
-
- `client` (CDP.Client): The Chrome DevTools Protocol client.
|
|
494
|
-
|
|
495
|
-
- **Emitted Events**:
|
|
496
|
-
- `PAGE_CREATED`: Triggered upon the creation of the page.
|
|
497
|
-
|
|
498
|
-
### Static Methods
|
|
499
|
-
|
|
500
|
-
#### `static async create(client: CDP.Client): Promise<CdpPage>`
|
|
501
|
-
|
|
502
|
-
Creates and initializes a new `CdpPage` instance.
|
|
503
|
-
|
|
504
|
-
- **Parameters**:
|
|
505
|
-
|
|
506
|
-
- `client` (CDP.Client): The CDP client.
|
|
507
|
-
|
|
508
|
-
- **Returns**:
|
|
509
|
-
- `Promise<CdpPage>`: A promise resolving to the new `CdpPage` instance.
|
|
510
|
-
|
|
511
|
-
---
|
|
512
|
-
|
|
513
|
-
### Public Methods
|
|
514
|
-
|
|
515
|
-
#### `async url(): Promise<string>`
|
|
516
|
-
Gets the current URL of the page.
|
|
517
|
-
|
|
518
|
-
- **Returns**:
|
|
519
|
-
- `Promise<string>`: The current URL.
|
|
520
|
-
|
|
521
|
-
#### `async goto(url: string, options?: GotoOptions): Promise<void>`
|
|
522
|
-
Navigates to a specified URL.
|
|
523
|
-
|
|
524
|
-
- **Parameters**:
|
|
525
|
-
- `url` (string): The URL to navigate to.
|
|
526
|
-
- `options` (GotoOptions): Navigation options, including:
|
|
527
|
-
- `waitUntil`: When to consider navigation finished (`domcontentloaded` or `load`).
|
|
528
|
-
- `timeout`: Maximum time to wait for navigation in milliseconds.
|
|
529
|
-
|
|
530
|
-
#### `async click(selector: string): Promise<void>`
|
|
531
|
-
Simulates a click on an element identified by the selector.
|
|
532
|
-
|
|
533
|
-
- **Parameters**:
|
|
534
|
-
- `selector` (string): CSS selector of the element.
|
|
535
|
-
|
|
536
|
-
#### `async type(selector: string, text: string, options?: { delay?: number }): Promise<void>`
|
|
537
|
-
Types text into an input field.
|
|
538
|
-
|
|
539
|
-
- **Parameters**:
|
|
540
|
-
- `selector` (string): CSS selector of the element.
|
|
541
|
-
- `text` (string): Text to type.
|
|
542
|
-
- `options` (object): Options for typing:
|
|
543
|
-
- `delay`: Time in milliseconds between key presses.
|
|
544
|
-
|
|
545
|
-
#### `async screenshot(options?: { path?: string; fullPage?: boolean; format?: 'png' | 'jpeg' }): Promise<Buffer>`
|
|
546
|
-
Takes a screenshot of the page, with support for PNG and JPEG formats.
|
|
547
|
-
|
|
548
|
-
- **Parameters**:
|
|
549
|
-
- `options` (object): Screenshot options:
|
|
550
|
-
- `path`: File path to save the screenshot.
|
|
551
|
-
- `fullPage`: Capture the entire page.
|
|
552
|
-
- `format`: Image format, either `'png'` (default) or `'jpeg'`.
|
|
553
|
-
|
|
554
|
-
- **Returns**:
|
|
555
|
-
- `Promise<Buffer>`: The screenshot as a buffer.
|
|
556
|
-
|
|
557
|
-
#### `async content(): Promise<string>`
|
|
558
|
-
Gets the HTML content of the page.
|
|
559
|
-
|
|
560
|
-
- **Returns**:
|
|
561
|
-
- `Promise<string>`: The page's HTML.
|
|
562
|
-
|
|
563
|
-
#### `async toCheerio(): Promise<cheerio.CheerioAPI>`
|
|
564
|
-
Converts the current page content to a Cheerio instance for DOM manipulation.
|
|
565
|
-
|
|
566
|
-
- **Returns**:
|
|
567
|
-
- `Promise<cheerio.CheerioAPI>`: A Cheerio API instance.
|
|
568
|
-
|
|
569
|
-
#### `async setViewport(viewport: Viewport): Promise<void>`
|
|
570
|
-
Sets the page's viewport dimensions.
|
|
571
|
-
|
|
572
|
-
- **Parameters**:
|
|
573
|
-
- `viewport` (Viewport): Object with `width` and `height` properties.
|
|
574
|
-
|
|
575
|
-
#### `async setUserAgent(userAgent: string): Promise<void>`
|
|
576
|
-
Overrides the user-agent string.
|
|
577
|
-
|
|
578
|
-
- **Parameters**:
|
|
579
|
-
- `userAgent` (string): The new user-agent string.
|
|
580
|
-
|
|
581
|
-
#### `async setExtraHTTPHeaders(headers: Record<string, string>): Promise<void>`
|
|
582
|
-
Sets additional HTTP headers for requests.
|
|
583
|
-
|
|
584
|
-
- **Parameters**:
|
|
585
|
-
- `headers` (Record<string, string>): Key-value pairs of headers.
|
|
586
|
-
|
|
587
|
-
#### `async waitForResponse(urlPart: string, statusCode?: number, timeout?: number): Promise<any>`
|
|
588
|
-
Waits for a specific network response.
|
|
589
|
-
|
|
590
|
-
- **Parameters**:
|
|
591
|
-
- `urlPart` (string): Part of the URL to match.
|
|
592
|
-
- `statusCode` (number): Expected HTTP status code.
|
|
593
|
-
- `timeout` (number): Maximum wait time in milliseconds.
|
|
594
|
-
|
|
595
|
-
- **Returns**:
|
|
596
|
-
- `Promise<any>`: The response.
|
|
597
|
-
|
|
598
|
-
#### `async setCookies(cookies: Cookie[]): Promise<void>`
|
|
599
|
-
Sets cookies for the page.
|
|
600
|
-
|
|
601
|
-
- **Parameters**:
|
|
602
|
-
- `cookies` (Cookie[]): Array of cookies to set.
|
|
603
|
-
|
|
604
|
-
#### `async getCookies(urls?: string[]): Promise<Cookie[]>`
|
|
605
|
-
Retrieves cookies for the given URLs, or all cookies if no URLs are specified.
|
|
606
|
-
|
|
607
|
-
- **Parameters**:
|
|
608
|
-
- `urls` (string[]): Optional array of URLs.
|
|
609
|
-
|
|
610
|
-
- **Returns**:
|
|
611
|
-
- `Promise<Cookie[]>`: Array of cookies.
|
|
612
|
-
|
|
613
|
-
#### `async waitForSelector(selector: string, options?: { timeout?: number }): Promise<void>`
|
|
614
|
-
Waits for an element matching the selector to appear.
|
|
615
|
-
|
|
616
|
-
- **Parameters**:
|
|
617
|
-
- `selector` (string): CSS selector of the element.
|
|
618
|
-
- `options` (object): Options for waiting:
|
|
619
|
-
- `timeout`: Maximum wait time in milliseconds.
|
|
620
|
-
|
|
621
|
-
#### `async elementExists(selector: string): Promise<boolean>`
|
|
622
|
-
Checks if an element exists.
|
|
623
|
-
|
|
624
|
-
- **Parameters**:
|
|
625
|
-
- `selector` (string): CSS selector of the element.
|
|
626
|
-
|
|
627
|
-
- **Returns**:
|
|
628
|
-
- `Promise<boolean>`: `true` if the element exists, `false` otherwise.
|
|
629
|
-
|
|
630
|
-
#### `async getTextContent(selector: string): Promise<string>`
|
|
631
|
-
Gets the text content of an element.
|
|
632
|
-
|
|
633
|
-
- **Parameters**:
|
|
634
|
-
- `selector` (string): CSS selector of the element.
|
|
635
|
-
|
|
636
|
-
- **Returns**:
|
|
637
|
-
- `Promise<string>`: The element's text content.
|
|
638
|
-
|
|
639
|
-
#### `async getHref(selector: string): Promise<string>`
|
|
640
|
-
Gets the `href` attribute of an anchor element.
|
|
641
|
-
|
|
642
|
-
- **Parameters**:
|
|
643
|
-
- `selector` (string): CSS selector of the anchor element.
|
|
644
|
-
|
|
645
|
-
- **Returns**:
|
|
646
|
-
- `Promise<string>`: The `href` value.
|
|
647
|
-
|
|
648
|
-
#### `async reload(options?: GotoOptions): Promise<void>`
|
|
649
|
-
Reloads the current page.
|
|
650
|
-
|
|
651
|
-
- **Parameters**:
|
|
652
|
-
- `options` (GotoOptions): Navigation options, including:
|
|
653
|
-
- `waitUntil`: When to consider reload finished (`domcontentloaded` or `load`).
|
|
654
|
-
- `timeout`: Maximum time to wait for reload in milliseconds.
|
|
655
|
-
|
|
656
|
-
#### `async deleteInput(selector: string): Promise<void>`
|
|
657
|
-
Clears the value of an input field specified by the selector.
|
|
658
|
-
|
|
659
|
-
- **Parameters**:
|
|
660
|
-
- `selector` (string): CSS selector of the input element.
|
|
661
|
-
|
|
662
|
-
#### `async isVisible(selector: string): Promise<boolean>`
|
|
663
|
-
Checks if the element specified by selector is visible (not `display: none` and not `visibility: hidden`).
|
|
664
|
-
The selector should target the root element that can be hidden; otherwise, this function may return a false positive.
|
|
665
|
-
|
|
666
|
-
- **Parameters**:
|
|
667
|
-
- `selector` (string): CSS selector of the element.
|
|
668
|
-
- **Returns**:
|
|
669
|
-
- `Promise<boolean>`: `true` if the element is visible, `false` otherwise.
|
|
670
|
-
|
|
671
|
-
#### `async selectOption(dropdownSelector: string, optionSelector: string | string[], options?: SelectOptionOptions): Promise<void>`
|
|
672
|
-
Selects one or more options from a select element or dropdown with intelligent automatic handling.
|
|
673
|
-
|
|
674
|
-
**Key Features**:
|
|
675
|
-
- **Automatic Detection**: Distinguishes between HTML `<select>` elements and custom dropdowns
|
|
676
|
-
- **Smart Trigger Discovery**: For custom dropdowns, automatically finds and clicks triggers using multiple strategies
|
|
677
|
-
- **Virtualized List Support**: Handles large dropdown lists with intelligent scrolling
|
|
678
|
-
- **No Manual Configuration**: No need to specify separate trigger and container selectors
|
|
679
|
-
|
|
680
|
-
- **Parameters**:
|
|
681
|
-
- `dropdownSelector` (string): CSS selector for the select element or dropdown container.
|
|
682
|
-
- `optionSelector` (string | string[]): CSS selector(s) for the option(s) to select. Can be a single selector or array of selectors.
|
|
683
|
-
- `options` (SelectOptionOptions): Optional configuration object with the following properties:
|
|
684
|
-
- `timeout` (number): Maximum wait time in milliseconds. Default: 30000.
|
|
685
|
-
- `force` (boolean): Bypass visibility and disabled checks. Default: false.
|
|
686
|
-
- `waitForOptions` (boolean): Wait for dropdown options to load. Default: true.
|
|
687
|
-
- `maxScrollAttempts` (number): Maximum scroll attempts for virtualized dropdowns. Default: 10.
|
|
688
|
-
|
|
689
|
-
#### `async waitForElementPositionToStabilize(selector: string, timeout?: number, checkInterval?: number, stabilityThreshold?: number, tolerance?: number): Promise<void>`
|
|
690
|
-
Waits for an element's position to stabilize by polling its bounding box. Useful before interacting with an element after scrolling or animations.
|
|
691
|
-
|
|
692
|
-
- **Parameters**:
|
|
693
|
-
- `selector` (string): Target element selector
|
|
694
|
-
- `timeout` (number): Max time to wait. Default: 2000
|
|
695
|
-
- `checkInterval` (number): Polling interval. Default: 100
|
|
696
|
-
- `stabilityThreshold` (number): Consecutive stable checks required. Default: 3
|
|
697
|
-
- `tolerance` (number): Max pixel delta to consider stable. Default: 1
|
|
698
|
-
|
|
699
|
-
- **Usage Examples** (`selectOption`):
|
|
700
|
-
```typescript
|
|
701
|
-
// Regular HTML select element - works directly
|
|
702
|
-
await page.selectOption('select#country', 'option[value="us"]');
|
|
703
|
-
|
|
704
|
-
// Multiple selection in HTML select
|
|
705
|
-
await page.selectOption('select#languages', ['option[value="en"]', 'option[value="es"]']);
|
|
706
|
-
|
|
707
|
-
// Custom dropdown - automatically finds and clicks trigger
|
|
708
|
-
await page.selectOption('#dropdown-menu', '[data-value="premium"]');
|
|
709
|
-
|
|
710
|
-
// Virtualized dropdown - automatically scrolls to find option
|
|
711
|
-
await page.selectOption('#large-dropdown', '[data-item="item-500"]');
|
|
712
|
-
|
|
713
|
-
// With custom configuration
|
|
714
|
-
await page.selectOption(
|
|
715
|
-
'#complex-dropdown',
|
|
716
|
-
'.option[data-category="business"]',
|
|
717
|
-
{
|
|
718
|
-
timeout: 10000,
|
|
719
|
-
maxScrollAttempts: 15
|
|
720
|
-
}
|
|
721
|
-
);
|
|
722
|
-
|
|
723
|
-
// Bootstrap/Material-UI dropdowns work automatically
|
|
724
|
-
await page.selectOption('.MuiSelect-menu', '[data-value="option1"]');
|
|
725
|
-
await page.selectOption('.dropdown-menu', '.dropdown-item[data-value="choice2"]');
|
|
726
|
-
```
|
|
727
|
-
|
|
728
|
-
- **How Trigger Detection Works**:
|
|
729
|
-
The method automatically detects dropdown triggers using multiple strategies:
|
|
730
|
-
1. **Accessibility patterns**: `[aria-haspopup]`, `[role="button"]`
|
|
731
|
-
2. **Common class names**: `.dropdown-trigger`, `.select-trigger`
|
|
732
|
-
3. **Sibling elements**: Previous sibling of the dropdown container
|
|
733
|
-
4. **ID pattern matching**: `#menu-id` → `#trigger-id`, `#dropdown-menu` → `#dropdown-trigger`
|
|
734
|
-
|
|
735
|
-
- **Migration from Previous API**:
|
|
736
|
-
```typescript
|
|
737
|
-
// OLD - Complex API with manual configuration
|
|
738
|
-
await page.selectOption('#trigger', '[data-value="item"]', {
|
|
739
|
-
dropdownSelector: '#menu',
|
|
740
|
-
optionSelector: '.dropdown-item'
|
|
741
|
-
});
|
|
742
|
-
|
|
743
|
-
// NEW - Simplified API with automatic detection
|
|
744
|
-
await page.selectOption('#menu', '[data-value="item"]');
|
|
745
|
-
```
|
|
746
|
-
|
|
747
|
-
---
|
|
748
|
-
|
|
749
|
-
## Utility Functions
|
|
750
|
-
|
|
751
|
-
### `createCDPRouter`
|
|
752
|
-
|
|
753
|
-
#### `export function createCDPRouter<Context extends CDPCrawlingContext = CDPCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): Router<Context>`
|
|
754
|
-
|
|
755
|
-
Creates a custom router for handling crawling routes using CDP.
|
|
756
|
-
|
|
757
|
-
- **Parameters**:
|
|
758
|
-
- `routes` (RouterRoutes<Context, UserData>): Optional routes for defining crawl logic.
|
|
759
|
-
|
|
760
|
-
- **Returns**:
|
|
761
|
-
- `Router<Context>`: A configured router instance.
|
|
762
|
-
|
|
763
|
-
---
|
|
764
|
-
|
|
765
|
-
### `saveSession`
|
|
766
|
-
|
|
767
|
-
#### `export async function saveSession(page: CdpPage, options?: SaveSessionOptions): Promise<SessionBundle>`
|
|
768
|
-
|
|
769
|
-
Captures the full browser state of a running page — cookies, per-origin web storage, fingerprintConfig, fingerprint object, resolved locale, and the Chrome user-data-dir (inlined as base64-gzipped-tar) — into a single JSON-serializable bundle.
|
|
770
|
-
|
|
771
|
-
- **Parameters**:
|
|
772
|
-
- `page` (CdpPage): a page produced by a `CDPCrawler` instance.
|
|
773
|
-
- `options` (SaveSessionOptions): optional knobs:
|
|
774
|
-
- `includeUserDataDir` (boolean, default `true`): pack and inline the Chrome user-data-dir.
|
|
775
|
-
- `flushCookies` (boolean, default `true`): call `Storage.flushBrowserCookies` before snapshotting cookies.
|
|
776
|
-
- `userDataDirPath` (string): override the user-data-dir path; defaults to the one used at launch.
|
|
777
|
-
- `cookieUrls` (string[]): forwarded to `Network.getCookies` when `Storage.getCookies` is unavailable.
|
|
778
|
-
- `proxyMeta` ({ proxyUrl?, sessionId?, countryCode? }): stamped on the bundle for inspection only.
|
|
779
|
-
|
|
780
|
-
- **Returns**:
|
|
781
|
-
- `Promise<SessionBundle>`: a fully populated, JSON-serializable bundle.
|
|
782
|
-
|
|
783
|
-
See [Session Bundle](#session-bundle-save--restore-browser-state) for usage patterns.
|
|
784
|
-
|
|
785
|
-
### `restoreSession`
|
|
786
|
-
|
|
787
|
-
#### `export async function restoreSession(page: CdpPage, bundle: SessionBundle): Promise<void>`
|
|
788
|
-
|
|
789
|
-
Manually applies a bundle's cookies and per-origin web storage to an arbitrary page. Use this only for advanced flows (e.g. opening extra targets mid-crawl). The canonical path is `launchOptions.sessionBundle`, which also restores the user-data-dir and fingerprintConfig — `restoreSession` cannot swap those on a running browser.
|
|
790
|
-
|
|
791
|
-
- **Parameters**:
|
|
792
|
-
- `page` (CdpPage): the target page.
|
|
793
|
-
- `bundle` (SessionBundle): a bundle produced by `saveSession`.
|
|
794
|
-
|
|
795
|
-
---
|
|
1
|
+
# jumpy-lion
|