@rafikidota/scoutee 0.16.1 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +362 -0
- package/dist/common/namespace/env.namespace.d.ts +14 -0
- package/dist/common/namespace/env.namespace.js +40 -0
- package/dist/common/namespace/env.namespace.js.map +1 -0
- package/dist/common/util/boolean.d.ts +3 -1
- package/dist/common/util/boolean.js +18 -16
- package/dist/common/util/boolean.js.map +1 -1
- package/dist/modules/camoufox/constant/camoufox.env.constant.d.ts +6 -0
- package/dist/modules/camoufox/constant/camoufox.env.constant.js +6 -0
- package/dist/modules/camoufox/constant/camoufox.env.constant.js.map +1 -1
- package/dist/modules/camoufox/enum/camoufox.os.d.ts +5 -0
- package/dist/modules/camoufox/enum/camoufox.os.js +10 -0
- package/dist/modules/camoufox/enum/camoufox.os.js.map +1 -0
- package/dist/modules/camoufox/env/camoufox.env.config.d.ts +10 -4
- package/dist/modules/camoufox/env/camoufox.env.config.js +33 -10
- package/dist/modules/camoufox/env/camoufox.env.config.js.map +1 -1
- package/dist/modules/camoufox/env/camoufox.env.validation.js +10 -0
- package/dist/modules/camoufox/env/camoufox.env.validation.js.map +1 -1
- package/dist/modules/camoufox/env/camoufox.env.zod.d.ts +7 -0
- package/dist/modules/camoufox/env/camoufox.env.zod.js +7 -0
- package/dist/modules/camoufox/env/camoufox.env.zod.js.map +1 -1
- package/dist/modules/camoufox/services/camoufox.browser.service.d.ts +2 -1
- package/dist/modules/camoufox/services/camoufox.browser.service.js +7 -3
- package/dist/modules/camoufox/services/camoufox.browser.service.js.map +1 -1
- package/dist/modules/camoufox/services/camoufox.config.service.d.ts +7 -0
- package/dist/modules/camoufox/services/camoufox.config.service.js +28 -0
- package/dist/modules/camoufox/services/camoufox.config.service.js.map +1 -1
- package/dist/modules/camoufox/services/camoufox.hook.service.js.map +1 -1
- package/dist/modules/camoufox/types/camoufox.config.d.ts +7 -0
- package/dist/modules/cheerio/env/cheerio.env.config.d.ts +1 -1
- package/dist/modules/cheerio/env/cheerio.env.config.js +13 -6
- package/dist/modules/cheerio/env/cheerio.env.config.js.map +1 -1
- package/dist/modules/cheerio/services/cheerio.hook.service.js +1 -1
- package/dist/modules/cheerio/services/cheerio.hook.service.js.map +1 -1
- package/dist/modules/http/env/http.env.config.d.ts +1 -1
- package/dist/modules/http/env/http.env.config.js +13 -6
- package/dist/modules/http/env/http.env.config.js.map +1 -1
- package/dist/modules/http/services/http.hook.service.js +1 -1
- package/dist/modules/http/services/http.hook.service.js.map +1 -1
- package/dist/modules/playwright/constant/playwright.browser.constant.js +3 -3
- package/dist/modules/playwright/constant/playwright.browser.constant.js.map +1 -1
- package/dist/modules/playwright/env/playwright.env.config.d.ts +3 -3
- package/dist/modules/playwright/env/playwright.env.config.js +21 -10
- package/dist/modules/playwright/env/playwright.env.config.js.map +1 -1
- package/dist/modules/playwright/services/playwright.hook.service.js.map +1 -1
- package/package.json +24 -23
- package/.github/workflows/publish.yml +0 -39
- package/pnpm-workspace.yaml +0 -3
package/README.md
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
# 🕵️ @rafikidota/scoutee
|
|
2
|
+
|
|
3
|
+
> *"Sometimes, the best way to solve your own problems is to help someone else."*
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/@rafikidota/scoutee)
|
|
6
|
+
[License](./LICENSE)
|
|
7
|
+
[Node.js](https://nodejs.org)
|
|
8
|
+
[pnpm](https://pnpm.io)
|
|
9
|
+
|
|
10
|
+
**Scoutee** is a **NestJS** library that wraps [Crawlee](https://crawlee.dev) crawlers into injectable, environment-driven modules. It gives you production-ready `HttpCrawler`, `CheerioCrawler`, `PlaywrightCrawler`, and stealth **Camoufox** crawlers — all wired up with pre/post navigation hooks, structured logging, and full `ConfigService` integration out of the box.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## 📦 Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pnpm add @rafikidota/scoutee
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Peer dependencies
|
|
21
|
+
|
|
22
|
+
Install the crawlers you actually need:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# HTTP / Cheerio (lightweight)
|
|
26
|
+
pnpm add crawlee
|
|
27
|
+
|
|
28
|
+
# Playwright (full browser)
|
|
29
|
+
pnpm add crawlee @crawlee/playwright playwright
|
|
30
|
+
|
|
31
|
+
# Camoufox (stealth browser — anti-bot fingerprint spoofing)
|
|
32
|
+
pnpm add crawlee @crawlee/playwright playwright camoufox-js
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Scoutee also requires a NestJS application context:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pnpm add @nestjs/common @nestjs/core @nestjs/config
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## 🗂️ Package exports
|
|
44
|
+
|
|
45
|
+
Each crawler ships as a separate entry point so you only bundle what you use:
|
|
46
|
+
|
|
47
|
+
| Import path | What you get |
|
|
48
|
+
|---|---|
|
|
49
|
+
| `@rafikidota/scoutee` | All four modules |
|
|
50
|
+
| `@rafikidota/scoutee/http` | `HttpModule` + `HttpService` |
|
|
51
|
+
| `@rafikidota/scoutee/cheerio` | `CheerioModule` + `CheerioService` |
|
|
52
|
+
| `@rafikidota/scoutee/playwright` | `PlaywrightModule` + `PlaywrightService` |
|
|
53
|
+
| `@rafikidota/scoutee/camoufox` | `CamoufoxModule` + `CamoufoxService` |
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## 🚀 Quick start
|
|
58
|
+
|
|
59
|
+
### 1. Register the module
|
|
60
|
+
|
|
61
|
+
Import **only** the modules you need. Each one is self-contained.
|
|
62
|
+
|
|
63
|
+
```typescript
|
|
64
|
+
// app.module.ts
|
|
65
|
+
import { Module } from '@nestjs/common';
|
|
66
|
+
import { ConfigModule } from '@nestjs/config';
|
|
67
|
+
import { PlaywrightModule } from '@rafikidota/scoutee/playwright';
|
|
68
|
+
|
|
69
|
+
@Module({
|
|
70
|
+
imports: [
|
|
71
|
+
ConfigModule.forRoot({ isGlobal: true }),
|
|
72
|
+
PlaywrightModule,
|
|
73
|
+
],
|
|
74
|
+
})
|
|
75
|
+
export class AppModule {}
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### 2. Inject the service and create a crawler
|
|
79
|
+
|
|
80
|
+
```typescript
|
|
81
|
+
import { Injectable } from '@nestjs/common';
|
|
82
|
+
import { PlaywrightService } from '@rafikidota/scoutee/playwright';
|
|
83
|
+
import { Dataset } from 'crawlee';
|
|
84
|
+
|
|
85
|
+
@Injectable()
|
|
86
|
+
export class ScraperService {
|
|
87
|
+
constructor(private readonly playwright: PlaywrightService) {}
|
|
88
|
+
|
|
89
|
+
async run() {
|
|
90
|
+
const crawler = this.playwright.create({
|
|
91
|
+
async requestHandler({ page, request }) {
|
|
92
|
+
const title = await page.title();
|
|
93
|
+
await Dataset.pushData({ url: request.url, title });
|
|
94
|
+
},
|
|
95
|
+
});
|
|
96
|
+
|
|
97
|
+
await crawler.run(['https://example.com']);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## 🧩 Modules
|
|
105
|
+
|
|
106
|
+
### 🌐 HttpModule
|
|
107
|
+
|
|
108
|
+
Thin wrapper around Crawlee's `HttpCrawler`. Best for raw HTTP requests without a browser.
|
|
109
|
+
|
|
110
|
+
```typescript
|
|
111
|
+
import { HttpModule, HttpService } from '@rafikidota/scoutee/http';
|
|
112
|
+
// or from '@rafikidota/scoutee'
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**Service method:**
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
const crawler = httpService.create(options: HttpCrawlerOptions): HttpCrawler
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Environment variables:**
|
|
122
|
+
|
|
123
|
+
| Variable | Description |
|
|
124
|
+
|---|---|
|
|
125
|
+
| `CRAWLEE_HTTP_MAX_CONCURRENCY` | Maximum parallel requests |
|
|
126
|
+
| `CRAWLEE_HTTP_MIN_CONCURRENCY` | Minimum parallel requests |
|
|
127
|
+
| `CRAWLEE_HTTP_MAX_REQUEST_RETRIES` | Retry count per request |
|
|
128
|
+
| `CRAWLEE_HTTP_TIMEOUT_SECS` | Request handler timeout (seconds) |
|
|
129
|
+
| `CRAWLEE_HTTP_MAX_REQUESTS` | Total request cap per run |
|
|
130
|
+
| `CRAWLEE_HTTP_INITIAL_PAGE` | Starting page number |
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
### 🍋 CheerioModule
|
|
135
|
+
|
|
136
|
+
Wrapper around Crawlee's `CheerioCrawler`. Automatically parses HTML with [Cheerio](https://cheerio.js.org) — ideal for static or server-rendered pages.
|
|
137
|
+
|
|
138
|
+
```typescript
|
|
139
|
+
import { CheerioModule, CheerioService } from '@rafikidota/scoutee/cheerio';
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Service method:**
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
const crawler = cheerioService.create(options: CheerioCrawlerOptions): CheerioCrawler
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
**Environment variables:**
|
|
149
|
+
|
|
150
|
+
| Variable | Description |
|
|
151
|
+
|---|---|
|
|
152
|
+
| `CRAWLEE_CHEERIO_MAX_CONCURRENCY` | Maximum parallel requests |
|
|
153
|
+
| `CRAWLEE_CHEERIO_MIN_CONCURRENCY` | Minimum parallel requests |
|
|
154
|
+
| `CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES` | Retry count per request |
|
|
155
|
+
| `CRAWLEE_CHEERIO_TIMEOUT_SECS` | Request handler timeout (seconds) |
|
|
156
|
+
| `CRAWLEE_CHEERIO_MAX_REQUESTS` | Total request cap per run |
|
|
157
|
+
| `CRAWLEE_CHEERIO_INITIAL_PAGE` | Starting page number |
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
### 🎭 PlaywrightModule
|
|
162
|
+
|
|
163
|
+
Full browser automation via Crawlee's `PlaywrightCrawler`. Supports **Chromium**, **Firefox**, and **WebKit** with session pooling, fingerprinting, and built-in Cloudflare challenge handling.
|
|
164
|
+
|
|
165
|
+
```typescript
|
|
166
|
+
import { PlaywrightModule, PlaywrightService } from '@rafikidota/scoutee/playwright';
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Service methods:**
|
|
170
|
+
|
|
171
|
+
```typescript
|
|
172
|
+
// Create a crawler instance
|
|
173
|
+
const crawler = playwrightService.create(options: PlaywrightCrawlerOptions): PlaywrightCrawler
|
|
174
|
+
|
|
175
|
+
// Get a raw browser instance
|
|
176
|
+
const browser = await playwrightService.getBrowser()
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
**Environment variables:**
|
|
180
|
+
|
|
181
|
+
| Variable | Description |
|
|
182
|
+
|---|---|
|
|
183
|
+
| `CRAWLEE_PLAYWRIGHT_BROWSER` | Browser engine: `chromium` \| `firefox` \| `webkit` |
|
|
184
|
+
| `CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY` | Maximum parallel browser pages |
|
|
185
|
+
| `CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY` | Minimum parallel browser pages |
|
|
186
|
+
| `CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES` | Retry count per request |
|
|
187
|
+
| `CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS` | Navigation and handler timeout (seconds) |
|
|
188
|
+
| `CRAWLEE_PLAYWRIGHT_MAX_REQUESTS` | Total request cap per run |
|
|
189
|
+
| `CRAWLEE_PLAYWRIGHT_INITIAL_PAGE` | Starting page number |
|
|
190
|
+
| `CRAWLEE_PLAYWRIGHT_HEADLESS` | Run browser headless (`true` \| `false`) |
|
|
191
|
+
| `CRAWLEE_PLAYWRIGHT_USE_INCOGNITO_PAGES` | Use incognito context (`true` \| `false`) |
|
|
192
|
+
| `CRAWLEE_PLAYWRIGHT_HANDLE_CLOUDFLARE_CHALLENGE` | Auto-solve Cloudflare challenges (`true` \| `false`) |
|
|
193
|
+
|
|
194
|
+
**Browser types (`BrowserType` enum):**
|
|
195
|
+
|
|
196
|
+
```typescript
|
|
197
|
+
import { BrowserType } from '@rafikidota/scoutee/playwright';
|
|
198
|
+
|
|
199
|
+
BrowserType.CHROMIUM // 'chromium'
|
|
200
|
+
BrowserType.FIREFOX // 'firefox'
|
|
201
|
+
BrowserType.WEBKIT // 'webkit'
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
---
|
|
205
|
+
|
|
206
|
+
### 🦊 CamoufoxModule
|
|
207
|
+
|
|
208
|
+
Stealth browser powered by [Camoufox](https://camoufox.com) — a hardened Firefox fork designed to bypass bot detection. Uses `PlaywrightCrawler` under the hood with fingerprint spoofing, GeoIP emulation, WebRTC blocking, and human-like behavior simulation.
|
|
209
|
+
|
|
210
|
+
```typescript
|
|
211
|
+
import { CamoufoxModule, CamoufoxService } from '@rafikidota/scoutee/camoufox';
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
**Service methods:**
|
|
215
|
+
|
|
216
|
+
```typescript
|
|
217
|
+
// Create a stealth crawler instance
|
|
218
|
+
const crawler = await camoufoxService.create(options: PlaywrightCrawlerOptions): Promise<PlaywrightCrawler>
|
|
219
|
+
|
|
220
|
+
// Get a raw Camoufox browser instance
|
|
221
|
+
const browser = await camoufoxService.getBrowser()
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
**Environment variables:**
|
|
225
|
+
|
|
226
|
+
| Variable | Description |
|
|
227
|
+
|---|---|
|
|
228
|
+
| `CRAWLEE_CAMOUFOX_MAX_CONCURRENCY` | Maximum parallel browser pages |
|
|
229
|
+
| `CRAWLEE_CAMOUFOX_MIN_CONCURRENCY` | Minimum parallel browser pages |
|
|
230
|
+
| `CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES` | Retry count per request |
|
|
231
|
+
| `CRAWLEE_CAMOUFOX_TIMEOUT_SECS` | Navigation and handler timeout (seconds) |
|
|
232
|
+
| `CRAWLEE_CAMOUFOX_MAX_REQUESTS` | Total request cap per run |
|
|
233
|
+
| `CRAWLEE_CAMOUFOX_INITIAL_PAGE` | Starting page number |
|
|
234
|
+
| `CRAWLEE_CAMOUFOX_HEADLESS` | Run browser headless (`true` \| `false`) |
|
|
235
|
+
| `CRAWLEE_CAMOUFOX_EXECUTABLE_PATH` | Custom Camoufox binary path (optional) |
|
|
236
|
+
| `CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE` | Auto-solve Cloudflare challenges (`true` \| `false`) |
|
|
237
|
+
| `CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES` | Use incognito context (`true` \| `false`) |
|
|
238
|
+
| `CRAWLEE_CAMOUFOX_GEOIP` | Enable GeoIP emulation (`true` \| `false`) |
|
|
239
|
+
| `CRAWLEE_CAMOUFOX_OS` | Spoof OS fingerprint: `windows` \| `macos` \| `linux` |
|
|
240
|
+
| `CRAWLEE_CAMOUFOX_BLOCK_WEBRTC` | Block WebRTC leaks (`true` \| `false`) |
|
|
241
|
+
| `CRAWLEE_CAMOUFOX_HUMANIZE` | Human-like mouse delay multiplier (number) |
|
|
242
|
+
| `CRAWLEE_CAMOUFOX_BLOCK_IMAGES` | Block image loading for speed (`true` \| `false`) |
|
|
243
|
+
| `CRAWLEE_CAMOUFOX_ENABLE_CACHE` | Enable browser cache (`true` \| `false`) |
|
|
244
|
+
|
|
245
|
+
**OS spoof options (`CamoufoxOS` enum):**
|
|
246
|
+
|
|
247
|
+
```typescript
|
|
248
|
+
import { CamoufoxOS } from '@rafikidota/scoutee/camoufox';
|
|
249
|
+
|
|
250
|
+
CamoufoxOS.WINDOWS // 'windows'
|
|
251
|
+
CamoufoxOS.MACOS // 'macos'
|
|
252
|
+
CamoufoxOS.LINUX // 'linux'
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## ⚙️ Environment file example
|
|
258
|
+
|
|
259
|
+
```dotenv
|
|
260
|
+
# --- HTTP ---
|
|
261
|
+
CRAWLEE_HTTP_MAX_CONCURRENCY=5
|
|
262
|
+
CRAWLEE_HTTP_MIN_CONCURRENCY=1
|
|
263
|
+
CRAWLEE_HTTP_MAX_REQUEST_RETRIES=3
|
|
264
|
+
CRAWLEE_HTTP_TIMEOUT_SECS=30
|
|
265
|
+
CRAWLEE_HTTP_MAX_REQUESTS=100
|
|
266
|
+
CRAWLEE_HTTP_INITIAL_PAGE=1
|
|
267
|
+
|
|
268
|
+
# --- Cheerio ---
|
|
269
|
+
CRAWLEE_CHEERIO_MAX_CONCURRENCY=5
|
|
270
|
+
CRAWLEE_CHEERIO_MIN_CONCURRENCY=1
|
|
271
|
+
CRAWLEE_CHEERIO_MAX_REQUEST_RETRIES=3
|
|
272
|
+
CRAWLEE_CHEERIO_TIMEOUT_SECS=30
|
|
273
|
+
CRAWLEE_CHEERIO_MAX_REQUESTS=100
|
|
274
|
+
CRAWLEE_CHEERIO_INITIAL_PAGE=1
|
|
275
|
+
|
|
276
|
+
# --- Playwright ---
|
|
277
|
+
CRAWLEE_PLAYWRIGHT_BROWSER=chromium
|
|
278
|
+
CRAWLEE_PLAYWRIGHT_MAX_CONCURRENCY=3
|
|
279
|
+
CRAWLEE_PLAYWRIGHT_MIN_CONCURRENCY=1
|
|
280
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUEST_RETRIES=2
|
|
281
|
+
CRAWLEE_PLAYWRIGHT_TIMEOUT_SECS=60
|
|
282
|
+
CRAWLEE_PLAYWRIGHT_MAX_REQUESTS=50
|
|
283
|
+
CRAWLEE_PLAYWRIGHT_INITIAL_PAGE=1
|
|
284
|
+
CRAWLEE_PLAYWRIGHT_HEADLESS=true
|
|
285
|
+
CRAWLEE_PLAYWRIGHT_USE_INCOGNITO_PAGES=false
|
|
286
|
+
CRAWLEE_PLAYWRIGHT_HANDLE_CLOUDFLARE_CHALLENGE=false
|
|
287
|
+
|
|
288
|
+
# --- Camoufox ---
|
|
289
|
+
CRAWLEE_CAMOUFOX_MAX_CONCURRENCY=2
|
|
290
|
+
CRAWLEE_CAMOUFOX_MIN_CONCURRENCY=1
|
|
291
|
+
CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES=2
|
|
292
|
+
CRAWLEE_CAMOUFOX_TIMEOUT_SECS=60
|
|
293
|
+
CRAWLEE_CAMOUFOX_MAX_REQUESTS=50
|
|
294
|
+
CRAWLEE_CAMOUFOX_INITIAL_PAGE=1
|
|
295
|
+
CRAWLEE_CAMOUFOX_HEADLESS=true
|
|
296
|
+
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE=true
|
|
297
|
+
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES=false
|
|
298
|
+
CRAWLEE_CAMOUFOX_GEOIP=true
|
|
299
|
+
CRAWLEE_CAMOUFOX_OS=linux
|
|
300
|
+
CRAWLEE_CAMOUFOX_BLOCK_WEBRTC=true
|
|
301
|
+
CRAWLEE_CAMOUFOX_HUMANIZE=1
|
|
302
|
+
CRAWLEE_CAMOUFOX_BLOCK_IMAGES=false
|
|
303
|
+
CRAWLEE_CAMOUFOX_ENABLE_CACHE=false
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
---
|
|
307
|
+
|
|
308
|
+
## 🏗️ Architecture overview
|
|
309
|
+
|
|
310
|
+
```
|
|
311
|
+
@rafikidota/scoutee
|
|
312
|
+
├── HttpModule → HttpService (HttpCrawler)
|
|
313
|
+
├── CheerioModule → CheerioService (CheerioCrawler)
|
|
314
|
+
├── PlaywrightModule → PlaywrightService (PlaywrightCrawler)
|
|
315
|
+
│ ├── BrowserService → browser launcher selection
|
|
316
|
+
│ ├── ConfigService → env-driven configuration
|
|
317
|
+
│ └── HookService → pre/post navigation hooks + logging
|
|
318
|
+
└── CamoufoxModule → CamoufoxService (PlaywrightCrawler + Camoufox)
|
|
319
|
+
├── BrowserService → Camoufox launch options
|
|
320
|
+
├── ConfigService → env-driven configuration
|
|
321
|
+
└── HookService → pre/post navigation hooks + Cloudflare handling
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
Every module ships with:
|
|
325
|
+
- 📋 **ConfigService** — reads all settings from `@nestjs/config`'s `ConfigService`
|
|
326
|
+
- 🪝 **HookService** — injects default pre/post navigation hooks (URL logging, HTTP status logging, Cloudflare challenge handling)
|
|
327
|
+
- 🏭 **Service** — exposes a `create()` factory that merges default options with any overrides you pass in
|
|
328
|
+
|
|
329
|
+
---
|
|
330
|
+
|
|
331
|
+
## 📋 Choosing a crawler
|
|
332
|
+
|
|
333
|
+
| Scenario | Recommended module |
|
|
334
|
+
|---|---|
|
|
335
|
+
| Fast data extraction, no JS needed | 🌐 `HttpModule` |
|
|
336
|
+
| Static HTML with CSS selectors | 🍋 `CheerioModule` |
|
|
337
|
+
| JavaScript-heavy SPAs | 🎭 `PlaywrightModule` |
|
|
338
|
+
| Anti-bot / Cloudflare protected sites | 🦊 `CamoufoxModule` |
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## 🛠️ Development
|
|
343
|
+
|
|
344
|
+
```bash
|
|
345
|
+
# Install dependencies
|
|
346
|
+
pnpm install
|
|
347
|
+
|
|
348
|
+
# Build
|
|
349
|
+
pnpm run build
|
|
350
|
+
|
|
351
|
+
# Lint & format
|
|
352
|
+
pnpm run lint
|
|
353
|
+
pnpm run format
|
|
354
|
+
```
|
|
355
|
+
|
|
356
|
+
Publishing is automated via GitHub Actions on every `v*` tag push.
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## 📄 License
|
|
361
|
+
|
|
362
|
+
[MIT](./LICENSE) © [rafiki](https://github.com/rafikidota)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
export declare namespace Env {
|
|
2
|
+
function Number(value: string): number;
|
|
3
|
+
namespace Number {
|
|
4
|
+
function Optional(value?: string): number | undefined;
|
|
5
|
+
}
|
|
6
|
+
function Boolean(value: string): boolean;
|
|
7
|
+
namespace Boolean {
|
|
8
|
+
function Optional(value?: string): boolean | undefined;
|
|
9
|
+
}
|
|
10
|
+
function String(value: string): string;
|
|
11
|
+
namespace String {
|
|
12
|
+
function Optional(value?: string): string | undefined;
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.Env = void 0;
|
|
4
|
+
const boolean_1 = require("../util/boolean");
|
|
5
|
+
const NativeNumber = Number;
|
|
6
|
+
const NativeString = String;
|
|
7
|
+
var Env;
|
|
8
|
+
(function (Env) {
|
|
9
|
+
function Number(value) {
|
|
10
|
+
return NativeNumber(value);
|
|
11
|
+
}
|
|
12
|
+
Env.Number = Number;
|
|
13
|
+
(function (Number) {
|
|
14
|
+
function Optional(value) {
|
|
15
|
+
return value ? NativeNumber(value) : undefined;
|
|
16
|
+
}
|
|
17
|
+
Number.Optional = Optional;
|
|
18
|
+
})(Number = Env.Number || (Env.Number = {}));
|
|
19
|
+
function Boolean(value) {
|
|
20
|
+
return boolean_1.BooleanParser.parse(value);
|
|
21
|
+
}
|
|
22
|
+
Env.Boolean = Boolean;
|
|
23
|
+
(function (Boolean) {
|
|
24
|
+
function Optional(value) {
|
|
25
|
+
return boolean_1.BooleanParser.parse(value);
|
|
26
|
+
}
|
|
27
|
+
Boolean.Optional = Optional;
|
|
28
|
+
})(Boolean = Env.Boolean || (Env.Boolean = {}));
|
|
29
|
+
function String(value) {
|
|
30
|
+
return NativeString(value);
|
|
31
|
+
}
|
|
32
|
+
Env.String = String;
|
|
33
|
+
(function (String) {
|
|
34
|
+
function Optional(value) {
|
|
35
|
+
return value ? NativeString(value) : undefined;
|
|
36
|
+
}
|
|
37
|
+
String.Optional = Optional;
|
|
38
|
+
})(String = Env.String || (Env.String = {}));
|
|
39
|
+
})(Env || (exports.Env = Env = {}));
|
|
40
|
+
//# sourceMappingURL=env.namespace.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"env.namespace.js","sourceRoot":"","sources":["../../../src/common/namespace/env.namespace.ts"],"names":[],"mappings":";;;AAAA,6CAAgD;AAEhD,MAAM,YAAY,GAAG,MAAM,CAAC;AAC5B,MAAM,YAAY,GAAG,MAAM,CAAC;AAE5B,IAAiB,GAAG,CA8BnB;AA9BD,WAAiB,GAAG;IAClB,SAAgB,MAAM,CAAC,KAAa;QAClC,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAFe,UAAM,SAErB,CAAA;IAED,WAAiB,MAAM;QACrB,SAAgB,QAAQ,CAAC,KAAc;YACrC,OAAO,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACjD,CAAC;QAFe,eAAQ,WAEvB,CAAA;IACH,CAAC,EAJgB,MAAM,GAAN,UAAM,KAAN,UAAM,QAItB;IAED,SAAgB,OAAO,CAAC,KAAa;QACnC,OAAO,uBAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACpC,CAAC;IAFe,WAAO,UAEtB,CAAA;IAED,WAAiB,OAAO;QACtB,SAAgB,QAAQ,CAAC,KAAc;YACrC,OAAO,uBAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACpC,CAAC;QAFe,gBAAQ,WAEvB,CAAA;IACH,CAAC,EAJgB,OAAO,GAAP,WAAO,KAAP,WAAO,QAIvB;IAED,SAAgB,MAAM,CAAC,KAAa;QAClC,OAAO,YAAY,CAAC,KAAK,CAAC,CAAC;IAC7B,CAAC;IAFe,UAAM,SAErB,CAAA;IAED,WAAiB,MAAM;QACrB,SAAgB,QAAQ,CAAC,KAAc;YACrC,OAAO,KAAK,CAAC,CAAC,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACjD,CAAC;QAFe,eAAQ,WAEvB,CAAA;IACH,CAAC,EAJgB,MAAM,GAAN,UAAM,KAAN,UAAM,QAItB;AACH,CAAC,EA9BgB,GAAG,mBAAH,GAAG,QA8BnB"}
|
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.
|
|
3
|
+
exports.BooleanParser = void 0;
|
|
4
4
|
const boolean_constant_1 = require("../constant/boolean.constant");
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
5
|
+
class BooleanParser {
|
|
6
|
+
static parse(value) {
|
|
7
|
+
try {
|
|
8
|
+
const trimmed = value.trim().toLowerCase();
|
|
9
|
+
switch (true) {
|
|
10
|
+
case boolean_constant_1.TRUTHY_VALUES.has(trimmed):
|
|
11
|
+
return true;
|
|
12
|
+
case boolean_constant_1.FALSY_VALUES.has(trimmed):
|
|
13
|
+
return false;
|
|
14
|
+
default:
|
|
15
|
+
return undefined;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
catch {
|
|
19
|
+
return undefined;
|
|
15
20
|
}
|
|
16
21
|
}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
}
|
|
20
|
-
};
|
|
21
|
-
exports.bool = bool;
|
|
22
|
+
}
|
|
23
|
+
exports.BooleanParser = BooleanParser;
|
|
22
24
|
//# sourceMappingURL=boolean.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"boolean.js","sourceRoot":"","sources":["../../../src/common/util/boolean.ts"],"names":[],"mappings":";;;AACA,mEAA2E;
|
|
1
|
+
{"version":3,"file":"boolean.js","sourceRoot":"","sources":["../../../src/common/util/boolean.ts"],"names":[],"mappings":";;;AACA,mEAA2E;AAE3E,MAAa,aAAa;IACxB,MAAM,CAAC,KAAK,CAAC,KAAa;QACxB,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC,WAAW,EAAwB,CAAC;YACjE,QAAQ,IAAI,EAAE,CAAC;gBACb,KAAK,gCAAa,CAAC,GAAG,CAAC,OAAO,CAAC;oBAC7B,OAAO,IAAI,CAAC;gBACd,KAAK,+BAAY,CAAC,GAAG,CAAC,OAAO,CAAC;oBAC5B,OAAO,KAAK,CAAC;gBACf;oBACE,OAAO,SAAS,CAAC;YACrB,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;CACF;AAhBD,sCAgBC"}
|
|
@@ -9,4 +9,10 @@ export declare const CAMOUFOX_ENV: {
|
|
|
9
9
|
readonly EXECUTABLE_PATH: "CRAWLEE_CAMOUFOX_EXECUTABLE_PATH";
|
|
10
10
|
readonly HANDLE_CLOUDFLARE_CHALLENGE: "CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE";
|
|
11
11
|
readonly INCOGNITO_PAGES: "CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES";
|
|
12
|
+
readonly GEOIP: "CRAWLEE_CAMOUFOX_GEOIP";
|
|
13
|
+
readonly OS: "CRAWLEE_CAMOUFOX_OS";
|
|
14
|
+
readonly BLOCK_WEBRTC: "CRAWLEE_CAMOUFOX_BLOCK_WEBRTC";
|
|
15
|
+
readonly HUMANIZE: "CRAWLEE_CAMOUFOX_HUMANIZE";
|
|
16
|
+
readonly BLOCK_IMAGES: "CRAWLEE_CAMOUFOX_BLOCK_IMAGES";
|
|
17
|
+
readonly ENABLE_CACHE: "CRAWLEE_CAMOUFOX_ENABLE_CACHE";
|
|
12
18
|
};
|
|
@@ -12,5 +12,11 @@ exports.CAMOUFOX_ENV = {
|
|
|
12
12
|
EXECUTABLE_PATH: 'CRAWLEE_CAMOUFOX_EXECUTABLE_PATH',
|
|
13
13
|
HANDLE_CLOUDFLARE_CHALLENGE: 'CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE',
|
|
14
14
|
INCOGNITO_PAGES: 'CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES',
|
|
15
|
+
GEOIP: 'CRAWLEE_CAMOUFOX_GEOIP',
|
|
16
|
+
OS: 'CRAWLEE_CAMOUFOX_OS',
|
|
17
|
+
BLOCK_WEBRTC: 'CRAWLEE_CAMOUFOX_BLOCK_WEBRTC',
|
|
18
|
+
HUMANIZE: 'CRAWLEE_CAMOUFOX_HUMANIZE',
|
|
19
|
+
BLOCK_IMAGES: 'CRAWLEE_CAMOUFOX_BLOCK_IMAGES',
|
|
20
|
+
ENABLE_CACHE: 'CRAWLEE_CAMOUFOX_ENABLE_CACHE',
|
|
15
21
|
};
|
|
16
22
|
//# sourceMappingURL=camoufox.env.constant.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"camoufox.env.constant.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/constant/camoufox.env.constant.ts"],"names":[],"mappings":";;;AAAa,QAAA,YAAY,GAAG;IAC1B,eAAe,EAAE,kCAAkC;IACnD,eAAe,EAAE,kCAAkC;IACnD,mBAAmB,EAAE,sCAAsC;IAC3D,YAAY,EAAE,+BAA+B;IAC7C,YAAY,EAAE,+BAA+B;IAC7C,YAAY,EAAE,+BAA+B;IAC7C,QAAQ,EAAE,2BAA2B;IACrC,eAAe,EAAE,kCAAkC;IACnD,2BAA2B,EAAE,8CAA8C;IAC3E,eAAe,EAAE,sCAAsC;
|
|
1
|
+
{"version":3,"file":"camoufox.env.constant.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/constant/camoufox.env.constant.ts"],"names":[],"mappings":";;;AAAa,QAAA,YAAY,GAAG;IAC1B,eAAe,EAAE,kCAAkC;IACnD,eAAe,EAAE,kCAAkC;IACnD,mBAAmB,EAAE,sCAAsC;IAC3D,YAAY,EAAE,+BAA+B;IAC7C,YAAY,EAAE,+BAA+B;IAC7C,YAAY,EAAE,+BAA+B;IAC7C,QAAQ,EAAE,2BAA2B;IACrC,eAAe,EAAE,kCAAkC;IACnD,2BAA2B,EAAE,8CAA8C;IAC3E,eAAe,EAAE,sCAAsC;IACvD,KAAK,EAAE,wBAAwB;IAC/B,EAAE,EAAE,qBAAqB;IACzB,YAAY,EAAE,+BAA+B;IAC7C,QAAQ,EAAE,2BAA2B;IACrC,YAAY,EAAE,+BAA+B;IAC7C,YAAY,EAAE,+BAA+B;CACrC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CamoufoxOS = void 0;
|
|
4
|
+
var CamoufoxOS;
|
|
5
|
+
(function (CamoufoxOS) {
|
|
6
|
+
CamoufoxOS["WINDOWS"] = "windows";
|
|
7
|
+
CamoufoxOS["MACOS"] = "macos";
|
|
8
|
+
CamoufoxOS["LINUX"] = "linux";
|
|
9
|
+
})(CamoufoxOS || (exports.CamoufoxOS = CamoufoxOS = {}));
|
|
10
|
+
//# sourceMappingURL=camoufox.os.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"camoufox.os.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/enum/camoufox.os.ts"],"names":[],"mappings":";;;AAAA,IAAY,UAIX;AAJD,WAAY,UAAU;IACpB,iCAAmB,CAAA;IACnB,6BAAe,CAAA;IACf,6BAAe,CAAA;AACjB,CAAC,EAJW,UAAU,0BAAV,UAAU,QAIrB"}
|
|
@@ -1,12 +1,18 @@
|
|
|
1
1
|
export declare const CamoufoxEnvConfig: () => {
|
|
2
|
-
CRAWLEE_CAMOUFOX_MAX_CONCURRENCY: number;
|
|
3
2
|
CRAWLEE_CAMOUFOX_MIN_CONCURRENCY: number;
|
|
3
|
+
CRAWLEE_CAMOUFOX_MAX_CONCURRENCY: number;
|
|
4
4
|
CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES: number;
|
|
5
5
|
CRAWLEE_CAMOUFOX_TIMEOUT_SECS: number;
|
|
6
6
|
CRAWLEE_CAMOUFOX_MAX_REQUESTS: number;
|
|
7
7
|
CRAWLEE_CAMOUFOX_INITIAL_PAGE: number;
|
|
8
|
-
CRAWLEE_CAMOUFOX_HEADLESS:
|
|
9
|
-
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE:
|
|
10
|
-
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES:
|
|
8
|
+
CRAWLEE_CAMOUFOX_HEADLESS: boolean;
|
|
9
|
+
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE: boolean;
|
|
10
|
+
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES: boolean;
|
|
11
11
|
CRAWLEE_CAMOUFOX_EXECUTABLE_PATH: string;
|
|
12
|
+
CRAWLEE_CAMOUFOX_GEOIP: boolean;
|
|
13
|
+
CRAWLEE_CAMOUFOX_OS: string;
|
|
14
|
+
CRAWLEE_CAMOUFOX_BLOCK_WEBRTC: boolean;
|
|
15
|
+
CRAWLEE_CAMOUFOX_HUMANIZE: number;
|
|
16
|
+
CRAWLEE_CAMOUFOX_BLOCK_IMAGES: boolean;
|
|
17
|
+
CRAWLEE_CAMOUFOX_ENABLE_CACHE: boolean;
|
|
12
18
|
};
|
|
@@ -35,18 +35,41 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.CamoufoxEnvConfig = void 0;
|
|
37
37
|
const dotenv = __importStar(require("dotenv"));
|
|
38
|
+
const env_namespace_1 = require("../../../common/namespace/env.namespace");
|
|
38
39
|
dotenv.config();
|
|
40
|
+
const { CRAWLEE_CAMOUFOX_MIN_CONCURRENCY = '1' } = process.env;
|
|
41
|
+
const { CRAWLEE_CAMOUFOX_MAX_CONCURRENCY = '1' } = process.env;
|
|
42
|
+
const { CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES = '3' } = process.env;
|
|
43
|
+
const { CRAWLEE_CAMOUFOX_TIMEOUT_SECS = '30' } = process.env;
|
|
44
|
+
const { CRAWLEE_CAMOUFOX_MAX_REQUESTS = '100' } = process.env;
|
|
45
|
+
const { CRAWLEE_CAMOUFOX_INITIAL_PAGE = '1' } = process.env;
|
|
46
|
+
const { CRAWLEE_CAMOUFOX_HEADLESS = 'true' } = process.env;
|
|
47
|
+
const { CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE = 'false' } = process.env;
|
|
48
|
+
const { CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES = 'true' } = process.env;
|
|
49
|
+
const { CRAWLEE_CAMOUFOX_EXECUTABLE_PATH } = process.env;
|
|
50
|
+
const { CRAWLEE_CAMOUFOX_GEOIP = 'false' } = process.env;
|
|
51
|
+
const { CRAWLEE_CAMOUFOX_OS } = process.env;
|
|
52
|
+
const { CRAWLEE_CAMOUFOX_BLOCK_WEBRTC = 'true' } = process.env;
|
|
53
|
+
const { CRAWLEE_CAMOUFOX_HUMANIZE } = process.env;
|
|
54
|
+
const { CRAWLEE_CAMOUFOX_BLOCK_IMAGES = 'false' } = process.env;
|
|
55
|
+
const { CRAWLEE_CAMOUFOX_ENABLE_CACHE = 'false' } = process.env;
|
|
39
56
|
const CamoufoxEnvConfig = () => ({
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES: Number(
|
|
43
|
-
CRAWLEE_CAMOUFOX_TIMEOUT_SECS: Number(
|
|
44
|
-
CRAWLEE_CAMOUFOX_MAX_REQUESTS: Number(
|
|
45
|
-
CRAWLEE_CAMOUFOX_INITIAL_PAGE: Number(
|
|
46
|
-
CRAWLEE_CAMOUFOX_HEADLESS:
|
|
47
|
-
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE:
|
|
48
|
-
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES:
|
|
49
|
-
CRAWLEE_CAMOUFOX_EXECUTABLE_PATH:
|
|
57
|
+
CRAWLEE_CAMOUFOX_MIN_CONCURRENCY: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_MIN_CONCURRENCY),
|
|
58
|
+
CRAWLEE_CAMOUFOX_MAX_CONCURRENCY: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_MAX_CONCURRENCY),
|
|
59
|
+
CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_MAX_REQUEST_RETRIES),
|
|
60
|
+
CRAWLEE_CAMOUFOX_TIMEOUT_SECS: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_TIMEOUT_SECS),
|
|
61
|
+
CRAWLEE_CAMOUFOX_MAX_REQUESTS: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_MAX_REQUESTS),
|
|
62
|
+
CRAWLEE_CAMOUFOX_INITIAL_PAGE: env_namespace_1.Env.Number(CRAWLEE_CAMOUFOX_INITIAL_PAGE),
|
|
63
|
+
CRAWLEE_CAMOUFOX_HEADLESS: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_HEADLESS),
|
|
64
|
+
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE),
|
|
65
|
+
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES),
|
|
66
|
+
CRAWLEE_CAMOUFOX_EXECUTABLE_PATH: env_namespace_1.Env.String.Optional(CRAWLEE_CAMOUFOX_EXECUTABLE_PATH),
|
|
67
|
+
CRAWLEE_CAMOUFOX_GEOIP: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_GEOIP),
|
|
68
|
+
CRAWLEE_CAMOUFOX_OS: env_namespace_1.Env.String.Optional(CRAWLEE_CAMOUFOX_OS),
|
|
69
|
+
CRAWLEE_CAMOUFOX_BLOCK_WEBRTC: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_BLOCK_WEBRTC),
|
|
70
|
+
CRAWLEE_CAMOUFOX_HUMANIZE: env_namespace_1.Env.Number.Optional(CRAWLEE_CAMOUFOX_HUMANIZE),
|
|
71
|
+
CRAWLEE_CAMOUFOX_BLOCK_IMAGES: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_BLOCK_IMAGES),
|
|
72
|
+
CRAWLEE_CAMOUFOX_ENABLE_CACHE: env_namespace_1.Env.Boolean(CRAWLEE_CAMOUFOX_ENABLE_CACHE),
|
|
50
73
|
});
|
|
51
74
|
exports.CamoufoxEnvConfig = CamoufoxEnvConfig;
|
|
52
75
|
//# sourceMappingURL=camoufox.env.config.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"camoufox.env.config.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/env/camoufox.env.config.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;
|
|
1
|
+
{"version":3,"file":"camoufox.env.config.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/env/camoufox.env.config.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,+CAAiC;AACjC,2EAA8D;AAC9D,MAAM,CAAC,MAAM,EAAE,CAAC;AAEhB,MAAM,EAAE,gCAAgC,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC/D,MAAM,EAAE,gCAAgC,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC/D,MAAM,EAAE,oCAAoC,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AACnE,MAAM,EAAE,6BAA6B,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC7D,MAAM,EAAE,6BAA6B,GAAG,KAAK,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC9D,MAAM,EAAE,6BAA6B,GAAG,GAAG,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC5D,MAAM,EAAE,yBAAyB,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC3D,MAAM,EAAE,4CAA4C,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC/E,MAAM,EAAE,oCAAoC,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AACtE,MAAM,EAAE,gCAAgC,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AACzD,MAAM,EAAE,sBAAsB,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AACzD,MAAM,EAAE,mBAAmB,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC5C,MAAM,EAAE,6BAA6B,GAAG,MAAM,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAC/D,MAAM,EAAE,yBAAyB,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAClD,MAAM,EAAE,6BAA6B,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAChE,MAAM,EAAE,6BAA6B,GAAG,OAAO,EAAE,GAAG,OAAO,CAAC,GAAG,CAAC;AAEzD,MAAM,iBAAiB,GAAG,GAAG,EAAE,CAAC,CAAC;IACtC,gCAAgC,EAAE,mBAAG,CAAC,MAAM,CAC1C,gCAAgC,CACjC;IACD,gCAAgC,EAAE,mBAAG,CAAC,MAAM,CAC1C,gCAAgC,CACjC;IACD,oCAAoC,EAAE,mBAAG,CAAC,MAAM,CAC9C,oCAAoC,CACrC;IACD,6BAA6B,EAAE,mBAAG,CAAC,MAAM,CAAC,6BAA6B,CAAC;IACxE,6BAA6B,EAAE,mBAAG,CAAC,MAAM,CAAC,6BAA6B,CAAC;IACxE,6BAA6B,EAAE,mBAAG,CAAC,MAAM,CAAC,6BAA6B,CAAC;IACxE,yBAAyB,EAAE,mBAAG,CAAC,OAAO,CAAC,yBAAyB,CAAC;IACjE,4CAA4C,EAAE,mBAAG,CAAC,OAAO,CACvD,4CAA4C,CAC7C;IACD,oCAAoC,EAAE,mBAAG,CAAC,OAAO,CAC/C,oCAAoC,CACrC;IACD,gCAAgC,EAAE,mBAAG,CAAC,MAAM,CAAC,QAAQ,CACnD,gCAAgC,CACjC;IACD,sBAAsB,EAAE,mBAAG,CAAC,OAAO,CAAC,sBAAsB,CAAC;IAC3D,mBAAmB,EAAE,mBAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,mBAAmB,CAAC;IAC7D,6BAA6B,EAAE,mBAAG,CAAC,OAAO,CAAC,6BAA6B,CAAC;IACzE,yBAAyB,EAAE,mBAAG,CAAC,MAAM,CAAC,Q
AAQ,CAAC,yBAAyB,CAAC;IACzE,6BAA6B,EAAE,mBAAG,CAAC,OAAO,CAAC,6BAA6B,CAAC;IACzE,6BAA6B,EAAE,mBAAG,CAAC,OAAO,CAAC,6BAA6B,CAAC;CAC1E,CAAC,CAAC;AA7BU,QAAA,iBAAiB,qBA6B3B"}
|
|
@@ -36,6 +36,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
36
|
exports.CamoufoxConfigValidationSchema = void 0;
|
|
37
37
|
const joi = __importStar(require("joi"));
|
|
38
38
|
const boolean_joi_1 = require("../../../common/util/boolean.joi");
|
|
39
|
+
const camoufox_os_1 = require("../enum/camoufox.os");
|
|
39
40
|
exports.CamoufoxConfigValidationSchema = joi.object({
|
|
40
41
|
CRAWLEE_CAMOUFOX_MIN_CONCURRENCY: joi.number().optional().default(1),
|
|
41
42
|
CRAWLEE_CAMOUFOX_MAX_CONCURRENCY: joi.number().optional().default(5),
|
|
@@ -47,5 +48,14 @@ exports.CamoufoxConfigValidationSchema = joi.object({
|
|
|
47
48
|
CRAWLEE_CAMOUFOX_HEADLESS: boolean_joi_1.bool.optional().default(true),
|
|
48
49
|
CRAWLEE_CAMOUFOX_HANDLE_CLOUDFLARE_CHALLENGE: boolean_joi_1.bool.optional().default(false),
|
|
49
50
|
CRAWLEE_CAMOUFOX_USE_INCOGNITO_PAGES: boolean_joi_1.bool.optional().default(true),
|
|
51
|
+
CRAWLEE_CAMOUFOX_GEOIP: boolean_joi_1.bool.optional().default(false),
|
|
52
|
+
CRAWLEE_CAMOUFOX_OS: joi
|
|
53
|
+
.string()
|
|
54
|
+
.valid(...Object.values(camoufox_os_1.CamoufoxOS))
|
|
55
|
+
.optional(),
|
|
56
|
+
CRAWLEE_CAMOUFOX_BLOCK_WEBRTC: boolean_joi_1.bool.optional().default(true),
|
|
57
|
+
CRAWLEE_CAMOUFOX_HUMANIZE: joi.number().optional(),
|
|
58
|
+
CRAWLEE_CAMOUFOX_BLOCK_IMAGES: boolean_joi_1.bool.optional().default(false),
|
|
59
|
+
CRAWLEE_CAMOUFOX_ENABLE_CACHE: boolean_joi_1.bool.optional().default(false),
|
|
50
60
|
});
|
|
51
61
|
//# sourceMappingURL=camoufox.env.validation.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"camoufox.env.validation.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/env/camoufox.env.validation.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yCAA2B;AAC3B,kEAAwD;
|
|
1
|
+
{"version":3,"file":"camoufox.env.validation.js","sourceRoot":"","sources":["../../../../src/modules/camoufox/env/camoufox.env.validation.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,yCAA2B;AAC3B,kEAAwD;AACxD,qDAAiD;AAEpC,QAAA,8BAA8B,GAAG,GAAG,CAAC,MAAM,CAAC;IACvD,gCAAgC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACpE,gCAAgC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACpE,oCAAoC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,EAAE,CAAC;IACzE,6BAA6B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC;IACnE,6BAA6B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC;IACtE,6BAA6B,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACjE,gCAAgC,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IACzD,yBAAyB,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACxD,4CAA4C,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;IAC5E,oCAAoC,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IACnE,sBAAsB,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;IACtD,mBAAmB,EAAE,GAAG;SACrB,MAAM,EAAE;SACR,KAAK,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,wBAAU,CAAC,CAAC;SACnC,QAAQ,EAAE;IACb,6BAA6B,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;IAC5D,yBAAyB,EAAE,GAAG,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAClD,6BAA6B,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;IAC7D,6BAA6B,EAAE,kBAAI,CAAC,QAAQ,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC;CAC9D,CAAC,CAAC"}
|