@hanivanrizky/nestjs-browser-action 0.15.0 → 0.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +230 -286
- package/dist/interfaces/browser-action-options.d.ts +2 -2
- package/dist/interfaces/cleansing-options.d.ts +3 -8
- package/dist/interfaces/types.d.ts +32 -1
- package/dist/interfaces/workflow-options.d.ts +2 -2
- package/dist/pipes/alt-flag.pipe.d.ts +1 -1
- package/dist/pipes/alt-flag.pipe.js.map +1 -1
- package/dist/pipes/clean-html.pipe.d.ts +5 -0
- package/dist/pipes/clean-html.pipe.js +52 -0
- package/dist/pipes/clean-html.pipe.js.map +1 -0
- package/dist/pipes/cleansing-pipe.d.ts +4 -3
- package/dist/pipes/cleansing-pipe.js +3 -17
- package/dist/pipes/cleansing-pipe.js.map +1 -1
- package/dist/pipes/date-format-special.pipe.d.ts +6 -0
- package/dist/pipes/date-format-special.pipe.js +24 -0
- package/dist/pipes/date-format-special.pipe.js.map +1 -0
- package/dist/pipes/date-format.pipe.d.ts +1 -1
- package/dist/pipes/date-format.pipe.js.map +1 -1
- package/dist/pipes/extract-email.pipe.d.ts +6 -0
- package/dist/pipes/extract-email.pipe.js +19 -0
- package/dist/pipes/extract-email.pipe.js.map +1 -0
- package/dist/pipes/extract-url-params.pipe.d.ts +10 -0
- package/dist/pipes/extract-url-params.pipe.js +57 -0
- package/dist/pipes/extract-url-params.pipe.js.map +1 -0
- package/dist/pipes/index.d.ts +16 -0
- package/dist/pipes/index.js +16 -0
- package/dist/pipes/index.js.map +1 -1
- package/dist/pipes/json-path.pipe.d.ts +8 -0
- package/dist/pipes/json-path.pipe.js +40 -0
- package/dist/pipes/json-path.pipe.js.map +1 -0
- package/dist/pipes/media-filter.pipe.d.ts +7 -0
- package/dist/pipes/media-filter.pipe.js +21 -0
- package/dist/pipes/media-filter.pipe.js.map +1 -0
- package/dist/pipes/normalize-whitespace.pipe.d.ts +1 -1
- package/dist/pipes/normalize-whitespace.pipe.js.map +1 -1
- package/dist/pipes/number-normalize.pipe.d.ts +6 -0
- package/dist/pipes/number-normalize.pipe.js +38 -0
- package/dist/pipes/number-normalize.pipe.js.map +1 -0
- package/dist/pipes/parse-as-url.pipe.d.ts +7 -0
- package/dist/pipes/parse-as-url.pipe.js +45 -0
- package/dist/pipes/parse-as-url.pipe.js.map +1 -0
- package/dist/pipes/pipe-engine.d.ts +20 -0
- package/dist/pipes/pipe-engine.js +81 -0
- package/dist/pipes/pipe-engine.js.map +1 -0
- package/dist/pipes/pipe-registry.d.ts +3 -0
- package/dist/pipes/pipe-registry.js +89 -0
- package/dist/pipes/pipe-registry.js.map +1 -0
- package/dist/pipes/profiles/currency.profile.js +11 -12
- package/dist/pipes/profiles/currency.profile.js.map +1 -1
- package/dist/pipes/profiles/date.profile.js +10 -7
- package/dist/pipes/profiles/date.profile.js.map +1 -1
- package/dist/pipes/profiles/email.profile.js +9 -9
- package/dist/pipes/profiles/email.profile.js.map +1 -1
- package/dist/pipes/profiles/phone.profile.js +11 -11
- package/dist/pipes/profiles/phone.profile.js.map +1 -1
- package/dist/pipes/profiles/price.profile.js +11 -12
- package/dist/pipes/profiles/price.profile.js.map +1 -1
- package/dist/pipes/profiles.d.ts +2 -2
- package/dist/pipes/profiles.js +5 -5
- package/dist/pipes/profiles.js.map +1 -1
- package/dist/pipes/query-append.pipe.d.ts +9 -0
- package/dist/pipes/query-append.pipe.js +47 -0
- package/dist/pipes/query-append.pipe.js.map +1 -0
- package/dist/pipes/query-remover.pipe.d.ts +22 -0
- package/dist/pipes/query-remover.pipe.js +83 -0
- package/dist/pipes/query-remover.pipe.js.map +1 -0
- package/dist/pipes/regex-extract.pipe.d.ts +1 -1
- package/dist/pipes/regex-extract.pipe.js.map +1 -1
- package/dist/pipes/regex-extraction.pipe.d.ts +25 -0
- package/dist/pipes/regex-extraction.pipe.js +90 -0
- package/dist/pipes/regex-extraction.pipe.js.map +1 -0
- package/dist/pipes/regex-replace-x.pipe.d.ts +28 -0
- package/dist/pipes/regex-replace-x.pipe.js +104 -0
- package/dist/pipes/regex-replace-x.pipe.js.map +1 -0
- package/dist/pipes/regex-replace.pipe.d.ts +1 -1
- package/dist/pipes/regex-replace.pipe.js.map +1 -1
- package/dist/pipes/regex.pipe.d.ts +12 -0
- package/dist/pipes/regex.pipe.js +42 -0
- package/dist/pipes/regex.pipe.js.map +1 -0
- package/dist/pipes/remove-currency-symbol.pipe.d.ts +1 -1
- package/dist/pipes/remove-currency-symbol.pipe.js.map +1 -1
- package/dist/pipes/remove-line-breaks.pipe.d.ts +1 -1
- package/dist/pipes/remove-line-breaks.pipe.js.map +1 -1
- package/dist/pipes/remove-special-chars.pipe.d.ts +1 -1
- package/dist/pipes/remove-special-chars.pipe.js.map +1 -1
- package/dist/pipes/sanitize-text.pipe.d.ts +1 -1
- package/dist/pipes/sanitize-text.pipe.js.map +1 -1
- package/dist/pipes/to-lower-case.pipe.d.ts +1 -1
- package/dist/pipes/to-lower-case.pipe.js.map +1 -1
- package/dist/pipes/to-number.pipe.d.ts +1 -1
- package/dist/pipes/to-number.pipe.js.map +1 -1
- package/dist/pipes/to-upper-case.pipe.d.ts +1 -1
- package/dist/pipes/to-upper-case.pipe.js.map +1 -1
- package/dist/pipes/trim.pipe.d.ts +1 -1
- package/dist/pipes/trim.pipe.js.map +1 -1
- package/dist/pipes/url-resolve.pipe.d.ts +7 -0
- package/dist/pipes/url-resolve.pipe.js +52 -0
- package/dist/pipes/url-resolve.pipe.js.map +1 -0
- package/dist/services/browser-action.service.d.ts +3 -3
- package/dist/services/browser-action.service.js +54 -27
- package/dist/services/browser-action.service.js.map +1 -1
- package/dist/services/browser-pool.service.js +3 -1
- package/dist/services/browser-pool.service.js.map +1 -1
- package/dist/services/cleansing.service.d.ts +2 -4
- package/dist/services/cleansing.service.js +24 -81
- package/dist/services/cleansing.service.js.map +1 -1
- package/dist/services/cookie.service.js +3 -1
- package/dist/services/cookie.service.js.map +1 -1
- package/dist/tsconfig.build.tsbuildinfo +1 -1
- package/dist/validators/workflow.validator.js +2 -2
- package/dist/validators/workflow.validator.js.map +1 -1
- package/package.json +5 -1
package/README.md
CHANGED
|
@@ -1,54 +1,73 @@
|
|
|
1
1
|
# @hanivanrizky/nestjs-browser-action
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
<p align="center">
|
|
4
|
+
<a href="http://nestjs.com/" target="_blank"><img src="https://nestjs.com/img/logo-small.svg" width="120" alt="Nest Logo" /></a>
|
|
5
|
+
</p>
|
|
6
6
|
|
|
7
|
-
>
|
|
8
|
-
>
|
|
9
|
-
> This project is currently in **experimental** stage and intended for **personal use only**. The API is subject to change, and production use is not recommended.
|
|
7
|
+
<p align="center">A NestJS module for stealth browser automation using CloakBrowser + puppeteer-core with support for proxy rotation, connection pooling, cookie persistence, and flexible data extraction.</p>
|
|
10
8
|
|
|
11
|
-
|
|
9
|
+
<p align="center">
|
|
10
|
+
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/v/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Version" /></a>
|
|
11
|
+
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/l/@hanivanrizky/nestjs-browser-action.svg" alt="Package License" /></a>
|
|
12
|
+
<a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/dm/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Downloads" /></a>
|
|
13
|
+
<img src="https://img.shields.io/badge/tests-363%20passed-brightgreen.svg" alt="Tests: 363 passed" />
|
|
14
|
+
</p>
|
|
15
|
+
|
|
16
|
+
> **⚠️ Status: Experimental** — personal use only; API subject to change.
|
|
17
|
+
|
|
18
|
+
## Table of Contents
|
|
19
|
+
|
|
20
|
+
- [Features](#features)
|
|
21
|
+
- [Installation](#installation)
|
|
22
|
+
- [Quick Start](#quick-start)
|
|
23
|
+
- [Documentation](#documentation)
|
|
24
|
+
- [Quick Examples](#quick-examples)
|
|
25
|
+
- [Development](#development)
|
|
26
|
+
- [Contributing](#contributing)
|
|
27
|
+
- [License](#license)
|
|
12
28
|
|
|
13
29
|
## Features
|
|
14
30
|
|
|
15
|
-
- (
|
|
16
|
-
- (
|
|
17
|
-
- (
|
|
18
|
-
- (
|
|
19
|
-
- (
|
|
20
|
-
- (
|
|
21
|
-
- (
|
|
22
|
-
- (
|
|
23
|
-
- (
|
|
24
|
-
- (
|
|
25
|
-
- (
|
|
31
|
+
- **(☆^O^☆) Pattern-Based Extraction**: Define extraction patterns with `PatternField` — API-compatible with `nestjs-xpath-parser`
|
|
32
|
+
- **(.\_.) Container Extraction**: Extract lists of items from repeating DOM nodes with pagination
|
|
33
|
+
- **(>\_<) Workflow Automation**: Declarative step-by-step browser automation (navigate, click, fill, extract, screenshot…)
|
|
34
|
+
- **(・\_・) Data Cleaning Pipes**: 33 built-in transformations (trim, case, replace, decode HTML, number, regex, jsonpath, clean-html…)
|
|
35
|
+
- **(☆^O^☆) Custom Pipes**: Extensible pipe registry — `PIPE_REGISTRY['my-type'] = MyPipe`
|
|
36
|
+
- **(>\_<) Connection Pooling**: Efficient browser instance reuse with configurable min/max/idle/acquire timeouts
|
|
37
|
+
- **(.\_.) Cookie Persistence**: Save/load browser sessions for authentication flows
|
|
38
|
+
- **(o_o) Stealth**: CloakBrowser Chromium with proxy, humanize, geoip, timezone/locale spoofing, and anti-detect flags
|
|
39
|
+
- **(.\_.) Remote Chrome**: Connect to remote Chrome instances via CDP (browserURL / browserWSEndpoint)
|
|
40
|
+
- **(>\_<) TLS Fingerprint**: Capture the browser's own TLS/HTTP handshake (ja3/ja4, ciphers, http2 akamai, headers) for use with `nestjs-xpath-parser`'s CycleTLS engine
|
|
41
|
+
- **(☆^O^☆) TypeScript Generics**: Full generic type support for type-safe results
|
|
42
|
+
- **(o_o) Fully Tested**: 363 tests across 37 suites
|
|
26
43
|
|
|
27
44
|
## Installation
|
|
28
45
|
|
|
29
|
-
### From npm
|
|
30
|
-
|
|
31
46
|
```bash
|
|
32
|
-
|
|
47
|
+
pnpm add @hanivanrizky/nestjs-browser-action
|
|
33
48
|
# or
|
|
34
49
|
yarn add @hanivanrizky/nestjs-browser-action
|
|
35
50
|
# or
|
|
36
|
-
|
|
51
|
+
npm install @hanivanrizky/nestjs-browser-action
|
|
37
52
|
```
|
|
38
53
|
|
|
39
|
-
|
|
54
|
+
## Quick Start
|
|
40
55
|
|
|
41
|
-
|
|
42
|
-
npm install https://github.com/Hanivan/nestjs-browser-action.git
|
|
43
|
-
# or
|
|
44
|
-
pnpm add https://github.com/Hanivan/nestjs-browser-action.git
|
|
45
|
-
# or using SSH
|
|
46
|
-
pnpm add git@github.com:Hanivan/nestjs-browser-action.git
|
|
47
|
-
```
|
|
56
|
+
### Import the Module
|
|
48
57
|
|
|
49
|
-
|
|
58
|
+
**Basic usage:**
|
|
50
59
|
|
|
51
|
-
|
|
60
|
+
```typescript
|
|
61
|
+
import { Module } from '@nestjs/common';
|
|
62
|
+
import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
|
|
63
|
+
|
|
64
|
+
@Module({
|
|
65
|
+
imports: [BrowserActionModule.forRoot()],
|
|
66
|
+
})
|
|
67
|
+
export class AppModule {}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**With pool and cookie options:**
|
|
52
71
|
|
|
53
72
|
```typescript
|
|
54
73
|
import { Module } from '@nestjs/common';
|
|
@@ -58,335 +77,260 @@ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
|
|
|
58
77
|
imports: [
|
|
59
78
|
BrowserActionModule.forRoot({
|
|
60
79
|
pool: { min: 2, max: 10 },
|
|
61
|
-
cookies: { enabled: true },
|
|
80
|
+
cookies: { enabled: true, cookiesDir: './cookies' },
|
|
81
|
+
logLevel: 'log',
|
|
82
|
+
}),
|
|
83
|
+
],
|
|
84
|
+
})
|
|
85
|
+
export class AppModule {}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
**Async configuration:**
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
import { Module } from '@nestjs/common';
|
|
92
|
+
import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
|
|
93
|
+
import { ConfigModule, ConfigService } from '@nestjs/config';
|
|
94
|
+
|
|
95
|
+
@Module({
|
|
96
|
+
imports: [
|
|
97
|
+
ConfigModule.forRoot(),
|
|
98
|
+
BrowserActionModule.forRootAsync({
|
|
99
|
+
imports: [ConfigModule],
|
|
100
|
+
useFactory: (configService: ConfigService) => ({
|
|
101
|
+
pool: {
|
|
102
|
+
min: configService.get<number>('POOL_MIN', 2),
|
|
103
|
+
max: configService.get<number>('POOL_MAX', 10),
|
|
104
|
+
},
|
|
105
|
+
cloak: {
|
|
106
|
+
proxy: { server: configService.get<string>('PROXY_URL', '') },
|
|
107
|
+
},
|
|
108
|
+
logLevel: configService.get<string>('LOG_LEVEL', 'log'),
|
|
109
|
+
}),
|
|
110
|
+
inject: [ConfigService],
|
|
62
111
|
}),
|
|
63
112
|
],
|
|
64
113
|
})
|
|
65
114
|
export class AppModule {}
|
|
66
115
|
```
|
|
67
116
|
|
|
68
|
-
###
|
|
117
|
+
### Inject the Service
|
|
69
118
|
|
|
70
119
|
```typescript
|
|
71
120
|
import { Injectable } from '@nestjs/common';
|
|
72
121
|
import { BrowserActionService } from '@hanivanrizky/nestjs-browser-action';
|
|
73
122
|
|
|
74
123
|
@Injectable()
|
|
75
|
-
export class
|
|
76
|
-
constructor(
|
|
77
|
-
|
|
78
|
-
) {
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
124
|
+
export class YourService {
|
|
125
|
+
constructor(private readonly browserAction: BrowserActionService) {}
|
|
126
|
+
|
|
127
|
+
async scrapeProducts() {
|
|
128
|
+
const result = await this.browserAction.evaluateWebsite({
|
|
129
|
+
url: 'https://www.scrapingcourse.com/ecommerce/',
|
|
130
|
+
patterns: [
|
|
131
|
+
{
|
|
132
|
+
key: 'container',
|
|
133
|
+
patternType: 'css',
|
|
134
|
+
returnType: 'text',
|
|
135
|
+
patterns: ['.product'],
|
|
136
|
+
meta: { isContainer: true },
|
|
137
|
+
},
|
|
138
|
+
{
|
|
139
|
+
key: 'name',
|
|
140
|
+
patternType: 'css',
|
|
141
|
+
returnType: 'text',
|
|
142
|
+
patterns: ['h2.woocommerce-loop-product__title'],
|
|
143
|
+
pipes: { trim: true },
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
key: 'price',
|
|
147
|
+
patternType: 'css',
|
|
148
|
+
returnType: 'text',
|
|
149
|
+
patterns: ['.price'],
|
|
150
|
+
pipes: { trim: true },
|
|
151
|
+
},
|
|
152
|
+
],
|
|
153
|
+
});
|
|
154
|
+
|
|
155
|
+
return result.results;
|
|
91
156
|
}
|
|
92
157
|
}
|
|
93
158
|
```
|
|
94
159
|
|
|
95
160
|
## Documentation
|
|
96
161
|
|
|
97
|
-
###
|
|
98
|
-
|
|
99
|
-
| Method | Description |
|
|
100
|
-
|--------|-------------|
|
|
101
|
-
| [`scrape()`](./docs/methods/scrape.md) | Extract single elements |
|
|
102
|
-
| [`scrapeAll()`](./docs/methods/scrape-all.md) | Extract multiple elements |
|
|
103
|
-
| [`scrapeContainerFields()`](./docs/api-reference.md#scrapecontainerfields) | Extract structured lists with pagination |
|
|
104
|
-
| [`scrapeWithWorkflow()`](./docs/methods/workflow.md) | Workflow-based automation |
|
|
105
|
-
| [`scrapeAllWithWorkflow()`](./docs/methods/workflow.md) | Workflow with multi-element |
|
|
106
|
-
| [`takeScreenshot()`](./docs/methods/screenshots.md) | Capture screenshots |
|
|
107
|
-
| [`generatePDF()`](./docs/methods/screenshots.md) | Generate PDFs |
|
|
108
|
-
| [`captureTlsFingerprint()`](./docs/api-reference.md#capturetlsfingerprintpath-url-promisetlsfingerprint) | Capture browser's TLS/HTTP fingerprint |
|
|
109
|
-
| [Browser & Page Control](./docs/methods/browser-control.md) | Low-level control |
|
|
110
|
-
|
|
111
|
-
### (☆^O^☆) Feature Guides
|
|
162
|
+
### Features
|
|
112
163
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
164
|
+
- [Pattern-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - `evaluateWebsite()` with `PatternField` patterns
|
|
165
|
+
- [Container-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - Extract lists with `meta.isContainer`
|
|
166
|
+
- [Data Cleaning Pipes](docs/features/pipes.md) - Transform extracted data with pipes
|
|
167
|
+
- [Cookie Management](docs/features/cookies.md) - Session persistence
|
|
168
|
+
- [Workflow Actions](docs/methods/workflow.md) - Declarative step-by-step automation
|
|
118
169
|
|
|
119
|
-
###
|
|
170
|
+
### Reference
|
|
120
171
|
|
|
121
|
-
- [API Reference](
|
|
122
|
-
- [
|
|
123
|
-
- [
|
|
172
|
+
- [API Reference](docs/api-reference.md) - Complete service API documentation
|
|
173
|
+
- [Workflow Actions Reference](docs/workflow-actions.md) - All action types
|
|
174
|
+
- [Browser & Page Control](docs/methods/browser-control.md) - Low-level control
|
|
124
175
|
|
|
125
176
|
## Quick Examples
|
|
126
177
|
|
|
127
|
-
### Simple Scraping
|
|
178
|
+
### Simple Product Scraping
|
|
128
179
|
|
|
129
180
|
```typescript
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
price:
|
|
133
|
-
}
|
|
134
|
-
```
|
|
135
|
-
|
|
136
|
-
### Multi-Element Scraping
|
|
181
|
+
interface Product {
|
|
182
|
+
name: string;
|
|
183
|
+
price: string;
|
|
184
|
+
}
|
|
137
185
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
186
|
+
const result = await browserAction.evaluateWebsite<Product>({
|
|
187
|
+
url: 'https://example.com/products',
|
|
188
|
+
patterns: [
|
|
189
|
+
{
|
|
190
|
+
key: 'container',
|
|
191
|
+
patternType: 'css',
|
|
192
|
+
returnType: 'text',
|
|
193
|
+
patterns: ['.product-card'],
|
|
194
|
+
meta: { isContainer: true },
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
key: 'name',
|
|
198
|
+
patternType: 'css',
|
|
199
|
+
returnType: 'text',
|
|
200
|
+
patterns: ['h2.name'],
|
|
201
|
+
pipes: { trim: true },
|
|
202
|
+
},
|
|
203
|
+
{
|
|
204
|
+
key: 'price',
|
|
205
|
+
patternType: 'css',
|
|
206
|
+
returnType: 'text',
|
|
207
|
+
patterns: ['.price'],
|
|
208
|
+
pipes: {
|
|
209
|
+
trim: true,
|
|
210
|
+
replace: [{ from: '$', to: '' }],
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
],
|
|
142
214
|
});
|
|
143
215
|
```
|
|
144
216
|
|
|
145
|
-
###
|
|
217
|
+
### Article Extraction with Fallbacks
|
|
146
218
|
|
|
147
219
|
```typescript
|
|
148
|
-
const
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
{
|
|
152
|
-
|
|
153
|
-
|
|
220
|
+
const result = await browserAction.evaluateWebsite({
|
|
221
|
+
url: 'https://example.com/article',
|
|
222
|
+
patterns: [
|
|
223
|
+
{
|
|
224
|
+
key: 'title',
|
|
225
|
+
patternType: 'css',
|
|
226
|
+
returnType: 'text',
|
|
227
|
+
patterns: ['meta[property="og:title"]'],
|
|
228
|
+
meta: {
|
|
229
|
+
alterPattern: ['h1', 'title'],
|
|
230
|
+
},
|
|
231
|
+
pipes: { trim: true },
|
|
232
|
+
},
|
|
233
|
+
{
|
|
234
|
+
key: 'description',
|
|
235
|
+
patternType: 'css',
|
|
236
|
+
returnType: 'text',
|
|
237
|
+
patterns: ['meta[name="description"]'],
|
|
238
|
+
pipes: { trim: true, decode: true },
|
|
239
|
+
},
|
|
154
240
|
],
|
|
155
|
-
};
|
|
156
|
-
|
|
157
|
-
const result = await this.actionHelpers.scrapeWithWorkflow(workflow);
|
|
158
|
-
```
|
|
159
|
-
|
|
160
|
-
### Container Extraction (lists + pagination)
|
|
161
|
-
|
|
162
|
-
```typescript
|
|
163
|
-
import type { ContainerDescriptor } from '@hanivanrizky/nestjs-browser-action';
|
|
164
|
-
|
|
165
|
-
interface Product { name: string; price: string; }
|
|
166
|
-
|
|
167
|
-
const descriptor: ContainerDescriptor<Product> = {
|
|
168
|
-
container: '.product-card', // CSS or XPath — one node per item
|
|
169
|
-
fields: {
|
|
170
|
-
name: { selector: 'h2.name' },
|
|
171
|
-
price: { selector: '.price' },
|
|
172
|
-
},
|
|
173
|
-
pagination: {
|
|
174
|
-
container: '.pagination',
|
|
175
|
-
linkSelector: 'a',
|
|
176
|
-
labelSelector: 'a',
|
|
177
|
-
},
|
|
178
|
-
};
|
|
179
|
-
|
|
180
|
-
const { items, pagination } = await this.actionHelpers.scrapeContainerFields<Product>(
|
|
181
|
-
'https://example.com/products',
|
|
182
|
-
descriptor,
|
|
183
|
-
{ currentPage: 1, interceptResource: true, useRandomUserAgent: true },
|
|
184
|
-
);
|
|
185
|
-
|
|
186
|
-
console.log(items); // [{ name, price }, ...]
|
|
187
|
-
console.log(pagination?.nextUrl); // URL of next page, or null
|
|
241
|
+
});
|
|
188
242
|
```
|
|
189
243
|
|
|
190
|
-
###
|
|
244
|
+
### XPath Extraction
|
|
191
245
|
|
|
192
246
|
```typescript
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
{
|
|
202
|
-
|
|
203
|
-
|
|
247
|
+
const result = await browserAction.evaluateWebsite({
|
|
248
|
+
url: 'https://example.com/sitemap.xml',
|
|
249
|
+
patterns: [
|
|
250
|
+
{
|
|
251
|
+
key: 'container',
|
|
252
|
+
patternType: 'xpath',
|
|
253
|
+
returnType: 'text',
|
|
254
|
+
patterns: ['//url'],
|
|
255
|
+
meta: { isContainer: true },
|
|
256
|
+
},
|
|
257
|
+
{
|
|
258
|
+
key: 'loc',
|
|
259
|
+
patternType: 'xpath',
|
|
260
|
+
returnType: 'text',
|
|
261
|
+
patterns: ['.//loc/text()'],
|
|
262
|
+
},
|
|
263
|
+
],
|
|
204
264
|
});
|
|
205
265
|
```
|
|
206
266
|
|
|
207
|
-
###
|
|
267
|
+
### Workflow Automation
|
|
208
268
|
|
|
209
269
|
```typescript
|
|
210
|
-
const
|
|
211
|
-
version: '1.0'
|
|
270
|
+
const result = await browserAction.scrapeWithWorkflow({
|
|
271
|
+
version: '1.0',
|
|
212
272
|
actions: [
|
|
213
|
-
{ action: '
|
|
214
|
-
{ action: '
|
|
215
|
-
{ action: '
|
|
273
|
+
{ action: 'navigate', value: 'https://example.com/login' },
|
|
274
|
+
{ action: 'fill', target: { type: 'css', value: '#username' }, value: 'user' },
|
|
275
|
+
{ action: 'fill', target: { type: 'css', value: '#password' }, value: 'pass' },
|
|
276
|
+
{ action: 'click', target: { type: 'css', value: '[type=submit]' } },
|
|
277
|
+
{ action: 'saveCookies', value: 'user-session', options: { overwrite: true } },
|
|
278
|
+
{ id: 'title', action: 'extract', target: { type: 'css', value: 'h1' } },
|
|
216
279
|
],
|
|
217
|
-
};
|
|
280
|
+
});
|
|
218
281
|
```
|
|
219
282
|
|
|
220
283
|
### Stealth (CloakBrowser)
|
|
221
284
|
|
|
222
|
-
Local browsers launch through CloakBrowser stealth Chromium. Configure anti-detect
|
|
223
|
-
features via the `cloak` option:
|
|
224
|
-
|
|
225
285
|
```typescript
|
|
226
286
|
BrowserActionModule.forRoot({
|
|
227
287
|
cloak: {
|
|
228
288
|
proxy: { server: 'http://host:port', username: 'user', password: 'pass' },
|
|
229
|
-
humanize: true,
|
|
230
|
-
geoip: true,
|
|
231
|
-
timezone: 'America/New_York',
|
|
232
|
-
locale: 'en-US',
|
|
233
|
-
stealthArgs: true,
|
|
234
|
-
extensionPaths: ['/path/ext'], // load unpacked extensions
|
|
235
|
-
userDataDir: './profile', // persistent profile (launchPersistentContext)
|
|
236
|
-
launchOptions: { headless: true, args: ['--no-sandbox'] }, // raw puppeteer-core passthrough
|
|
289
|
+
humanize: true,
|
|
290
|
+
geoip: true,
|
|
291
|
+
timezone: 'America/New_York',
|
|
292
|
+
locale: 'en-US',
|
|
293
|
+
stealthArgs: true,
|
|
237
294
|
},
|
|
238
295
|
pool: { min: 2, max: 5 },
|
|
239
296
|
})
|
|
240
297
|
```
|
|
241
298
|
|
|
242
|
-
|
|
243
|
-
passthrough for backward compatibility. `cloak` is ignored when `remote` is set
|
|
244
|
-
(remote uses plain CDP connect).
|
|
245
|
-
|
|
246
|
-
**Per-call cloak override (proxy/UA rotation):** pass `cloak` per request to launch a
|
|
247
|
-
dedicated off-pool browser with its own stealth config — useful for rotating proxies or
|
|
248
|
-
fingerprints across requests. Not supported in remote CDP mode.
|
|
249
|
-
|
|
250
|
-
```typescript
|
|
251
|
-
// scrape / scrapeAll
|
|
252
|
-
await actions.scrape(url, { title: 'h1' }, {
|
|
253
|
-
cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
|
|
254
|
-
});
|
|
255
|
-
|
|
256
|
-
// workflow
|
|
257
|
-
await actions.scrapeWithWorkflow(url, {
|
|
258
|
-
version: '1.0',
|
|
259
|
-
cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
|
|
260
|
-
actions: [...],
|
|
261
|
-
});
|
|
262
|
-
```
|
|
263
|
-
|
|
264
|
-
### Remote Chrome Connection
|
|
265
|
-
|
|
266
|
-
Connect to remote Chrome instances via Chrome DevTools Protocol (CDP):
|
|
267
|
-
|
|
268
|
-
```typescript
|
|
269
|
-
BrowserActionModule.forRoot({
|
|
270
|
-
remote: {
|
|
271
|
-
browserURL: 'http://localhost:9222', // Or use browserWSEndpoint
|
|
272
|
-
retryMax: 3, // Connection retry attempts
|
|
273
|
-
retryDelay: 1000, // Delay between retries (ms)
|
|
274
|
-
},
|
|
275
|
-
pool: { min: 2, max: 5 },
|
|
276
|
-
})
|
|
277
|
-
```
|
|
278
|
-
|
|
279
|
-
**Using browserWSEndpoint:**
|
|
280
|
-
|
|
281
|
-
```typescript
|
|
282
|
-
BrowserActionModule.forRoot({
|
|
283
|
-
remote: {
|
|
284
|
-
browserWSEndpoint: 'ws://localhost:9222/devtools/page/abc123',
|
|
285
|
-
},
|
|
286
|
-
})
|
|
287
|
-
```
|
|
288
|
-
|
|
289
|
-
**Remote-first priority:** When both `remote` and `launchOptions` are provided, remote connection takes precedence.
|
|
290
|
-
|
|
291
|
-
**See:** [Remote Chrome Configuration](./docs/api-reference.md#remote-chrome-configuration) for details.
|
|
292
|
-
|
|
293
|
-
## Services
|
|
294
|
-
|
|
295
|
-
| Service | Description |
|
|
296
|
-
|---------|-------------|
|
|
297
|
-
| **BrowserActionService** | High-level automation methods (scrape, screenshot, PDF, workflows) |
|
|
298
|
-
| **BrowserManagerService** | Browser pool management |
|
|
299
|
-
| **PageService** | Page lifecycle and navigation |
|
|
300
|
-
| **CookieService** | Cookie persistence |
|
|
301
|
-
| **CleansingService** | Data cleansing with pipes |
|
|
299
|
+
### TLS Fingerprint Capture
|
|
302
300
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
### Basic Configuration
|
|
301
|
+
Capture the browser's own TLS fingerprint for use with `nestjs-xpath-parser`'s CycleTLS engine:
|
|
306
302
|
|
|
307
303
|
```typescript
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
min: 2,
|
|
311
|
-
max: 10,
|
|
312
|
-
idleTimeoutMs: 30000, // reap idle browsers down to min (0 disables)
|
|
313
|
-
acquireTimeoutMs: 30000, // reject acquire() if none free in time (0 waits forever)
|
|
314
|
-
strategy: 'round-robin',
|
|
315
|
-
},
|
|
316
|
-
cookies: {
|
|
317
|
-
enabled: true,
|
|
318
|
-
cookiesDir: './cookies',
|
|
319
|
-
},
|
|
320
|
-
logLevel: 'log',
|
|
321
|
-
})
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
### All Options
|
|
325
|
-
|
|
326
|
-
See [Configuration Reference](./docs/api-reference.md#configuration) for complete options.
|
|
327
|
-
|
|
328
|
-
## Type Safety
|
|
329
|
-
|
|
330
|
-
Full TypeScript support with generics:
|
|
331
|
-
|
|
332
|
-
```typescript
|
|
333
|
-
// Type-safe selectors
|
|
334
|
-
interface ProductSelectors {
|
|
335
|
-
title: string;
|
|
336
|
-
price: number;
|
|
337
|
-
}
|
|
338
|
-
|
|
339
|
-
const result = await this.actionHelpers.scrape<ProductSelectors>(url, {
|
|
340
|
-
title: 'h1',
|
|
341
|
-
price: '.price',
|
|
342
|
-
});
|
|
343
|
-
|
|
344
|
-
// Type-safe workflow results
|
|
345
|
-
const workflow = await this.actionHelpers.scrapeWithWorkflow<{
|
|
346
|
-
title: string;
|
|
347
|
-
price: number;
|
|
348
|
-
}>(url, workflow);
|
|
304
|
+
const fingerprint = await browserAction.captureTlsFingerprint('./fingerprint.json');
|
|
305
|
+
// fingerprint.json can be passed to ScraperHtmlModule.forRoot({ fingerprint: './fingerprint.json' })
|
|
349
306
|
```
|
|
350
307
|
|
|
351
308
|
## Development
|
|
352
309
|
|
|
353
|
-
### Scripts
|
|
354
|
-
|
|
355
310
|
```bash
|
|
311
|
+
# Install dependencies
|
|
312
|
+
pnpm install
|
|
313
|
+
|
|
356
314
|
# Build
|
|
357
315
|
pnpm build
|
|
358
316
|
|
|
359
|
-
#
|
|
317
|
+
# Test
|
|
360
318
|
pnpm test
|
|
319
|
+
pnpm test:cov
|
|
361
320
|
|
|
362
|
-
# Lint
|
|
321
|
+
# Lint
|
|
363
322
|
pnpm lint
|
|
364
|
-
|
|
365
|
-
# Format code
|
|
366
323
|
pnpm format
|
|
367
324
|
```
|
|
368
325
|
|
|
369
|
-
|
|
326
|
+
## Contributing
|
|
370
327
|
|
|
371
|
-
|
|
372
|
-
|
|
328
|
+
1. Fork the repository
|
|
329
|
+
2. Create your feature branch (`git checkout -b feature/yourusername/amazing-feature`)
|
|
330
|
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
|
331
|
+
4. Push to the branch (`git push origin feature/yourusername/amazing-feature`)
|
|
332
|
+
5. Open a Pull Request
|
|
373
333
|
|
|
374
334
|
## License
|
|
375
335
|
|
|
376
|
-
MIT
|
|
377
|
-
|
|
378
|
-
## Support
|
|
379
|
-
|
|
380
|
-
For issues and questions, please use [GitHub Issues](https://github.com/Hanivan/nestjs-browser-action/issues).
|
|
381
|
-
|
|
382
|
-
## Examples
|
|
383
|
-
|
|
384
|
-
Check out the test project for complete examples: [test-browser-action](https://github.com/Hanivan/test-browser-action)
|
|
385
|
-
|
|
386
|
-
---
|
|
387
|
-
|
|
388
|
-
**Documentation:**
|
|
389
|
-
- [Methods](./docs/methods) - Method-specific guides
|
|
390
|
-
- [Features](./docs/features) - Feature guides
|
|
391
|
-
- [API Reference](./docs/api-reference.md) - Complete API
|
|
392
|
-
- [Workflow Actions](./docs/workflow-actions.md) - Action reference
|
|
336
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -25,9 +25,9 @@ export interface BrowserActionOptions {
|
|
|
25
25
|
contextOptions?: BrowserContextOptions;
|
|
26
26
|
pool?: PoolOptions;
|
|
27
27
|
multiContext?: boolean;
|
|
28
|
-
logLevel?: LogLevel;
|
|
28
|
+
logLevel?: LogLevel | LogLevel[];
|
|
29
29
|
debugLogMaxLength?: number;
|
|
30
30
|
remote?: RemoteOptions;
|
|
31
31
|
cookies?: CookieOptions;
|
|
32
|
-
customPipes?: Record<string, new () => CleansingPipe>;
|
|
32
|
+
customPipes?: Record<string, new (...args: unknown[]) => CleansingPipe>;
|
|
33
33
|
}
|
|
@@ -1,14 +1,9 @@
|
|
|
1
1
|
import { CleansingProfile } from '../enums/cleansing-profile.enum';
|
|
2
|
-
import type {
|
|
2
|
+
import type { CleanerStepRules } from '../pipes/pipe-engine';
|
|
3
3
|
export interface CleansingOptions {
|
|
4
|
-
pipes?:
|
|
4
|
+
pipes?: CleanerStepRules;
|
|
5
5
|
profile?: CleansingProfile;
|
|
6
6
|
}
|
|
7
|
-
export interface CleansingWithAltOptions {
|
|
8
|
-
primaryPipes: PipeConfig[];
|
|
9
|
-
fallbackPipes: PipeConfig[];
|
|
10
|
-
fallbackOn?: 'empty' | 'null' | 'undefined' | 'all';
|
|
11
|
-
}
|
|
12
7
|
export interface ScrapeCleansingOptions {
|
|
13
|
-
pipes?: Record<string,
|
|
8
|
+
pipes?: Record<string, CleanerStepRules>;
|
|
14
9
|
}
|