@hanivanrizky/nestjs-browser-action 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110)
  1. package/README.md +230 -254
  2. package/dist/interfaces/browser-action-options.d.ts +1 -1
  3. package/dist/interfaces/cleansing-options.d.ts +3 -8
  4. package/dist/interfaces/types.d.ts +32 -1
  5. package/dist/interfaces/workflow-options.d.ts +2 -2
  6. package/dist/pipes/alt-flag.pipe.d.ts +1 -1
  7. package/dist/pipes/alt-flag.pipe.js.map +1 -1
  8. package/dist/pipes/clean-html.pipe.d.ts +5 -0
  9. package/dist/pipes/clean-html.pipe.js +52 -0
  10. package/dist/pipes/clean-html.pipe.js.map +1 -0
  11. package/dist/pipes/cleansing-pipe.d.ts +4 -3
  12. package/dist/pipes/cleansing-pipe.js +3 -17
  13. package/dist/pipes/cleansing-pipe.js.map +1 -1
  14. package/dist/pipes/date-format-special.pipe.d.ts +6 -0
  15. package/dist/pipes/date-format-special.pipe.js +24 -0
  16. package/dist/pipes/date-format-special.pipe.js.map +1 -0
  17. package/dist/pipes/date-format.pipe.d.ts +1 -1
  18. package/dist/pipes/date-format.pipe.js.map +1 -1
  19. package/dist/pipes/extract-email.pipe.d.ts +6 -0
  20. package/dist/pipes/extract-email.pipe.js +19 -0
  21. package/dist/pipes/extract-email.pipe.js.map +1 -0
  22. package/dist/pipes/extract-url-params.pipe.d.ts +10 -0
  23. package/dist/pipes/extract-url-params.pipe.js +57 -0
  24. package/dist/pipes/extract-url-params.pipe.js.map +1 -0
  25. package/dist/pipes/index.d.ts +16 -0
  26. package/dist/pipes/index.js +16 -0
  27. package/dist/pipes/index.js.map +1 -1
  28. package/dist/pipes/json-path.pipe.d.ts +8 -0
  29. package/dist/pipes/json-path.pipe.js +40 -0
  30. package/dist/pipes/json-path.pipe.js.map +1 -0
  31. package/dist/pipes/media-filter.pipe.d.ts +7 -0
  32. package/dist/pipes/media-filter.pipe.js +21 -0
  33. package/dist/pipes/media-filter.pipe.js.map +1 -0
  34. package/dist/pipes/normalize-whitespace.pipe.d.ts +1 -1
  35. package/dist/pipes/normalize-whitespace.pipe.js.map +1 -1
  36. package/dist/pipes/number-normalize.pipe.d.ts +6 -0
  37. package/dist/pipes/number-normalize.pipe.js +38 -0
  38. package/dist/pipes/number-normalize.pipe.js.map +1 -0
  39. package/dist/pipes/parse-as-url.pipe.d.ts +7 -0
  40. package/dist/pipes/parse-as-url.pipe.js +45 -0
  41. package/dist/pipes/parse-as-url.pipe.js.map +1 -0
  42. package/dist/pipes/pipe-engine.d.ts +20 -0
  43. package/dist/pipes/pipe-engine.js +81 -0
  44. package/dist/pipes/pipe-engine.js.map +1 -0
  45. package/dist/pipes/pipe-registry.d.ts +3 -0
  46. package/dist/pipes/pipe-registry.js +89 -0
  47. package/dist/pipes/pipe-registry.js.map +1 -0
  48. package/dist/pipes/profiles/currency.profile.js +11 -12
  49. package/dist/pipes/profiles/currency.profile.js.map +1 -1
  50. package/dist/pipes/profiles/date.profile.js +10 -7
  51. package/dist/pipes/profiles/date.profile.js.map +1 -1
  52. package/dist/pipes/profiles/email.profile.js +9 -9
  53. package/dist/pipes/profiles/email.profile.js.map +1 -1
  54. package/dist/pipes/profiles/phone.profile.js +11 -11
  55. package/dist/pipes/profiles/phone.profile.js.map +1 -1
  56. package/dist/pipes/profiles/price.profile.js +11 -12
  57. package/dist/pipes/profiles/price.profile.js.map +1 -1
  58. package/dist/pipes/profiles.d.ts +2 -2
  59. package/dist/pipes/profiles.js +5 -5
  60. package/dist/pipes/profiles.js.map +1 -1
  61. package/dist/pipes/query-append.pipe.d.ts +9 -0
  62. package/dist/pipes/query-append.pipe.js +47 -0
  63. package/dist/pipes/query-append.pipe.js.map +1 -0
  64. package/dist/pipes/query-remover.pipe.d.ts +22 -0
  65. package/dist/pipes/query-remover.pipe.js +83 -0
  66. package/dist/pipes/query-remover.pipe.js.map +1 -0
  67. package/dist/pipes/regex-extract.pipe.d.ts +1 -1
  68. package/dist/pipes/regex-extract.pipe.js.map +1 -1
  69. package/dist/pipes/regex-extraction.pipe.d.ts +25 -0
  70. package/dist/pipes/regex-extraction.pipe.js +90 -0
  71. package/dist/pipes/regex-extraction.pipe.js.map +1 -0
  72. package/dist/pipes/regex-replace-x.pipe.d.ts +28 -0
  73. package/dist/pipes/regex-replace-x.pipe.js +104 -0
  74. package/dist/pipes/regex-replace-x.pipe.js.map +1 -0
  75. package/dist/pipes/regex-replace.pipe.d.ts +1 -1
  76. package/dist/pipes/regex-replace.pipe.js.map +1 -1
  77. package/dist/pipes/regex.pipe.d.ts +12 -0
  78. package/dist/pipes/regex.pipe.js +42 -0
  79. package/dist/pipes/regex.pipe.js.map +1 -0
  80. package/dist/pipes/remove-currency-symbol.pipe.d.ts +1 -1
  81. package/dist/pipes/remove-currency-symbol.pipe.js.map +1 -1
  82. package/dist/pipes/remove-line-breaks.pipe.d.ts +1 -1
  83. package/dist/pipes/remove-line-breaks.pipe.js.map +1 -1
  84. package/dist/pipes/remove-special-chars.pipe.d.ts +1 -1
  85. package/dist/pipes/remove-special-chars.pipe.js.map +1 -1
  86. package/dist/pipes/sanitize-text.pipe.d.ts +1 -1
  87. package/dist/pipes/sanitize-text.pipe.js.map +1 -1
  88. package/dist/pipes/to-lower-case.pipe.d.ts +1 -1
  89. package/dist/pipes/to-lower-case.pipe.js.map +1 -1
  90. package/dist/pipes/to-number.pipe.d.ts +1 -1
  91. package/dist/pipes/to-number.pipe.js.map +1 -1
  92. package/dist/pipes/to-upper-case.pipe.d.ts +1 -1
  93. package/dist/pipes/to-upper-case.pipe.js.map +1 -1
  94. package/dist/pipes/trim.pipe.d.ts +1 -1
  95. package/dist/pipes/trim.pipe.js.map +1 -1
  96. package/dist/pipes/url-resolve.pipe.d.ts +7 -0
  97. package/dist/pipes/url-resolve.pipe.js +52 -0
  98. package/dist/pipes/url-resolve.pipe.js.map +1 -0
  99. package/dist/services/browser-action.service.d.ts +3 -3
  100. package/dist/services/browser-action.service.js +54 -27
  101. package/dist/services/browser-action.service.js.map +1 -1
  102. package/dist/services/cleansing.service.d.ts +2 -4
  103. package/dist/services/cleansing.service.js +24 -81
  104. package/dist/services/cleansing.service.js.map +1 -1
  105. package/dist/tsconfig.build.tsbuildinfo +1 -1
  106. package/dist/utils/user-agent.util.js +6 -11
  107. package/dist/utils/user-agent.util.js.map +1 -1
  108. package/dist/validators/workflow.validator.js +2 -2
  109. package/dist/validators/workflow.validator.js.map +1 -1
  110. package/package.json +7 -2
package/README.md CHANGED
@@ -1,53 +1,73 @@
1
1
  # @hanivanrizky/nestjs-browser-action
2
2
 
3
- [![npm version](https://badge.fury.io/js/%40hanivanrizky%2Fnestjs-browser-action.svg)](https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action)
4
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
- [![Tests: 322 passed](https://img.shields.io/badge/tests-322%20passed-brightgreen.svg)]()
3
+ <p align="center">
4
+ <a href="http://nestjs.com/" target="_blank"><img src="https://nestjs.com/img/logo-small.svg" width="120" alt="Nest Logo" /></a>
5
+ </p>
6
6
 
7
- > **⚠️ Status: Experimental**
8
- >
9
- > This project is currently in **experimental** stage and intended for **personal use only**. The API is subject to change, and production use is not recommended.
7
+ <p align="center">A NestJS module for stealth browser automation using CloakBrowser + puppeteer-core with support for proxy rotation, connection pooling, cookie persistence, and flexible data extraction.</p>
10
8
 
11
- A NestJS module that provides stealth browser automation (CloakBrowser + puppeteer-core) with configurable options, connection pooling, and data cleansing capabilities.
9
+ <p align="center">
10
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/v/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Version" /></a>
11
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/l/@hanivanrizky/nestjs-browser-action.svg" alt="Package License" /></a>
12
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/dm/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Downloads" /></a>
13
+ <img src="https://img.shields.io/badge/tests-363%20passed-brightgreen.svg" alt="Tests: 363 passed" />
14
+ </p>
15
+
16
+ > **⚠️ Status: Experimental** — personal use only; API subject to change.
17
+
18
+ ## Table of Contents
19
+
20
+ - [Features](#features)
21
+ - [Installation](#installation)
22
+ - [Quick Start](#quick-start)
23
+ - [Documentation](#documentation)
24
+ - [Quick Examples](#quick-examples)
25
+ - [Development](#development)
26
+ - [Contributing](#contributing)
27
+ - [License](#license)
12
28
 
13
29
  ## Features
14
30
 
15
- - (・_・) **Browser Automation**: Declarative workflow-based browser automation
16
- - (☆^O^☆) **Data Scraping**: Single and multi-element scraping with CSS/XPath selectors
17
- - (>_>) **Connection Pooling**: Efficient browser instance management
18
- - (♡˙︶˙♡) **Cookie Persistence**: Save/load browser sessions for authentication
19
- - (。•̀ᴗ-) **Data Cleansing**: 14 built-in transformation pipes
20
- - (°_°)! **Shadow DOM**: Support for web components
21
- - (^_^) **Type-Safe**: Full TypeScript support with generics
22
- - (^^) **Remote Chrome**: Connect to remote Chrome instances via CDP (browserURL/browserWSEndpoint)
23
- - (•̀ᴗ•́) **Stealth**: Local launches use CloakBrowser stealth Chromium (proxy, humanize, geoip, timezone/locale spoofing)
24
- - (⌐■_) **TLS Fingerprint**: Capture the browser's own TLS/HTTP handshake (ja3/ja4, ciphers, http2 akamai, headers)
31
+ - **(☆^O^☆) Pattern-Based Extraction**: Define extraction patterns with `PatternField` — API-compatible with `nestjs-xpath-parser`
32
+ - **(.\_.) Container Extraction**: Extract lists of items from repeating DOM nodes with pagination
33
+ - **(>\_<) Workflow Automation**: Declarative step-by-step browser automation (navigate, click, fill, extract, screenshot…)
34
+ - **(・\_・) Data Cleaning Pipes**: 33 built-in transformations (trim, case, replace, decode HTML, number, regex, jsonpath, clean-html…)
35
+ - **(☆^O^☆) Custom Pipes**: Extensible pipe registry — `PIPE_REGISTRY['my-type'] = MyPipe`
36
+ - **(>\_<) Connection Pooling**: Efficient browser instance reuse with configurable min/max/idle/acquire timeouts
37
+ - **(.\_.) Cookie Persistence**: Save/load browser sessions for authentication flows
38
+ - **(o_o) Stealth**: CloakBrowser Chromium with proxy, humanize, geoip, timezone/locale spoofing, and anti-detect flags
39
+ - **(.\_.) Remote Chrome**: Connect to remote Chrome instances via CDP (browserURL / browserWSEndpoint)
40
+ - **(>\_<) TLS Fingerprint**: Capture the browser's own TLS/HTTP handshake (ja3/ja4, ciphers, http2 akamai, headers) for use with `nestjs-xpath-parser`'s CycleTLS engine
41
+ - **(☆^O^☆) TypeScript Generics**: Full generic type support for type-safe results
42
+ - **(o_o) Fully Tested**: 363 tests across 37 suites
25
43
 
26
44
  ## Installation
27
45
 
28
- ### From npm
29
-
30
46
  ```bash
31
- npm install @hanivanrizky/nestjs-browser-action
47
+ pnpm add @hanivanrizky/nestjs-browser-action
32
48
  # or
33
49
  yarn add @hanivanrizky/nestjs-browser-action
34
50
  # or
35
- pnpm add @hanivanrizky/nestjs-browser-action
51
+ npm install @hanivanrizky/nestjs-browser-action
36
52
  ```
37
53
 
38
- ### From GitHub
54
+ ## Quick Start
39
55
 
40
- ```bash
41
- npm install https://github.com/Hanivan/nestjs-browser-action.git
42
- # or
43
- pnpm add https://github.com/Hanivan/nestjs-browser-action.git
44
- # or using SSH
45
- pnpm add git@github.com:Hanivan/nestjs-browser-action.git
46
- ```
56
+ ### Import the Module
47
57
 
48
- ## Quick Start
58
+ **Basic usage:**
49
59
 
50
- ### 1. Configure Module
60
+ ```typescript
61
+ import { Module } from '@nestjs/common';
62
+ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
63
+
64
+ @Module({
65
+ imports: [BrowserActionModule.forRoot()],
66
+ })
67
+ export class AppModule {}
68
+ ```
69
+
70
+ **With pool and cookie options:**
51
71
 
52
72
  ```typescript
53
73
  import { Module } from '@nestjs/common';
@@ -57,304 +77,260 @@ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
57
77
  imports: [
58
78
  BrowserActionModule.forRoot({
59
79
  pool: { min: 2, max: 10 },
60
- cookies: { enabled: true },
80
+ cookies: { enabled: true, cookiesDir: './cookies' },
81
+ logLevel: 'log',
61
82
  }),
62
83
  ],
63
84
  })
64
85
  export class AppModule {}
65
86
  ```
66
87
 
67
- ### 2. Inject Service
88
+ **Async configuration:**
89
+
90
+ ```typescript
91
+ import { Module } from '@nestjs/common';
92
+ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
93
+ import { ConfigModule, ConfigService } from '@nestjs/config';
94
+
95
+ @Module({
96
+ imports: [
97
+ ConfigModule.forRoot(),
98
+ BrowserActionModule.forRootAsync({
99
+ imports: [ConfigModule],
100
+ useFactory: (configService: ConfigService) => ({
101
+ pool: {
102
+ min: configService.get<number>('POOL_MIN', 2),
103
+ max: configService.get<number>('POOL_MAX', 10),
104
+ },
105
+ cloak: {
106
+ proxy: { server: configService.get<string>('PROXY_URL', '') },
107
+ },
108
+ logLevel: configService.get<string>('LOG_LEVEL', 'log'),
109
+ }),
110
+ inject: [ConfigService],
111
+ }),
112
+ ],
113
+ })
114
+ export class AppModule {}
115
+ ```
116
+
117
+ ### Inject the Service
68
118
 
69
119
  ```typescript
70
120
  import { Injectable } from '@nestjs/common';
71
121
  import { BrowserActionService } from '@hanivanrizky/nestjs-browser-action';
72
122
 
73
123
  @Injectable()
74
- export class MyService {
75
- constructor(
76
- private readonly actionHelpers: BrowserActionService,
77
- ) {}
78
-
79
- async scrapeData() {
80
- const result = await this.actionHelpers.scrape(
81
- 'https://example.com',
82
- {
83
- title: 'h1',
84
- description: 'meta[name="description"]@content',
85
- }
86
- );
87
-
88
- console.log(result.title); // "Example Domain"
89
- console.log(result.description); // "This domain is for use in..."
124
+ export class YourService {
125
+ constructor(private readonly browserAction: BrowserActionService) {}
126
+
127
+ async scrapeProducts() {
128
+ const result = await this.browserAction.evaluateWebsite({
129
+ url: 'https://www.scrapingcourse.com/ecommerce/',
130
+ patterns: [
131
+ {
132
+ key: 'container',
133
+ patternType: 'css',
134
+ returnType: 'text',
135
+ patterns: ['.product'],
136
+ meta: { isContainer: true },
137
+ },
138
+ {
139
+ key: 'name',
140
+ patternType: 'css',
141
+ returnType: 'text',
142
+ patterns: ['h2.woocommerce-loop-product__title'],
143
+ pipes: { trim: true },
144
+ },
145
+ {
146
+ key: 'price',
147
+ patternType: 'css',
148
+ returnType: 'text',
149
+ patterns: ['.price'],
150
+ pipes: { trim: true },
151
+ },
152
+ ],
153
+ });
154
+
155
+ return result.results;
90
156
  }
91
157
  }
92
158
  ```
93
159
 
94
160
  ## Documentation
95
161
 
96
- ### (^_^) Method Documentation
97
-
98
- | Method | Description |
99
- |--------|-------------|
100
- | [`scrape()`](./docs/methods/scrape.md) | Extract single elements |
101
- | [`scrapeAll()`](./docs/methods/scrape-all.md) | Extract multiple elements |
102
- | [`scrapeWithWorkflow()`](./docs/methods/workflow.md) | Workflow-based automation |
103
- | [`scrapeAllWithWorkflow()`](./docs/methods/workflow.md) | Workflow with multi-element |
104
- | [`takeScreenshot()`](./docs/methods/screenshots.md) | Capture screenshots |
105
- | [`generatePDF()`](./docs/methods/screenshots.md) | Generate PDFs |
106
- | [`captureTlsFingerprint()`](./docs/api-reference.md#capturetlsfingerprintpath-url-promisetlsfingerprint) | Capture browser's TLS/HTTP fingerprint |
107
- | [Browser & Page Control](./docs/methods/browser-control.md) | Low-level control |
108
-
109
- ### (☆^O^☆) Feature Guides
162
+ ### Features
110
163
 
111
- | Feature | Description |
112
- |---------|-------------|
113
- | [Cookie Management](./docs/features/cookies.md) | Session persistence |
114
- | [Pipe System](./docs/features/pipes.md) | Data transformation |
115
- | [Workflow Actions](./docs/workflow-actions.md) | All action types reference |
164
+ - [Pattern-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - `evaluateWebsite()` with `PatternField` patterns
165
+ - [Container-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - Extract lists with `meta.isContainer`
166
+ - [Data Cleaning Pipes](docs/features/pipes.md) - Transform extracted data with pipes
167
+ - [Cookie Management](docs/features/cookies.md) - Session persistence
168
+ - [Workflow Actions](docs/methods/workflow.md) - Declarative step-by-step automation
116
169
 
117
- ### (^_^) API Reference
170
+ ### Reference
118
171
 
119
- - [API Reference](./docs/api-reference.md) - Complete API documentation
120
- - [Configuration](./docs/api-reference.md#configuration) - All options
121
- - [Types](./docs/api-reference.md#types) - TypeScript interfaces
172
+ - [API Reference](docs/api-reference.md) - Complete service API documentation
173
+ - [Workflow Actions Reference](docs/workflow-actions.md) - All action types
174
+ - [Browser & Page Control](docs/methods/browser-control.md) - Low-level control
122
175
 
123
176
  ## Quick Examples
124
177
 
125
- ### Simple Scraping
178
+ ### Simple Product Scraping
126
179
 
127
180
  ```typescript
128
- const data = await this.actionHelpers.scrape('https://example.com', {
129
- title: 'h1',
130
- price: '.price',
131
- });
132
- ```
133
-
134
- ### Multi-Element Scraping
181
+ interface Product {
182
+ name: string;
183
+ price: string;
184
+ }
135
185
 
136
- ```typescript
137
- const data = await this.actionHelpers.scrapeAll('https://example.com', {
138
- titles: '.card h2',
139
- links: '.card a',
186
+ const result = await browserAction.evaluateWebsite<Product>({
187
+ url: 'https://example.com/products',
188
+ patterns: [
189
+ {
190
+ key: 'container',
191
+ patternType: 'css',
192
+ returnType: 'text',
193
+ patterns: ['.product-card'],
194
+ meta: { isContainer: true },
195
+ },
196
+ {
197
+ key: 'name',
198
+ patternType: 'css',
199
+ returnType: 'text',
200
+ patterns: ['h2.name'],
201
+ pipes: { trim: true },
202
+ },
203
+ {
204
+ key: 'price',
205
+ patternType: 'css',
206
+ returnType: 'text',
207
+ patterns: ['.price'],
208
+ pipes: {
209
+ trim: true,
210
+ replace: [{ from: '$', to: '' }],
211
+ },
212
+ },
213
+ ],
140
214
  });
141
215
  ```
142
216
 
143
- ### Workflow Automation
217
+ ### Article Extraction with Fallbacks
144
218
 
145
219
  ```typescript
146
- const workflow = {
147
- version: '1.0' as const,
148
- actions: [
149
- { action: 'navigate' as const, value: 'https://example.com' },
150
- { id: 'title', action: 'extract' as const, target: { type: 'css' as const, value: 'h1' } },
151
- { action: 'click' as const, target: { type: 'css' as const, value: '#button' } },
220
+ const result = await browserAction.evaluateWebsite({
221
+ url: 'https://example.com/article',
222
+ patterns: [
223
+ {
224
+ key: 'title',
225
+ patternType: 'css',
226
+ returnType: 'text',
227
+ patterns: ['meta[property="og:title"]'],
228
+ meta: {
229
+ alterPattern: ['h1', 'title'],
230
+ },
231
+ pipes: { trim: true },
232
+ },
233
+ {
234
+ key: 'description',
235
+ patternType: 'css',
236
+ returnType: 'text',
237
+ patterns: ['meta[name="description"]'],
238
+ pipes: { trim: true, decode: true },
239
+ },
152
240
  ],
153
- };
154
-
155
- const result = await this.actionHelpers.scrapeWithWorkflow(workflow);
241
+ });
156
242
  ```
157
243
 
158
- ### With Data Cleansing
244
+ ### XPath Extraction
159
245
 
160
246
  ```typescript
161
- import { CleansingType } from '@hanivanrizky/nestjs-browser-action';
162
-
163
- const data = await this.actionHelpers.scrape('https://example.com', {
164
- price: '.price',
165
- }, {
166
- pipes: {
167
- price: [
168
- { type: CleansingType.REMOVE_CURRENCY_SYMBOL },
169
- { type: CleansingType.TO_NUMBER },
170
- ],
171
- },
247
+ const result = await browserAction.evaluateWebsite({
248
+ url: 'https://example.com/sitemap.xml',
249
+ patterns: [
250
+ {
251
+ key: 'container',
252
+ patternType: 'xpath',
253
+ returnType: 'text',
254
+ patterns: ['//url'],
255
+ meta: { isContainer: true },
256
+ },
257
+ {
258
+ key: 'loc',
259
+ patternType: 'xpath',
260
+ returnType: 'text',
261
+ patterns: ['.//loc/text()'],
262
+ },
263
+ ],
172
264
  });
173
265
  ```
174
266
 
175
- ### Cookie Persistence
267
+ ### Workflow Automation
176
268
 
177
269
  ```typescript
178
- const workflow = {
179
- version: '1.0' as const,
270
+ const result = await browserAction.scrapeWithWorkflow({
271
+ version: '1.0',
180
272
  actions: [
181
- { action: 'loadCookies' as const, value: 'user-session', onError: 'skip' as const },
182
- { action: 'navigate' as const, value: 'https://example.com/dashboard' },
183
- { action: 'saveCookies' as const, value: 'user-session', options: { overwrite: true } },
273
+ { action: 'navigate', value: 'https://example.com/login' },
274
+ { action: 'fill', target: { type: 'css', value: '#username' }, value: 'user' },
275
+ { action: 'fill', target: { type: 'css', value: '#password' }, value: 'pass' },
276
+ { action: 'click', target: { type: 'css', value: '[type=submit]' } },
277
+ { action: 'saveCookies', value: 'user-session', options: { overwrite: true } },
278
+ { id: 'title', action: 'extract', target: { type: 'css', value: 'h1' } },
184
279
  ],
185
- };
280
+ });
186
281
  ```
187
282
 
188
283
  ### Stealth (CloakBrowser)
189
284
 
190
- Local browsers launch through CloakBrowser stealth Chromium. Configure anti-detect
191
- features via the `cloak` option:
192
-
193
285
  ```typescript
194
286
  BrowserActionModule.forRoot({
195
287
  cloak: {
196
288
  proxy: { server: 'http://host:port', username: 'user', password: 'pass' },
197
- humanize: true, // human-like mouse/typing
198
- geoip: true, // spoof geolocation from proxy IP
199
- timezone: 'America/New_York', // spoof timezone
200
- locale: 'en-US', // spoof locale
201
- stealthArgs: true, // anti-detect Chromium flags
202
- extensionPaths: ['/path/ext'], // load unpacked extensions
203
- userDataDir: './profile', // persistent profile (launchPersistentContext)
204
- launchOptions: { headless: true, args: ['--no-sandbox'] }, // raw puppeteer-core passthrough
289
+ humanize: true,
290
+ geoip: true,
291
+ timezone: 'America/New_York',
292
+ locale: 'en-US',
293
+ stealthArgs: true,
205
294
  },
206
295
  pool: { min: 2, max: 5 },
207
296
  })
208
297
  ```
209
298
 
210
- `launchOptions` (top-level) is also forwarded to CloakBrowser's `launchOptions`
211
- passthrough for backward compatibility. `cloak` is ignored when `remote` is set
212
- (remote uses plain CDP connect).
299
+ ### TLS Fingerprint Capture
213
300
 
214
- **Per-call cloak override (proxy/UA rotation):** pass `cloak` per request to launch a
215
- dedicated off-pool browser with its own stealth config — useful for rotating proxies or
216
- fingerprints across requests. Not supported in remote CDP mode.
301
+ Capture the browser's own TLS fingerprint for use with `nestjs-xpath-parser`'s CycleTLS engine:
217
302
 
218
303
  ```typescript
219
- // scrape / scrapeAll
220
- await actions.scrape(url, { title: 'h1' }, {
221
- cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
222
- });
223
-
224
- // workflow
225
- await actions.scrapeWithWorkflow(url, {
226
- version: '1.0',
227
- cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
228
- actions: [...],
229
- });
230
- ```
231
-
232
- ### Remote Chrome Connection
233
-
234
- Connect to remote Chrome instances via Chrome DevTools Protocol (CDP):
235
-
236
- ```typescript
237
- BrowserActionModule.forRoot({
238
- remote: {
239
- browserURL: 'http://localhost:9222', // Or use browserWSEndpoint
240
- retryMax: 3, // Connection retry attempts
241
- retryDelay: 1000, // Delay between retries (ms)
242
- },
243
- pool: { min: 2, max: 5 },
244
- })
245
- ```
246
-
247
- **Using browserWSEndpoint:**
248
-
249
- ```typescript
250
- BrowserActionModule.forRoot({
251
- remote: {
252
- browserWSEndpoint: 'ws://localhost:9222/devtools/page/abc123',
253
- },
254
- })
255
- ```
256
-
257
- **Remote-first priority:** When both `remote` and `launchOptions` are provided, remote connection takes precedence.
258
-
259
- **See:** [Remote Chrome Configuration](./docs/api-reference.md#remote-chrome-configuration) for details.
260
-
261
- ## Services
262
-
263
- | Service | Description |
264
- |---------|-------------|
265
- | **BrowserActionService** | High-level automation methods (scrape, screenshot, PDF, workflows) |
266
- | **BrowserManagerService** | Browser pool management |
267
- | **PageService** | Page lifecycle and navigation |
268
- | **CookieService** | Cookie persistence |
269
- | **CleansingService** | Data cleansing with pipes |
270
-
271
- ## Configuration
272
-
273
- ### Basic Configuration
274
-
275
- ```typescript
276
- BrowserActionModule.forRoot({
277
- pool: {
278
- min: 2,
279
- max: 10,
280
- idleTimeoutMs: 30000, // reap idle browsers down to min (0 disables)
281
- acquireTimeoutMs: 30000, // reject acquire() if none free in time (0 waits forever)
282
- strategy: 'round-robin',
283
- },
284
- cookies: {
285
- enabled: true,
286
- cookiesDir: './cookies',
287
- },
288
- logLevel: 'log',
289
- })
290
- ```
291
-
292
- ### All Options
293
-
294
- See [Configuration Reference](./docs/api-reference.md#configuration) for complete options.
295
-
296
- ## Type Safety
297
-
298
- Full TypeScript support with generics:
299
-
300
- ```typescript
301
- // Type-safe selectors
302
- interface ProductSelectors {
303
- title: string;
304
- price: number;
305
- }
306
-
307
- const result = await this.actionHelpers.scrape<ProductSelectors>(url, {
308
- title: 'h1',
309
- price: '.price',
310
- });
311
-
312
- // Type-safe workflow results
313
- const workflow = await this.actionHelpers.scrapeWithWorkflow<{
314
- title: string;
315
- price: number;
316
- }>(url, workflow);
304
+ const fingerprint = await browserAction.captureTlsFingerprint('./fingerprint.json');
305
+ // fingerprint.json can be passed to ScraperHtmlModule.forRoot({ fingerprint: './fingerprint.json' })
317
306
  ```
318
307
 
319
308
  ## Development
320
309
 
321
- ### Scripts
322
-
323
310
  ```bash
311
+ # Install dependencies
312
+ pnpm install
313
+
324
314
  # Build
325
315
  pnpm build
326
316
 
327
- # Run tests
317
+ # Test
328
318
  pnpm test
319
+ pnpm test:cov
329
320
 
330
- # Lint code
321
+ # Lint
331
322
  pnpm lint
332
-
333
- # Format code
334
323
  pnpm format
335
324
  ```
336
325
 
337
- ### Git Hooks
326
+ ## Contributing
338
327
 
339
- - **Pre-commit**: Runs ESLint
340
- - **Pre-push**: Runs build and tests
328
+ 1. Fork the repository
329
+ 2. Create your feature branch (`git checkout -b feature/yourusername/amazing-feature`)
330
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
331
+ 4. Push to the branch (`git push origin feature/yourusername/amazing-feature`)
332
+ 5. Open a Pull Request
341
333
 
342
334
  ## License
343
335
 
344
- MIT
345
-
346
- ## Support
347
-
348
- For issues and questions, please use [GitHub Issues](https://github.com/Hanivan/nestjs-browser-action/issues).
349
-
350
- ## Examples
351
-
352
- Check out the test project for complete examples: [test-browser-action](https://github.com/Hanivan/test-browser-action)
353
-
354
- ---
355
-
356
- **Documentation:**
357
- - [Methods](./docs/methods) - Method-specific guides
358
- - [Features](./docs/features) - Feature guides
359
- - [API Reference](./docs/api-reference.md) - Complete API
360
- - [Workflow Actions](./docs/workflow-actions.md) - Action reference
336
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
package/dist/interfaces/browser-action-options.d.ts CHANGED
@@ -29,5 +29,5 @@ export interface BrowserActionOptions {
29
29
  debugLogMaxLength?: number;
30
30
  remote?: RemoteOptions;
31
31
  cookies?: CookieOptions;
32
- customPipes?: Record<string, new () => CleansingPipe>;
32
+ customPipes?: Record<string, new (...args: unknown[]) => CleansingPipe>;
33
33
  }
package/dist/interfaces/cleansing-options.d.ts CHANGED
@@ -1,14 +1,9 @@
1
1
  import { CleansingProfile } from '../enums/cleansing-profile.enum';
2
- import type { PipeConfig } from './types';
2
+ import type { CleanerStepRules } from '../pipes/pipe-engine';
3
3
  export interface CleansingOptions {
4
- pipes?: PipeConfig[];
4
+ pipes?: CleanerStepRules;
5
5
  profile?: CleansingProfile;
6
6
  }
7
- export interface CleansingWithAltOptions {
8
- primaryPipes: PipeConfig[];
9
- fallbackPipes: PipeConfig[];
10
- fallbackOn?: 'empty' | 'null' | 'undefined' | 'all';
11
- }
12
7
  export interface ScrapeCleansingOptions {
13
- pipes?: Record<string, PipeConfig[]>;
8
+ pipes?: Record<string, CleanerStepRules>;
14
9
  }
package/dist/interfaces/types.d.ts CHANGED
@@ -1,5 +1,6 @@
1
1
  import { CleansingType } from '../enums/cleansing-type.enum';
2
2
  import type { CloakOptions } from './browser-action-options';
3
+ import type { CleanerStepRules } from '../pipes/pipe-engine';
3
4
  export interface PipeConfig {
4
5
  type: CleansingType | string;
5
6
  pattern?: string;
@@ -12,7 +13,8 @@ export interface PipeConfig {
12
13
  [key: string]: unknown;
13
14
  }
14
15
  export type SelectorMap = Record<string, string>;
15
- export type PipeOptions = Record<string, PipeConfig[]>;
16
+ export type PipeOptions = Record<string, CleanerStepRules>;
17
+ export type { CleanerStepRules };
16
18
  export type ScrapeResult = Partial<Record<string, unknown>>;
17
19
  export type ScrapeAllResult = Partial<Record<string, unknown[]>>;
18
20
  export type ScraperOptions = {
@@ -63,4 +65,33 @@ export interface ContainerScrapeResult<T = Record<string, unknown>> {
63
65
  items: T[];
64
66
  pagination?: PaginationResult;
65
67
  }
68
+ export interface PatternMeta {
69
+ multiple?: boolean | string;
70
+ multiline?: boolean;
71
+ alterPattern?: string[];
72
+ isContainer?: boolean;
73
+ isPage?: boolean;
74
+ pageUrlKey?: string;
75
+ pageTextKey?: string;
76
+ }
77
+ export interface PatternField {
78
+ key: string;
79
+ patternType: 'xpath' | 'css';
80
+ returnType: 'text' | 'rawHTML' | 'html';
81
+ patterns: string[];
82
+ meta?: PatternMeta;
83
+ pipes?: CleanerStepRules;
84
+ }
85
+ export interface EvaluateOptions {
86
+ url?: string;
87
+ patterns: PatternField[];
88
+ waitUntil?: 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2';
89
+ timeout?: number;
90
+ cloak?: CloakOptions;
91
+ interceptResource?: boolean;
92
+ useRandomUserAgent?: boolean;
93
+ }
94
+ export interface EvaluateResult<T = Record<string, unknown>> {
95
+ results: T[];
96
+ }
66
97
  export type { ActionTarget, ActionType, ActionOptions, ActionCondition, ErrorStrategy, WorkflowAction, WorkflowDefinition, WorkflowResult, VariableContext, } from './workflow-options';