@hanivanrizky/nestjs-browser-action 0.15.0 → 0.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. package/README.md +230 -286
  2. package/dist/interfaces/browser-action-options.d.ts +2 -2
  3. package/dist/interfaces/cleansing-options.d.ts +3 -8
  4. package/dist/interfaces/types.d.ts +32 -1
  5. package/dist/interfaces/workflow-options.d.ts +2 -2
  6. package/dist/pipes/alt-flag.pipe.d.ts +1 -1
  7. package/dist/pipes/alt-flag.pipe.js.map +1 -1
  8. package/dist/pipes/clean-html.pipe.d.ts +5 -0
  9. package/dist/pipes/clean-html.pipe.js +52 -0
  10. package/dist/pipes/clean-html.pipe.js.map +1 -0
  11. package/dist/pipes/cleansing-pipe.d.ts +4 -3
  12. package/dist/pipes/cleansing-pipe.js +3 -17
  13. package/dist/pipes/cleansing-pipe.js.map +1 -1
  14. package/dist/pipes/date-format-special.pipe.d.ts +6 -0
  15. package/dist/pipes/date-format-special.pipe.js +24 -0
  16. package/dist/pipes/date-format-special.pipe.js.map +1 -0
  17. package/dist/pipes/date-format.pipe.d.ts +1 -1
  18. package/dist/pipes/date-format.pipe.js.map +1 -1
  19. package/dist/pipes/extract-email.pipe.d.ts +6 -0
  20. package/dist/pipes/extract-email.pipe.js +19 -0
  21. package/dist/pipes/extract-email.pipe.js.map +1 -0
  22. package/dist/pipes/extract-url-params.pipe.d.ts +10 -0
  23. package/dist/pipes/extract-url-params.pipe.js +57 -0
  24. package/dist/pipes/extract-url-params.pipe.js.map +1 -0
  25. package/dist/pipes/index.d.ts +16 -0
  26. package/dist/pipes/index.js +16 -0
  27. package/dist/pipes/index.js.map +1 -1
  28. package/dist/pipes/json-path.pipe.d.ts +8 -0
  29. package/dist/pipes/json-path.pipe.js +40 -0
  30. package/dist/pipes/json-path.pipe.js.map +1 -0
  31. package/dist/pipes/media-filter.pipe.d.ts +7 -0
  32. package/dist/pipes/media-filter.pipe.js +21 -0
  33. package/dist/pipes/media-filter.pipe.js.map +1 -0
  34. package/dist/pipes/normalize-whitespace.pipe.d.ts +1 -1
  35. package/dist/pipes/normalize-whitespace.pipe.js.map +1 -1
  36. package/dist/pipes/number-normalize.pipe.d.ts +6 -0
  37. package/dist/pipes/number-normalize.pipe.js +38 -0
  38. package/dist/pipes/number-normalize.pipe.js.map +1 -0
  39. package/dist/pipes/parse-as-url.pipe.d.ts +7 -0
  40. package/dist/pipes/parse-as-url.pipe.js +45 -0
  41. package/dist/pipes/parse-as-url.pipe.js.map +1 -0
  42. package/dist/pipes/pipe-engine.d.ts +20 -0
  43. package/dist/pipes/pipe-engine.js +81 -0
  44. package/dist/pipes/pipe-engine.js.map +1 -0
  45. package/dist/pipes/pipe-registry.d.ts +3 -0
  46. package/dist/pipes/pipe-registry.js +89 -0
  47. package/dist/pipes/pipe-registry.js.map +1 -0
  48. package/dist/pipes/profiles/currency.profile.js +11 -12
  49. package/dist/pipes/profiles/currency.profile.js.map +1 -1
  50. package/dist/pipes/profiles/date.profile.js +10 -7
  51. package/dist/pipes/profiles/date.profile.js.map +1 -1
  52. package/dist/pipes/profiles/email.profile.js +9 -9
  53. package/dist/pipes/profiles/email.profile.js.map +1 -1
  54. package/dist/pipes/profiles/phone.profile.js +11 -11
  55. package/dist/pipes/profiles/phone.profile.js.map +1 -1
  56. package/dist/pipes/profiles/price.profile.js +11 -12
  57. package/dist/pipes/profiles/price.profile.js.map +1 -1
  58. package/dist/pipes/profiles.d.ts +2 -2
  59. package/dist/pipes/profiles.js +5 -5
  60. package/dist/pipes/profiles.js.map +1 -1
  61. package/dist/pipes/query-append.pipe.d.ts +9 -0
  62. package/dist/pipes/query-append.pipe.js +47 -0
  63. package/dist/pipes/query-append.pipe.js.map +1 -0
  64. package/dist/pipes/query-remover.pipe.d.ts +22 -0
  65. package/dist/pipes/query-remover.pipe.js +83 -0
  66. package/dist/pipes/query-remover.pipe.js.map +1 -0
  67. package/dist/pipes/regex-extract.pipe.d.ts +1 -1
  68. package/dist/pipes/regex-extract.pipe.js.map +1 -1
  69. package/dist/pipes/regex-extraction.pipe.d.ts +25 -0
  70. package/dist/pipes/regex-extraction.pipe.js +90 -0
  71. package/dist/pipes/regex-extraction.pipe.js.map +1 -0
  72. package/dist/pipes/regex-replace-x.pipe.d.ts +28 -0
  73. package/dist/pipes/regex-replace-x.pipe.js +104 -0
  74. package/dist/pipes/regex-replace-x.pipe.js.map +1 -0
  75. package/dist/pipes/regex-replace.pipe.d.ts +1 -1
  76. package/dist/pipes/regex-replace.pipe.js.map +1 -1
  77. package/dist/pipes/regex.pipe.d.ts +12 -0
  78. package/dist/pipes/regex.pipe.js +42 -0
  79. package/dist/pipes/regex.pipe.js.map +1 -0
  80. package/dist/pipes/remove-currency-symbol.pipe.d.ts +1 -1
  81. package/dist/pipes/remove-currency-symbol.pipe.js.map +1 -1
  82. package/dist/pipes/remove-line-breaks.pipe.d.ts +1 -1
  83. package/dist/pipes/remove-line-breaks.pipe.js.map +1 -1
  84. package/dist/pipes/remove-special-chars.pipe.d.ts +1 -1
  85. package/dist/pipes/remove-special-chars.pipe.js.map +1 -1
  86. package/dist/pipes/sanitize-text.pipe.d.ts +1 -1
  87. package/dist/pipes/sanitize-text.pipe.js.map +1 -1
  88. package/dist/pipes/to-lower-case.pipe.d.ts +1 -1
  89. package/dist/pipes/to-lower-case.pipe.js.map +1 -1
  90. package/dist/pipes/to-number.pipe.d.ts +1 -1
  91. package/dist/pipes/to-number.pipe.js.map +1 -1
  92. package/dist/pipes/to-upper-case.pipe.d.ts +1 -1
  93. package/dist/pipes/to-upper-case.pipe.js.map +1 -1
  94. package/dist/pipes/trim.pipe.d.ts +1 -1
  95. package/dist/pipes/trim.pipe.js.map +1 -1
  96. package/dist/pipes/url-resolve.pipe.d.ts +7 -0
  97. package/dist/pipes/url-resolve.pipe.js +52 -0
  98. package/dist/pipes/url-resolve.pipe.js.map +1 -0
  99. package/dist/services/browser-action.service.d.ts +3 -3
  100. package/dist/services/browser-action.service.js +54 -27
  101. package/dist/services/browser-action.service.js.map +1 -1
  102. package/dist/services/browser-pool.service.js +3 -1
  103. package/dist/services/browser-pool.service.js.map +1 -1
  104. package/dist/services/cleansing.service.d.ts +2 -4
  105. package/dist/services/cleansing.service.js +24 -81
  106. package/dist/services/cleansing.service.js.map +1 -1
  107. package/dist/services/cookie.service.js +3 -1
  108. package/dist/services/cookie.service.js.map +1 -1
  109. package/dist/tsconfig.build.tsbuildinfo +1 -1
  110. package/dist/validators/workflow.validator.js +2 -2
  111. package/dist/validators/workflow.validator.js.map +1 -1
  112. package/package.json +5 -1
package/README.md CHANGED
@@ -1,54 +1,73 @@
1
1
  # @hanivanrizky/nestjs-browser-action
2
2
 
3
- [![npm version](https://badge.fury.io/js/%40hanivanrizky%2Fnestjs-browser-action.svg)](https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action)
4
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
- [![Tests: 337 passed](https://img.shields.io/badge/tests-337%20passed-brightgreen.svg)]()
3
+ <p align="center">
4
+ <a href="http://nestjs.com/" target="_blank"><img src="https://nestjs.com/img/logo-small.svg" width="120" alt="Nest Logo" /></a>
5
+ </p>
6
6
 
7
- > **⚠️ Status: Experimental**
8
- >
9
- > This project is currently in **experimental** stage and intended for **personal use only**. The API is subject to change, and production use is not recommended.
7
+ <p align="center">A NestJS module for stealth browser automation using CloakBrowser + puppeteer-core with support for proxy rotation, connection pooling, cookie persistence, and flexible data extraction.</p>
10
8
 
11
- A NestJS module that provides stealth browser automation (CloakBrowser + puppeteer-core) with configurable options, connection pooling, and data cleansing capabilities.
9
+ <p align="center">
10
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/v/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Version" /></a>
11
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/l/@hanivanrizky/nestjs-browser-action.svg" alt="Package License" /></a>
12
+ <a href="https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action" target="_blank"><img src="https://img.shields.io/npm/dm/@hanivanrizky/nestjs-browser-action.svg" alt="NPM Downloads" /></a>
13
+ <img src="https://img.shields.io/badge/tests-363%20passed-brightgreen.svg" alt="Tests: 363 passed" />
14
+ </p>
15
+
16
+ > **⚠️ Status: Experimental** — personal use only; API subject to change.
17
+
18
+ ## Table of Contents
19
+
20
+ - [Features](#features)
21
+ - [Installation](#installation)
22
+ - [Quick Start](#quick-start)
23
+ - [Documentation](#documentation)
24
+ - [Quick Examples](#quick-examples)
25
+ - [Development](#development)
26
+ - [Contributing](#contributing)
27
+ - [License](#license)
12
28
 
13
29
  ## Features
14
30
 
15
- - (・_・) **Browser Automation**: Declarative workflow-based browser automation
16
- - (☆^O^☆) **Data Scraping**: Single, multi-element, and container-based list scraping with CSS/XPath
17
- - (◕‿◕) **Pagination**: Automatic next-page resolution from container scrapes
18
- - (>_>) **Connection Pooling**: Efficient browser instance management
19
- - (♡˙︶˙♡) **Cookie Persistence**: Save/load browser sessions for authentication
20
- - (。•̀ᴗ-) **Data Cleansing**: 14 built-in transformation pipes
21
- - (°_°)! **Shadow DOM**: Support for web components
22
- - (^_^) **Type-Safe**: Full TypeScript support with generics
23
- - (^^) **Remote Chrome**: Connect to remote Chrome instances via CDP (browserURL/browserWSEndpoint)
24
- - (•̀ᴗ•́) **Stealth**: Local launches use CloakBrowser stealth Chromium (proxy, humanize, geoip, timezone/locale spoofing)
25
- - (⌐■_■) **TLS Fingerprint**: Capture the browser's own TLS/HTTP handshake (ja3/ja4, ciphers, http2 akamai, headers)
31
+ - **(☆^O^☆) Pattern-Based Extraction**: Define extraction patterns with `PatternField` — API-compatible with `nestjs-xpath-parser`
32
+ - **(.\_.) Container Extraction**: Extract lists of items from repeating DOM nodes with pagination
33
+ - **(>\_<) Workflow Automation**: Declarative step-by-step browser automation (navigate, click, fill, extract, screenshot…)
34
+ - **(・\_) Data Cleaning Pipes**: 33 built-in transformations (trim, case, replace, decode HTML, number, regex, jsonpath, clean-html…)
35
+ - **(☆^O^☆) Custom Pipes**: Extensible pipe registry `PIPE_REGISTRY['my-type'] = MyPipe`
36
+ - **(>\_<) Connection Pooling**: Efficient browser instance reuse with configurable min/max/idle/acquire timeouts
37
+ - **(.\_.) Cookie Persistence**: Save/load browser sessions for authentication flows
38
+ - **(o_o) Stealth**: CloakBrowser Chromium with proxy, humanize, geoip, timezone/locale spoofing, and anti-detect flags
39
+ - **(.\_.) Remote Chrome**: Connect to remote Chrome instances via CDP (browserURL / browserWSEndpoint)
40
+ - **(>\_<) TLS Fingerprint**: Capture the browser's own TLS/HTTP handshake (ja3/ja4, ciphers, http2 akamai, headers) for use with `nestjs-xpath-parser`'s CycleTLS engine
41
+ - **(☆^O^☆) TypeScript Generics**: Full generic type support for type-safe results
42
+ - **(o_o) Fully Tested**: 363 tests across 37 suites
26
43
 
27
44
  ## Installation
28
45
 
29
- ### From npm
30
-
31
46
  ```bash
32
- npm install @hanivanrizky/nestjs-browser-action
47
+ pnpm add @hanivanrizky/nestjs-browser-action
33
48
  # or
34
49
  yarn add @hanivanrizky/nestjs-browser-action
35
50
  # or
36
- pnpm add @hanivanrizky/nestjs-browser-action
51
+ npm install @hanivanrizky/nestjs-browser-action
37
52
  ```
38
53
 
39
- ### From GitHub
54
+ ## Quick Start
40
55
 
41
- ```bash
42
- npm install https://github.com/Hanivan/nestjs-browser-action.git
43
- # or
44
- pnpm add https://github.com/Hanivan/nestjs-browser-action.git
45
- # or using SSH
46
- pnpm add git@github.com:Hanivan/nestjs-browser-action.git
47
- ```
56
+ ### Import the Module
48
57
 
49
- ## Quick Start
58
+ **Basic usage:**
50
59
 
51
- ### 1. Configure Module
60
+ ```typescript
61
+ import { Module } from '@nestjs/common';
62
+ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
63
+
64
+ @Module({
65
+ imports: [BrowserActionModule.forRoot()],
66
+ })
67
+ export class AppModule {}
68
+ ```
69
+
70
+ **With pool and cookie options:**
52
71
 
53
72
  ```typescript
54
73
  import { Module } from '@nestjs/common';
@@ -58,335 +77,260 @@ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
58
77
  imports: [
59
78
  BrowserActionModule.forRoot({
60
79
  pool: { min: 2, max: 10 },
61
- cookies: { enabled: true },
80
+ cookies: { enabled: true, cookiesDir: './cookies' },
81
+ logLevel: 'log',
82
+ }),
83
+ ],
84
+ })
85
+ export class AppModule {}
86
+ ```
87
+
88
+ **Async configuration:**
89
+
90
+ ```typescript
91
+ import { Module } from '@nestjs/common';
92
+ import { BrowserActionModule } from '@hanivanrizky/nestjs-browser-action';
93
+ import { ConfigModule, ConfigService } from '@nestjs/config';
94
+
95
+ @Module({
96
+ imports: [
97
+ ConfigModule.forRoot(),
98
+ BrowserActionModule.forRootAsync({
99
+ imports: [ConfigModule],
100
+ useFactory: (configService: ConfigService) => ({
101
+ pool: {
102
+ min: configService.get<number>('POOL_MIN', 2),
103
+ max: configService.get<number>('POOL_MAX', 10),
104
+ },
105
+ cloak: {
106
+ proxy: { server: configService.get<string>('PROXY_URL', '') },
107
+ },
108
+ logLevel: configService.get<string>('LOG_LEVEL', 'log'),
109
+ }),
110
+ inject: [ConfigService],
62
111
  }),
63
112
  ],
64
113
  })
65
114
  export class AppModule {}
66
115
  ```
67
116
 
68
- ### 2. Inject Service
117
+ ### Inject the Service
69
118
 
70
119
  ```typescript
71
120
  import { Injectable } from '@nestjs/common';
72
121
  import { BrowserActionService } from '@hanivanrizky/nestjs-browser-action';
73
122
 
74
123
  @Injectable()
75
- export class MyService {
76
- constructor(
77
- private readonly actionHelpers: BrowserActionService,
78
- ) {}
79
-
80
- async scrapeData() {
81
- const result = await this.actionHelpers.scrape(
82
- 'https://example.com',
83
- {
84
- title: 'h1',
85
- description: 'meta[name="description"]@content',
86
- }
87
- );
88
-
89
- console.log(result.title); // "Example Domain"
90
- console.log(result.description); // "This domain is for use in..."
124
+ export class YourService {
125
+ constructor(private readonly browserAction: BrowserActionService) {}
126
+
127
+ async scrapeProducts() {
128
+ const result = await this.browserAction.evaluateWebsite({
129
+ url: 'https://www.scrapingcourse.com/ecommerce/',
130
+ patterns: [
131
+ {
132
+ key: 'container',
133
+ patternType: 'css',
134
+ returnType: 'text',
135
+ patterns: ['.product'],
136
+ meta: { isContainer: true },
137
+ },
138
+ {
139
+ key: 'name',
140
+ patternType: 'css',
141
+ returnType: 'text',
142
+ patterns: ['h2.woocommerce-loop-product__title'],
143
+ pipes: { trim: true },
144
+ },
145
+ {
146
+ key: 'price',
147
+ patternType: 'css',
148
+ returnType: 'text',
149
+ patterns: ['.price'],
150
+ pipes: { trim: true },
151
+ },
152
+ ],
153
+ });
154
+
155
+ return result.results;
91
156
  }
92
157
  }
93
158
  ```
94
159
 
95
160
  ## Documentation
96
161
 
97
- ### (^_^) Method Documentation
98
-
99
- | Method | Description |
100
- |--------|-------------|
101
- | [`scrape()`](./docs/methods/scrape.md) | Extract single elements |
102
- | [`scrapeAll()`](./docs/methods/scrape-all.md) | Extract multiple elements |
103
- | [`scrapeContainerFields()`](./docs/api-reference.md#scrapecontainerfields) | Extract structured lists with pagination |
104
- | [`scrapeWithWorkflow()`](./docs/methods/workflow.md) | Workflow-based automation |
105
- | [`scrapeAllWithWorkflow()`](./docs/methods/workflow.md) | Workflow with multi-element |
106
- | [`takeScreenshot()`](./docs/methods/screenshots.md) | Capture screenshots |
107
- | [`generatePDF()`](./docs/methods/screenshots.md) | Generate PDFs |
108
- | [`captureTlsFingerprint()`](./docs/api-reference.md#capturetlsfingerprintpath-url-promisetlsfingerprint) | Capture browser's TLS/HTTP fingerprint |
109
- | [Browser & Page Control](./docs/methods/browser-control.md) | Low-level control |
110
-
111
- ### (☆^O^☆) Feature Guides
162
+ ### Features
112
163
 
113
- | Feature | Description |
114
- |---------|-------------|
115
- | [Cookie Management](./docs/features/cookies.md) | Session persistence |
116
- | [Pipe System](./docs/features/pipes.md) | Data transformation |
117
- | [Workflow Actions](./docs/workflow-actions.md) | All action types reference |
164
+ - [Pattern-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - `evaluateWebsite()` with `PatternField` patterns
165
+ - [Container-Based Extraction](docs/methods/scrape.md#evaluatewebsite---unified-xpath-parser-compatible-api) - Extract lists with `meta.isContainer`
166
+ - [Data Cleaning Pipes](docs/features/pipes.md) - Transform extracted data with pipes
167
+ - [Cookie Management](docs/features/cookies.md) - Session persistence
168
+ - [Workflow Actions](docs/methods/workflow.md) - Declarative step-by-step automation
118
169
 
119
- ### (^_^) API Reference
170
+ ### Reference
120
171
 
121
- - [API Reference](./docs/api-reference.md) - Complete API documentation
122
- - [Configuration](./docs/api-reference.md#configuration) - All options
123
- - [Types](./docs/api-reference.md#types) - TypeScript interfaces
172
+ - [API Reference](docs/api-reference.md) - Complete service API documentation
173
+ - [Workflow Actions Reference](docs/workflow-actions.md) - All action types
174
+ - [Browser & Page Control](docs/methods/browser-control.md) - Low-level control
124
175
 
125
176
  ## Quick Examples
126
177
 
127
- ### Simple Scraping
178
+ ### Simple Product Scraping
128
179
 
129
180
  ```typescript
130
- const data = await this.actionHelpers.scrape('https://example.com', {
131
- title: 'h1',
132
- price: '.price',
133
- });
134
- ```
135
-
136
- ### Multi-Element Scraping
181
+ interface Product {
182
+ name: string;
183
+ price: string;
184
+ }
137
185
 
138
- ```typescript
139
- const data = await this.actionHelpers.scrapeAll('https://example.com', {
140
- titles: '.card h2',
141
- links: '.card a',
186
+ const result = await browserAction.evaluateWebsite<Product>({
187
+ url: 'https://example.com/products',
188
+ patterns: [
189
+ {
190
+ key: 'container',
191
+ patternType: 'css',
192
+ returnType: 'text',
193
+ patterns: ['.product-card'],
194
+ meta: { isContainer: true },
195
+ },
196
+ {
197
+ key: 'name',
198
+ patternType: 'css',
199
+ returnType: 'text',
200
+ patterns: ['h2.name'],
201
+ pipes: { trim: true },
202
+ },
203
+ {
204
+ key: 'price',
205
+ patternType: 'css',
206
+ returnType: 'text',
207
+ patterns: ['.price'],
208
+ pipes: {
209
+ trim: true,
210
+ replace: [{ from: '$', to: '' }],
211
+ },
212
+ },
213
+ ],
142
214
  });
143
215
  ```
144
216
 
145
- ### Workflow Automation
217
+ ### Article Extraction with Fallbacks
146
218
 
147
219
  ```typescript
148
- const workflow = {
149
- version: '1.0' as const,
150
- actions: [
151
- { action: 'navigate' as const, value: 'https://example.com' },
152
- { id: 'title', action: 'extract' as const, target: { type: 'css' as const, value: 'h1' } },
153
- { action: 'click' as const, target: { type: 'css' as const, value: '#button' } },
220
+ const result = await browserAction.evaluateWebsite({
221
+ url: 'https://example.com/article',
222
+ patterns: [
223
+ {
224
+ key: 'title',
225
+ patternType: 'css',
226
+ returnType: 'text',
227
+ patterns: ['meta[property="og:title"]'],
228
+ meta: {
229
+ alterPattern: ['h1', 'title'],
230
+ },
231
+ pipes: { trim: true },
232
+ },
233
+ {
234
+ key: 'description',
235
+ patternType: 'css',
236
+ returnType: 'text',
237
+ patterns: ['meta[name="description"]'],
238
+ pipes: { trim: true, decode: true },
239
+ },
154
240
  ],
155
- };
156
-
157
- const result = await this.actionHelpers.scrapeWithWorkflow(workflow);
158
- ```
159
-
160
- ### Container Extraction (lists + pagination)
161
-
162
- ```typescript
163
- import type { ContainerDescriptor } from '@hanivanrizky/nestjs-browser-action';
164
-
165
- interface Product { name: string; price: string; }
166
-
167
- const descriptor: ContainerDescriptor<Product> = {
168
- container: '.product-card', // CSS or XPath — one node per item
169
- fields: {
170
- name: { selector: 'h2.name' },
171
- price: { selector: '.price' },
172
- },
173
- pagination: {
174
- container: '.pagination',
175
- linkSelector: 'a',
176
- labelSelector: 'a',
177
- },
178
- };
179
-
180
- const { items, pagination } = await this.actionHelpers.scrapeContainerFields<Product>(
181
- 'https://example.com/products',
182
- descriptor,
183
- { currentPage: 1, interceptResource: true, useRandomUserAgent: true },
184
- );
185
-
186
- console.log(items); // [{ name, price }, ...]
187
- console.log(pagination?.nextUrl); // URL of next page, or null
241
+ });
188
242
  ```
189
243
 
190
- ### With Data Cleansing
244
+ ### XPath Extraction
191
245
 
192
246
  ```typescript
193
- import { CleansingType } from '@hanivanrizky/nestjs-browser-action';
194
-
195
- const data = await this.actionHelpers.scrape('https://example.com', {
196
- price: '.price',
197
- }, {
198
- pipes: {
199
- price: [
200
- { type: CleansingType.REMOVE_CURRENCY_SYMBOL },
201
- { type: CleansingType.TO_NUMBER },
202
- ],
203
- },
247
+ const result = await browserAction.evaluateWebsite({
248
+ url: 'https://example.com/sitemap.xml',
249
+ patterns: [
250
+ {
251
+ key: 'container',
252
+ patternType: 'xpath',
253
+ returnType: 'text',
254
+ patterns: ['//url'],
255
+ meta: { isContainer: true },
256
+ },
257
+ {
258
+ key: 'loc',
259
+ patternType: 'xpath',
260
+ returnType: 'text',
261
+ patterns: ['.//loc/text()'],
262
+ },
263
+ ],
204
264
  });
205
265
  ```
206
266
 
207
- ### Cookie Persistence
267
+ ### Workflow Automation
208
268
 
209
269
  ```typescript
210
- const workflow = {
211
- version: '1.0' as const,
270
+ const result = await browserAction.scrapeWithWorkflow({
271
+ version: '1.0',
212
272
  actions: [
213
- { action: 'loadCookies' as const, value: 'user-session', onError: 'skip' as const },
214
- { action: 'navigate' as const, value: 'https://example.com/dashboard' },
215
- { action: 'saveCookies' as const, value: 'user-session', options: { overwrite: true } },
273
+ { action: 'navigate', value: 'https://example.com/login' },
274
+ { action: 'fill', target: { type: 'css', value: '#username' }, value: 'user' },
275
+ { action: 'fill', target: { type: 'css', value: '#password' }, value: 'pass' },
276
+ { action: 'click', target: { type: 'css', value: '[type=submit]' } },
277
+ { action: 'saveCookies', value: 'user-session', options: { overwrite: true } },
278
+ { id: 'title', action: 'extract', target: { type: 'css', value: 'h1' } },
216
279
  ],
217
- };
280
+ });
218
281
  ```
219
282
 
220
283
  ### Stealth (CloakBrowser)
221
284
 
222
- Local browsers launch through CloakBrowser stealth Chromium. Configure anti-detect
223
- features via the `cloak` option:
224
-
225
285
  ```typescript
226
286
  BrowserActionModule.forRoot({
227
287
  cloak: {
228
288
  proxy: { server: 'http://host:port', username: 'user', password: 'pass' },
229
- humanize: true, // human-like mouse/typing
230
- geoip: true, // spoof geolocation from proxy IP
231
- timezone: 'America/New_York', // spoof timezone
232
- locale: 'en-US', // spoof locale
233
- stealthArgs: true, // anti-detect Chromium flags
234
- extensionPaths: ['/path/ext'], // load unpacked extensions
235
- userDataDir: './profile', // persistent profile (launchPersistentContext)
236
- launchOptions: { headless: true, args: ['--no-sandbox'] }, // raw puppeteer-core passthrough
289
+ humanize: true,
290
+ geoip: true,
291
+ timezone: 'America/New_York',
292
+ locale: 'en-US',
293
+ stealthArgs: true,
237
294
  },
238
295
  pool: { min: 2, max: 5 },
239
296
  })
240
297
  ```
241
298
 
242
- `launchOptions` (top-level) is also forwarded to CloakBrowser's `launchOptions`
243
- passthrough for backward compatibility. `cloak` is ignored when `remote` is set
244
- (remote uses plain CDP connect).
245
-
246
- **Per-call cloak override (proxy/UA rotation):** pass `cloak` per request to launch a
247
- dedicated off-pool browser with its own stealth config — useful for rotating proxies or
248
- fingerprints across requests. Not supported in remote CDP mode.
249
-
250
- ```typescript
251
- // scrape / scrapeAll
252
- await actions.scrape(url, { title: 'h1' }, {
253
- cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
254
- });
255
-
256
- // workflow
257
- await actions.scrapeWithWorkflow(url, {
258
- version: '1.0',
259
- cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
260
- actions: [...],
261
- });
262
- ```
263
-
264
- ### Remote Chrome Connection
265
-
266
- Connect to remote Chrome instances via Chrome DevTools Protocol (CDP):
267
-
268
- ```typescript
269
- BrowserActionModule.forRoot({
270
- remote: {
271
- browserURL: 'http://localhost:9222', // Or use browserWSEndpoint
272
- retryMax: 3, // Connection retry attempts
273
- retryDelay: 1000, // Delay between retries (ms)
274
- },
275
- pool: { min: 2, max: 5 },
276
- })
277
- ```
278
-
279
- **Using browserWSEndpoint:**
280
-
281
- ```typescript
282
- BrowserActionModule.forRoot({
283
- remote: {
284
- browserWSEndpoint: 'ws://localhost:9222/devtools/page/abc123',
285
- },
286
- })
287
- ```
288
-
289
- **Remote-first priority:** When both `remote` and `launchOptions` are provided, remote connection takes precedence.
290
-
291
- **See:** [Remote Chrome Configuration](./docs/api-reference.md#remote-chrome-configuration) for details.
292
-
293
- ## Services
294
-
295
- | Service | Description |
296
- |---------|-------------|
297
- | **BrowserActionService** | High-level automation methods (scrape, screenshot, PDF, workflows) |
298
- | **BrowserManagerService** | Browser pool management |
299
- | **PageService** | Page lifecycle and navigation |
300
- | **CookieService** | Cookie persistence |
301
- | **CleansingService** | Data cleansing with pipes |
299
+ ### TLS Fingerprint Capture
302
300
 
303
- ## Configuration
304
-
305
- ### Basic Configuration
301
+ Capture the browser's own TLS fingerprint for use with `nestjs-xpath-parser`'s CycleTLS engine:
306
302
 
307
303
  ```typescript
308
- BrowserActionModule.forRoot({
309
- pool: {
310
- min: 2,
311
- max: 10,
312
- idleTimeoutMs: 30000, // reap idle browsers down to min (0 disables)
313
- acquireTimeoutMs: 30000, // reject acquire() if none free in time (0 waits forever)
314
- strategy: 'round-robin',
315
- },
316
- cookies: {
317
- enabled: true,
318
- cookiesDir: './cookies',
319
- },
320
- logLevel: 'log',
321
- })
322
- ```
323
-
324
- ### All Options
325
-
326
- See [Configuration Reference](./docs/api-reference.md#configuration) for complete options.
327
-
328
- ## Type Safety
329
-
330
- Full TypeScript support with generics:
331
-
332
- ```typescript
333
- // Type-safe selectors
334
- interface ProductSelectors {
335
- title: string;
336
- price: number;
337
- }
338
-
339
- const result = await this.actionHelpers.scrape<ProductSelectors>(url, {
340
- title: 'h1',
341
- price: '.price',
342
- });
343
-
344
- // Type-safe workflow results
345
- const workflow = await this.actionHelpers.scrapeWithWorkflow<{
346
- title: string;
347
- price: number;
348
- }>(url, workflow);
304
+ const fingerprint = await browserAction.captureTlsFingerprint('./fingerprint.json');
305
+ // fingerprint.json can be passed to ScraperHtmlModule.forRoot({ fingerprint: './fingerprint.json' })
349
306
  ```
350
307
 
351
308
  ## Development
352
309
 
353
- ### Scripts
354
-
355
310
  ```bash
311
+ # Install dependencies
312
+ pnpm install
313
+
356
314
  # Build
357
315
  pnpm build
358
316
 
359
- # Run tests
317
+ # Test
360
318
  pnpm test
319
+ pnpm test:cov
361
320
 
362
- # Lint code
321
+ # Lint
363
322
  pnpm lint
364
-
365
- # Format code
366
323
  pnpm format
367
324
  ```
368
325
 
369
- ### Git Hooks
326
+ ## Contributing
370
327
 
371
- - **Pre-commit**: Runs ESLint
372
- - **Pre-push**: Runs build and tests
328
+ 1. Fork the repository
329
+ 2. Create your feature branch (`git checkout -b feature/yourusername/amazing-feature`)
330
+ 3. Commit your changes (`git commit -m 'Add some amazing feature'`)
331
+ 4. Push to the branch (`git push origin feature/yourusername/amazing-feature`)
332
+ 5. Open a Pull Request
373
333
 
374
334
  ## License
375
335
 
376
- MIT
377
-
378
- ## Support
379
-
380
- For issues and questions, please use [GitHub Issues](https://github.com/Hanivan/nestjs-browser-action/issues).
381
-
382
- ## Examples
383
-
384
- Check out the test project for complete examples: [test-browser-action](https://github.com/Hanivan/test-browser-action)
385
-
386
- ---
387
-
388
- **Documentation:**
389
- - [Methods](./docs/methods) - Method-specific guides
390
- - [Features](./docs/features) - Feature guides
391
- - [API Reference](./docs/api-reference.md) - Complete API
392
- - [Workflow Actions](./docs/workflow-actions.md) - Action reference
336
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
package/dist/interfaces/browser-action-options.d.ts CHANGED
@@ -25,9 +25,9 @@ export interface BrowserActionOptions {
25
25
  contextOptions?: BrowserContextOptions;
26
26
  pool?: PoolOptions;
27
27
  multiContext?: boolean;
28
- logLevel?: LogLevel;
28
+ logLevel?: LogLevel | LogLevel[];
29
29
  debugLogMaxLength?: number;
30
30
  remote?: RemoteOptions;
31
31
  cookies?: CookieOptions;
32
- customPipes?: Record<string, new () => CleansingPipe>;
32
+ customPipes?: Record<string, new (...args: unknown[]) => CleansingPipe>;
33
33
  }
package/dist/interfaces/cleansing-options.d.ts CHANGED
@@ -1,14 +1,9 @@
1
1
  import { CleansingProfile } from '../enums/cleansing-profile.enum';
2
- import type { PipeConfig } from './types';
2
+ import type { CleanerStepRules } from '../pipes/pipe-engine';
3
3
  export interface CleansingOptions {
4
- pipes?: PipeConfig[];
4
+ pipes?: CleanerStepRules;
5
5
  profile?: CleansingProfile;
6
6
  }
7
- export interface CleansingWithAltOptions {
8
- primaryPipes: PipeConfig[];
9
- fallbackPipes: PipeConfig[];
10
- fallbackOn?: 'empty' | 'null' | 'undefined' | 'all';
11
- }
12
7
  export interface ScrapeCleansingOptions {
13
- pipes?: Record<string, PipeConfig[]>;
8
+ pipes?: Record<string, CleanerStepRules>;
14
9
  }