@hanivanrizky/nestjs-browser-action 0.14.0 → 0.15.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[npm package](https://www.npmjs.com/package/@hanivanrizky/nestjs-browser-action)
|
|
4
4
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
-
[]()
|
|
6
6
|
|
|
7
7
|
> **⚠️ Status: Experimental**
|
|
8
8
|
>
|
|
@@ -13,7 +13,8 @@ A NestJS module that provides stealth browser automation (CloakBrowser + puppete
|
|
|
13
13
|
## Features
|
|
14
14
|
|
|
15
15
|
- (・_・) **Browser Automation**: Declarative workflow-based browser automation
|
|
16
|
-
- (☆^O^☆) **Data Scraping**: Single
|
|
16
|
+
- (☆^O^☆) **Data Scraping**: Single, multi-element, and container-based list scraping with CSS/XPath
|
|
17
|
+
- (◕‿◕) **Pagination**: Automatic next-page resolution from container scrapes
|
|
17
18
|
- (>_>) **Connection Pooling**: Efficient browser instance management
|
|
18
19
|
- (♡˙︶˙♡) **Cookie Persistence**: Save/load browser sessions for authentication
|
|
19
20
|
- (。•̀ᴗ-)✧ **Data Cleansing**: 14 built-in transformation pipes
|
|
@@ -99,6 +100,7 @@ export class MyService {
|
|
|
99
100
|
|--------|-------------|
|
|
100
101
|
| [`scrape()`](./docs/methods/scrape.md) | Extract single elements |
|
|
101
102
|
| [`scrapeAll()`](./docs/methods/scrape-all.md) | Extract multiple elements |
|
|
103
|
+
| [`scrapeContainerFields()`](./docs/api-reference.md#scrapecontainerfields) | Extract structured lists with pagination |
|
|
102
104
|
| [`scrapeWithWorkflow()`](./docs/methods/workflow.md) | Workflow-based automation |
|
|
103
105
|
| [`scrapeAllWithWorkflow()`](./docs/methods/workflow.md) | Workflow with multi-element |
|
|
104
106
|
| [`takeScreenshot()`](./docs/methods/screenshots.md) | Capture screenshots |
|
|
@@ -155,6 +157,36 @@ const workflow = {
|
|
|
155
157
|
const result = await this.actionHelpers.scrapeWithWorkflow(workflow);
|
|
156
158
|
```
|
|
157
159
|
|
|
160
|
+
### Container Extraction (lists + pagination)
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
import type { ContainerDescriptor } from '@hanivanrizky/nestjs-browser-action';
|
|
164
|
+
|
|
165
|
+
interface Product { name: string; price: string; }
|
|
166
|
+
|
|
167
|
+
const descriptor: ContainerDescriptor<Product> = {
|
|
168
|
+
container: '.product-card', // CSS or XPath — one node per item
|
|
169
|
+
fields: {
|
|
170
|
+
name: { selector: 'h2.name' },
|
|
171
|
+
price: { selector: '.price' },
|
|
172
|
+
},
|
|
173
|
+
pagination: {
|
|
174
|
+
container: '.pagination',
|
|
175
|
+
linkSelector: 'a',
|
|
176
|
+
labelSelector: 'a',
|
|
177
|
+
},
|
|
178
|
+
};
|
|
179
|
+
|
|
180
|
+
const { items, pagination } = await this.actionHelpers.scrapeContainerFields<Product>(
|
|
181
|
+
'https://example.com/products',
|
|
182
|
+
descriptor,
|
|
183
|
+
{ currentPage: 1, interceptResource: true, useRandomUserAgent: true },
|
|
184
|
+
);
|
|
185
|
+
|
|
186
|
+
console.log(items); // [{ name, price }, ...]
|
|
187
|
+
console.log(pagination?.nextUrl); // URL of the next page, or null; undefined when no pagination object is returned
|
|
188
|
+
```
|
|
189
|
+
|
|
158
190
|
### With Data Cleansing
|
|
159
191
|
|
|
160
192
|
```typescript
|