@hanivanrizky/nestjs-browser-action 0.8.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -7
- package/dist/browser-action.module.js +6 -6
- package/dist/browser-action.module.js.map +1 -1
- package/dist/constants/browser-action.constants.d.ts +1 -0
- package/dist/constants/browser-action.constants.js +1 -0
- package/dist/constants/browser-action.constants.js.map +1 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/interfaces/browser-action-options.d.ts +2 -0
- package/dist/interfaces/pool-options.d.ts +1 -0
- package/dist/interfaces/types.d.ts +5 -1
- package/dist/interfaces/workflow-options.d.ts +2 -0
- package/dist/pipes/remove-line-breaks.pipe.js +1 -1
- package/dist/pipes/remove-line-breaks.pipe.js.map +1 -1
- package/dist/pipes/remove-special-chars.pipe.js +1 -1
- package/dist/pipes/remove-special-chars.pipe.js.map +1 -1
- package/dist/pipes/sanitize-text.pipe.js +1 -1
- package/dist/pipes/sanitize-text.pipe.js.map +1 -1
- package/dist/{helpers/action-helpers.service.d.ts → services/browser-action.service.d.ts} +9 -6
- package/dist/{helpers/action-helpers.service.js → services/browser-action.service.js} +95 -36
- package/dist/services/browser-action.service.js.map +1 -0
- package/dist/services/browser-manager.service.d.ts +4 -1
- package/dist/services/browser-manager.service.js +10 -2
- package/dist/services/browser-manager.service.js.map +1 -1
- package/dist/services/browser-pool.service.d.ts +17 -1
- package/dist/services/browser-pool.service.js +152 -25
- package/dist/services/browser-pool.service.js.map +1 -1
- package/dist/services/cleansing.service.d.ts +7 -2
- package/dist/services/cleansing.service.js +33 -7
- package/dist/services/cleansing.service.js.map +1 -1
- package/dist/services/cookie.service.d.ts +1 -1
- package/dist/services/cookie.service.js +2 -2
- package/dist/services/cookie.service.js.map +1 -1
- package/dist/services/index.d.ts +2 -1
- package/dist/services/index.js +2 -1
- package/dist/services/index.js.map +1 -1
- package/dist/services/{page-service.d.ts → page.service.d.ts} +4 -2
- package/dist/services/{page-service.js → page.service.js} +22 -7
- package/dist/services/page.service.js.map +1 -0
- package/dist/tsconfig.build.tsbuildinfo +1 -1
- package/dist/utils/cloak.loader.js.map +1 -0
- package/dist/utils/delay.util.js.map +1 -0
- package/dist/utils/dom.util.js.map +1 -0
- package/dist/{helpers → utils}/index.d.ts +2 -1
- package/dist/{helpers → utils}/index.js +2 -1
- package/dist/utils/index.js.map +1 -0
- package/dist/utils/logger.util.js.map +1 -0
- package/dist/utils/string.util.js.map +1 -0
- package/dist/utils/truncate-log.util.js.map +1 -0
- package/package.json +1 -1
- package/dist/helpers/action-helpers.service.js.map +0 -1
- package/dist/helpers/delay.util.js.map +0 -1
- package/dist/helpers/dom.util.js.map +0 -1
- package/dist/helpers/index.js.map +0 -1
- package/dist/helpers/logger.util.js.map +0 -1
- package/dist/helpers/string.util.js.map +0 -1
- package/dist/helpers/truncate-log.util.js.map +0 -1
- package/dist/services/cloak.loader.js.map +0 -1
- package/dist/services/page-service.js.map +0 -1
- /package/dist/{services → utils}/cloak.loader.d.ts +0 -0
- /package/dist/{services → utils}/cloak.loader.js +0 -0
- /package/dist/{helpers → utils}/delay.util.d.ts +0 -0
- /package/dist/{helpers → utils}/delay.util.js +0 -0
- /package/dist/{helpers → utils}/dom.util.d.ts +0 -0
- /package/dist/{helpers → utils}/dom.util.js +0 -0
- /package/dist/{helpers → utils}/logger.util.d.ts +0 -0
- /package/dist/{helpers → utils}/logger.util.js +0 -0
- /package/dist/{helpers → utils}/string.util.d.ts +0 -0
- /package/dist/{helpers → utils}/string.util.js +0 -0
- /package/dist/{helpers → utils}/truncate-log.util.d.ts +0 -0
- /package/dist/{helpers → utils}/truncate-log.util.js +0 -0
package/README.md
CHANGED
|
@@ -66,12 +66,12 @@ export class AppModule {}
|
|
|
66
66
|
|
|
67
67
|
```typescript
|
|
68
68
|
import { Injectable } from '@nestjs/common';
|
|
69
|
-
import {
|
|
69
|
+
import { BrowserActionService } from '@hanivanrizky/nestjs-browser-action';
|
|
70
70
|
|
|
71
71
|
@Injectable()
|
|
72
72
|
export class MyService {
|
|
73
73
|
constructor(
|
|
74
|
-
private readonly actionHelpers:
|
|
74
|
+
private readonly actionHelpers: BrowserActionService,
|
|
75
75
|
) {}
|
|
76
76
|
|
|
77
77
|
async scrapeData() {
|
|
@@ -97,7 +97,7 @@ export class MyService {
|
|
|
97
97
|
|--------|-------------|
|
|
98
98
|
| [`scrape()`](./docs/methods/scrape.md) | Extract single elements |
|
|
99
99
|
| [`scrapeAll()`](./docs/methods/scrape-all.md) | Extract multiple elements |
|
|
100
|
-
| [`
|
|
100
|
+
| [`scrapeWithWorkflow()`](./docs/methods/workflow.md) | Workflow-based automation |
|
|
101
101
|
| [`scrapeAllWithWorkflow()`](./docs/methods/workflow.md) | Workflow with multi-element |
|
|
102
102
|
| [`takeScreenshot()`](./docs/methods/screenshots.md) | Capture screenshots |
|
|
103
103
|
| [`generatePDF()`](./docs/methods/screenshots.md) | Generate PDFs |
|
|
@@ -149,7 +149,7 @@ const workflow = {
|
|
|
149
149
|
],
|
|
150
150
|
};
|
|
151
151
|
|
|
152
|
-
const result = await this.actionHelpers.
|
|
152
|
+
const result = await this.actionHelpers.scrapeWithWorkflow(workflow);
|
|
153
153
|
```
|
|
154
154
|
|
|
155
155
|
### With Data Cleansing
|
|
@@ -208,6 +208,24 @@ BrowserActionModule.forRoot({
|
|
|
208
208
|
passthrough for backward compatibility. `cloak` is ignored when `remote` is set
|
|
209
209
|
(remote uses plain CDP connect).
|
|
210
210
|
|
|
211
|
+
**Per-call cloak override (proxy/UA rotation):** pass `cloak` per request to launch a
|
|
212
|
+
dedicated off-pool browser with its own stealth config — useful for rotating proxies or
|
|
213
|
+
fingerprints across requests. Not supported in remote CDP mode.
|
|
214
|
+
|
|
215
|
+
```typescript
|
|
216
|
+
// scrape / scrapeAll
|
|
217
|
+
await actions.scrape(url, { title: 'h1' }, {
|
|
218
|
+
cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
// workflow
|
|
222
|
+
await actions.scrapeWithWorkflow(url, {
|
|
223
|
+
version: '1.0',
|
|
224
|
+
cloak: { proxy: { server: 'http://rotating-proxy:8080' } },
|
|
225
|
+
actions: [...],
|
|
226
|
+
});
|
|
227
|
+
```
|
|
228
|
+
|
|
211
229
|
### Remote Chrome Connection
|
|
212
230
|
|
|
213
231
|
Connect to remote Chrome instances via Chrome DevTools Protocol (CDP):
|
|
@@ -241,7 +259,7 @@ BrowserActionModule.forRoot({
|
|
|
241
259
|
|
|
242
260
|
| Service | Description |
|
|
243
261
|
|---------|-------------|
|
|
244
|
-
| **
|
|
262
|
+
| **BrowserActionService** | High-level automation methods (scrape, screenshot, PDF, workflows) |
|
|
245
263
|
| **BrowserManagerService** | Browser pool management |
|
|
246
264
|
| **PageService** | Page lifecycle and navigation |
|
|
247
265
|
| **CookieService** | Cookie persistence |
|
|
@@ -256,7 +274,8 @@ BrowserActionModule.forRoot({
|
|
|
256
274
|
pool: {
|
|
257
275
|
min: 2,
|
|
258
276
|
max: 10,
|
|
259
|
-
idleTimeoutMs: 30000,
|
|
277
|
+
idleTimeoutMs: 30000, // reap idle browsers down to min (0 disables)
|
|
278
|
+
acquireTimeoutMs: 30000, // reject acquire() if none free in time (0 waits forever)
|
|
260
279
|
strategy: 'round-robin',
|
|
261
280
|
},
|
|
262
281
|
cookies: {
|
|
@@ -288,7 +307,7 @@ const result = await this.actionHelpers.scrape<ProductSelectors>(url, {
|
|
|
288
307
|
});
|
|
289
308
|
|
|
290
309
|
// Type-safe workflow results
|
|
291
|
-
const workflow = await this.actionHelpers.
|
|
310
|
+
const workflow = await this.actionHelpers.scrapeWithWorkflow<{
|
|
292
311
|
title: string;
|
|
293
312
|
price: number;
|
|
294
313
|
}>(url, workflow);
|
|
@@ -11,8 +11,8 @@ exports.BrowserActionModule = void 0;
|
|
|
11
11
|
const common_1 = require("@nestjs/common");
|
|
12
12
|
const browser_pool_service_1 = require("./services/browser-pool.service");
|
|
13
13
|
const browser_manager_service_1 = require("./services/browser-manager.service");
|
|
14
|
-
const page_service_1 = require("./services/page
|
|
15
|
-
const
|
|
14
|
+
const page_service_1 = require("./services/page.service");
|
|
15
|
+
const browser_action_service_1 = require("./services/browser-action.service");
|
|
16
16
|
const cookie_service_1 = require("./services/cookie.service");
|
|
17
17
|
const cleansing_service_1 = require("./services/cleansing.service");
|
|
18
18
|
const constants_1 = require("./constants");
|
|
@@ -40,14 +40,14 @@ let BrowserActionModule = BrowserActionModule_1 = class BrowserActionModule {
|
|
|
40
40
|
browser_pool_service_1.BrowserPoolService,
|
|
41
41
|
browser_manager_service_1.BrowserManagerService,
|
|
42
42
|
page_service_1.PageService,
|
|
43
|
-
|
|
43
|
+
browser_action_service_1.BrowserActionService,
|
|
44
44
|
cookie_service_1.CookieService,
|
|
45
45
|
cleansing_service_1.CleansingService,
|
|
46
46
|
],
|
|
47
47
|
exports: [
|
|
48
48
|
browser_manager_service_1.BrowserManagerService,
|
|
49
49
|
page_service_1.PageService,
|
|
50
|
-
|
|
50
|
+
browser_action_service_1.BrowserActionService,
|
|
51
51
|
cookie_service_1.CookieService,
|
|
52
52
|
cleansing_service_1.CleansingService,
|
|
53
53
|
],
|
|
@@ -69,14 +69,14 @@ let BrowserActionModule = BrowserActionModule_1 = class BrowserActionModule {
|
|
|
69
69
|
browser_pool_service_1.BrowserPoolService,
|
|
70
70
|
browser_manager_service_1.BrowserManagerService,
|
|
71
71
|
page_service_1.PageService,
|
|
72
|
-
|
|
72
|
+
browser_action_service_1.BrowserActionService,
|
|
73
73
|
cookie_service_1.CookieService,
|
|
74
74
|
cleansing_service_1.CleansingService,
|
|
75
75
|
],
|
|
76
76
|
exports: [
|
|
77
77
|
browser_manager_service_1.BrowserManagerService,
|
|
78
78
|
page_service_1.PageService,
|
|
79
|
-
|
|
79
|
+
browser_action_service_1.BrowserActionService,
|
|
80
80
|
cookie_service_1.CookieService,
|
|
81
81
|
cleansing_service_1.CleansingService,
|
|
82
82
|
],
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser-action.module.js","sourceRoot":"","sources":["../src/browser-action.module.ts"],"names":[],"mappings":";;;;;;;;;;AAAA,2CAAyE;AAKzE,0EAAqE;AACrE,gFAA2E;AAC3E,0DAAsD;AACtD,
|
|
1
|
+
{"version":3,"file":"browser-action.module.js","sourceRoot":"","sources":["../src/browser-action.module.ts"],"names":[],"mappings":";;;;;;;;;;AAAA,2CAAyE;AAKzE,0EAAqE;AACrE,gFAA2E;AAC3E,0DAAsD;AACtD,8EAAyE;AACzE,8DAA0D;AAC1D,oEAAgE;AAChE,2CAAqD;AAI9C,IAAM,mBAAmB,2BAAzB,MAAM,mBAAmB;IAC9B,MAAM,CAAC,QAAQ,CAAC,OAAmC;QACjD,MAAM,eAAe,GAAa;YAChC,OAAO,EAAE,kCAAsB;YAC/B,QAAQ,EAAE,OAAO;SAClB,CAAC;QAEF,OAAO;YACL,MAAM,EAAE,qBAAmB;YAC3B,SAAS,EAAE,CAAC,eAAe,CAAC;YAC5B,OAAO,EAAE,EAAE;SACZ,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,OAAO,CAAC,OAAmC;QAChD,MAAM,eAAe,GAAa;YAChC,OAAO,EAAE,kCAAsB;YAC/B,QAAQ,EAAE,OAAO;SAClB,CAAC;QAEF,OAAO;YACL,MAAM,EAAE,qBAAmB;YAC3B,SAAS,EAAE;gBACT,eAAe;gBACf,yCAAkB;gBAClB,+CAAqB;gBACrB,0BAAW;gBACX,6CAAoB;gBACpB,8BAAa;gBACb,oCAAgB;aACjB;YACD,OAAO,EAAE;gBACP,+CAAqB;gBACrB,0BAAW;gBACX,6CAAoB;gBACpB,8BAAa;gBACb,oCAAgB;aACjB;SACF,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,YAAY,CAAC,OAAwC;QAC1D,MAAM,oBAAoB,GAAa;YACrC,OAAO,EAAE,kCAAsB;YAC/B,UAAU,EAAE,KAAK,EAAE,GAAG,IAAe,EAAE,EAAE;gBACvC,OAAO,MAAM,OAAO,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC,CAAC;YAC3C,CAAC;YACD,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,EAAE;SAC7B,CAAC;QAEF,OAAO;YACL,MAAM,EAAE,qBAAmB;YAC3B,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,EAAE;YAC9B,SAAS,EAAE;gBACT,oBAAoB;gBACpB,yCAAkB;gBAClB,+CAAqB;gBACrB,0BAAW;gBACX,6CAAoB;gBACpB,8BAAa;gBACb,oCAAgB;aACjB;YACD,OAAO,EAAE;gBACP,+CAAqB;gBACrB,0BAAW;gBACX,6CAAoB;gBACpB,8BAAa;gBACb,oCAAgB;aACjB;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,eAAe;IAErB,CAAC;CACF,CAAA;AA3EY,kDAAmB;8BAAnB,mBAAmB;IAF/B,IAAA,eAAM,GAAE;IACR,IAAA,eAAM,EAAC,EAAE,CAAC;GACE,mBAAmB,CA2E/B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser-action.constants.js","sourceRoot":"","sources":["../../src/constants/browser-action.constants.ts"],"names":[],"mappings":";;;AAAa,QAAA,sBAAsB,GAAG,wBAAwB,CAAC;AAElD,QAAA,oBAAoB,GAAG;IAClC,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,EAAE;IACP,aAAa,EAAE,KAAK;IACpB,QAAQ,EAAE,aAAsB;CACjC,CAAC;AAEW,QAAA,sBAAsB,GAAG;IACpC,QAAQ,EAAE,IAAI;CACf,CAAC;AAEW,QAAA,sBAAsB,GAAG;IACpC,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,IAAI;CACR,CAAC;AAEE,QAAA,8BAA8B,GAAG,GAAG,CAAC;AAErC,QAAA,cAAc,GAAG;IAC5B,oBAAoB,EAClB,2EAA2E;IAC7E,oBAAoB,EAClB,iEAAiE;CAC3D,CAAC;AAEE,QAAA,uBAAuB,GAAG;IACrC,QAAQ,EAAE;QACR,KAAK,EAAE,IAAI;QACX,MAAM,EAAE,IAAI;KACb;CACF,CAAC;AAGW,QAAA,sBAAsB,GAAG,KAAK,CAAC;AAC/B,QAAA,4BAA4B,GAAG,GAAG,CAAC;AACnC,QAAA,0BAA0B,GAAG,KAAK,CAAC;AACnC,QAAA,uBAAuB,GAAG,GAAG,CAAC;AAC9B,QAAA,2BAA2B,GAAG,YAAY,CAAC;AAC3C,QAAA,iCAAiC,GAAG,OAAO,CAAC;AAC5C,QAAA,sBAAsB,GAAG;IACpC,OAAO,EAAE,IAAI;IACb,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,KAAK;IACf,kBAAkB,EAAE,SAAS;CACrB,CAAC"}
|
|
1
|
+
{"version":3,"file":"browser-action.constants.js","sourceRoot":"","sources":["../../src/constants/browser-action.constants.ts"],"names":[],"mappings":";;;AAAa,QAAA,sBAAsB,GAAG,wBAAwB,CAAC;AAElD,QAAA,oBAAoB,GAAG;IAClC,GAAG,EAAE,CAAC;IACN,GAAG,EAAE,EAAE;IACP,aAAa,EAAE,KAAK;IACpB,gBAAgB,EAAE,KAAK;IACvB,QAAQ,EAAE,aAAsB;CACjC,CAAC;AAEW,QAAA,sBAAsB,GAAG;IACpC,QAAQ,EAAE,IAAI;CACf,CAAC;AAEW,QAAA,sBAAsB,GAAG;IACpC,QAAQ,EAAE,CAAC;IACX,UAAU,EAAE,IAAI;CACR,CAAC;AAEE,QAAA,8BAA8B,GAAG,GAAG,CAAC;AAErC,QAAA,cAAc,GAAG;IAC5B,oBAAoB,EAClB,2EAA2E;IAC7E,oBAAoB,EAClB,iEAAiE;CAC3D,CAAC;AAEE,QAAA,uBAAuB,GAAG;IACrC,QAAQ,EAAE;QACR,KAAK,EAAE,IAAI;QACX,MAAM,EAAE,IAAI;KACb;CACF,CAAC;AAGW,QAAA,sBAAsB,GAAG,KAAK,CAAC;AAC/B,QAAA,4BAA4B,GAAG,GAAG,CAAC;AACnC,QAAA,0BAA0B,GAAG,KAAK,CAAC;AACnC,QAAA,uBAAuB,GAAG,GAAG,CAAC;AAC9B,QAAA,2BAA2B,GAAG,YAAY,CAAC;AAC3C,QAAA,iCAAiC,GAAG,OAAO,CAAC;AAC5C,QAAA,sBAAsB,GAAG;IACpC,OAAO,EAAE,IAAI;IACb,UAAU,EAAE,WAAW;IACvB,QAAQ,EAAE,KAAK;IACf,QAAQ,EAAE,KAAK;IACf,kBAAkB,EAAE,SAAS;CACrB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
export * from './browser-action.module';
|
|
2
2
|
export * from './services/browser-pool.service';
|
|
3
3
|
export * from './services/browser-manager.service';
|
|
4
|
-
export * from './services/page
|
|
4
|
+
export * from './services/page.service';
|
|
5
5
|
export * from './services/cookie.service';
|
|
6
6
|
export * from './services/cleansing.service';
|
|
7
|
-
export * from './
|
|
7
|
+
export * from './services/browser-action.service';
|
|
8
8
|
export * from './pipes';
|
|
9
9
|
export * from './enums';
|
|
10
10
|
export * from './interfaces';
|
package/dist/index.js
CHANGED
|
@@ -17,10 +17,10 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
17
17
|
__exportStar(require("./browser-action.module"), exports);
|
|
18
18
|
__exportStar(require("./services/browser-pool.service"), exports);
|
|
19
19
|
__exportStar(require("./services/browser-manager.service"), exports);
|
|
20
|
-
__exportStar(require("./services/page
|
|
20
|
+
__exportStar(require("./services/page.service"), exports);
|
|
21
21
|
__exportStar(require("./services/cookie.service"), exports);
|
|
22
22
|
__exportStar(require("./services/cleansing.service"), exports);
|
|
23
|
-
__exportStar(require("./
|
|
23
|
+
__exportStar(require("./services/browser-action.service"), exports);
|
|
24
24
|
__exportStar(require("./pipes"), exports);
|
|
25
25
|
__exportStar(require("./enums"), exports);
|
|
26
26
|
__exportStar(require("./interfaces"), exports);
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AACA,0DAAwC;AAGxC,kEAAgD;AAChD,qEAAmD;AACnD,0DAAwC;AACxC,4DAA0C;AAC1C,+DAA6C;AAG7C,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AACA,0DAAwC;AAGxC,kEAAgD;AAChD,qEAAmD;AACnD,0DAAwC;AACxC,4DAA0C;AAC1C,+DAA6C;AAG7C,oEAAkD;AAGlD,0CAAwB;AAGxB,0CAAwB;AAGxB,+CAA6B;AAG7B,8CAA4B;AAG5B,+CAA6B"}
|
|
@@ -2,6 +2,7 @@ import type { LogLevel } from '@nestjs/common';
|
|
|
2
2
|
import type { BrowserContextOptions, LaunchOptions } from 'puppeteer-core';
|
|
3
3
|
import type { LaunchOptions as CloakLaunchOptions } from 'cloakbrowser';
|
|
4
4
|
import { PoolOptions } from './pool-options';
|
|
5
|
+
import type { CleansingPipe } from '../pipes/cleansing-pipe';
|
|
5
6
|
export type CloakOptions = CloakLaunchOptions & {
|
|
6
7
|
userDataDir?: string;
|
|
7
8
|
};
|
|
@@ -28,4 +29,5 @@ export interface BrowserActionOptions {
|
|
|
28
29
|
debugLogMaxLength?: number;
|
|
29
30
|
remote?: RemoteOptions;
|
|
30
31
|
cookies?: CookieOptions;
|
|
32
|
+
customPipes?: Record<string, new () => CleansingPipe>;
|
|
31
33
|
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { CleansingType } from '../enums/cleansing-type.enum';
|
|
2
|
+
import type { CloakOptions } from './browser-action-options';
|
|
2
3
|
export interface PipeConfig {
|
|
3
|
-
type: CleansingType;
|
|
4
|
+
type: CleansingType | string;
|
|
4
5
|
pattern?: string;
|
|
5
6
|
replacement?: string;
|
|
6
7
|
format?: string;
|
|
@@ -16,6 +17,9 @@ export type ScrapeResult = Partial<Record<string, unknown>>;
|
|
|
16
17
|
export type ScrapeAllResult = Partial<Record<string, unknown[]>>;
|
|
17
18
|
export type ScraperOptions = {
|
|
18
19
|
pipes?: PipeOptions;
|
|
20
|
+
waitUntil?: 'load' | 'domcontentloaded' | 'networkidle0' | 'networkidle2';
|
|
21
|
+
timeout?: number;
|
|
22
|
+
cloak?: CloakOptions;
|
|
19
23
|
};
|
|
20
24
|
export type ScraperOptionsTyped<T extends SelectorMap> = {
|
|
21
25
|
pipes?: Partial<Record<keyof T, PipeConfig[]>>;
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { PipeConfig } from './types';
|
|
2
|
+
import type { CloakOptions } from './browser-action-options';
|
|
2
3
|
export interface ActionTarget {
|
|
3
4
|
type: 'css' | 'xpath';
|
|
4
5
|
value?: string;
|
|
@@ -45,6 +46,7 @@ export interface WorkflowDefinition {
|
|
|
45
46
|
actions: WorkflowAction[];
|
|
46
47
|
onError?: WorkflowErrorConfig;
|
|
47
48
|
debugLogMaxLength?: number;
|
|
49
|
+
cloak?: CloakOptions;
|
|
48
50
|
}
|
|
49
51
|
export interface WorkflowResult {
|
|
50
52
|
success: boolean;
|
|
@@ -13,7 +13,7 @@ exports.RemoveLineBreaksPipe = void 0;
|
|
|
13
13
|
const class_validator_1 = require("class-validator");
|
|
14
14
|
const cleansing_pipe_1 = require("./cleansing-pipe");
|
|
15
15
|
const cleansing_type_enum_1 = require("../enums/cleansing-type.enum");
|
|
16
|
-
const string_util_1 = require("../
|
|
16
|
+
const string_util_1 = require("../utils/string.util");
|
|
17
17
|
class RemoveLineBreaksPipe extends cleansing_pipe_1.CleansingPipe {
|
|
18
18
|
type = cleansing_type_enum_1.CleansingType.REMOVE_LINE_BREAKS;
|
|
19
19
|
replaceWithSpace;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"remove-line-breaks.pipe.js","sourceRoot":"","sources":["../../src/pipes/remove-line-breaks.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAAwD;AACxD,qDAAiD;AACjD,sEAA6D;AAC7D,
|
|
1
|
+
{"version":3,"file":"remove-line-breaks.pipe.js","sourceRoot":"","sources":["../../src/pipes/remove-line-breaks.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAAwD;AACxD,qDAAiD;AACjD,sEAA6D;AAC7D,sDAA2D;AAK3D,MAAa,oBAAqB,SAAQ,8BAA6B;IACrE,IAAI,GAAG,mCAAa,CAAC,kBAAkB,CAAC;IAIxC,gBAAgB,CAAW;IAE3B,IAAI,CAAC,KAAa;QAChB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACxC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,MAAM,GAAG,KAAK,CAAC;QAEnB,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,iCAAiC,EAAE,GAAG,CAAC,CAAC;YAChE,MAAM,GAAG,IAAA,iCAAmB,EAAC,MAAM,CAAC,CAAC;QACvC,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,iCAAiC,EAAE,EAAE,CAAC,CAAC;QACjE,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AAvBD,oDAuBC;AAlBC;IAFC,IAAA,4BAAU,GAAE;IACZ,IAAA,2BAAS,GAAE;;8DACe"}
|
|
@@ -13,7 +13,7 @@ exports.RemoveSpecialCharsPipe = void 0;
|
|
|
13
13
|
const class_validator_1 = require("class-validator");
|
|
14
14
|
const cleansing_pipe_1 = require("./cleansing-pipe");
|
|
15
15
|
const cleansing_type_enum_1 = require("../enums/cleansing-type.enum");
|
|
16
|
-
const string_util_1 = require("../
|
|
16
|
+
const string_util_1 = require("../utils/string.util");
|
|
17
17
|
class RemoveSpecialCharsPipe extends cleansing_pipe_1.CleansingPipe {
|
|
18
18
|
type = cleansing_type_enum_1.CleansingType.REMOVE_SPECIAL_CHARS;
|
|
19
19
|
allowedChars;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"remove-special-chars.pipe.js","sourceRoot":"","sources":["../../src/pipes/remove-special-chars.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAAuD;AACvD,qDAAiD;AACjD,sEAA6D;AAC7D,
|
|
1
|
+
{"version":3,"file":"remove-special-chars.pipe.js","sourceRoot":"","sources":["../../src/pipes/remove-special-chars.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAAuD;AACvD,qDAAiD;AACjD,sEAA6D;AAC7D,sDAA2D;AAK3D,MAAa,sBAAuB,SAAQ,8BAA6B;IACvE,IAAI,GAAG,mCAAa,CAAC,oBAAoB,CAAC;IAI1C,YAAY,CAAU;IAEd,MAAM,CAAU;IAExB,IAAI,CAAC,KAAa;QAChB,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;YAC9B,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,MAAM,GAAG,KAAK,CAAC;QAEnB,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACjB,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY;qBAC9B,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC;qBACtC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBACxB,IAAI,CAAC,MAAM,GAAG,IAAI,MAAM,CAAC,cAAc,OAAO,GAAG,EAAE,GAAG,CAAC,CAAC;YAC1D,CAAC;YACD,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC3C,CAAC;aAAM,CAAC;YACN,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;QACnD,CAAC;QAED,OAAO,IAAA,iCAAmB,EAAC,MAAM,CAAC,CAAC;IACrC,CAAC;CACF;AA9BD,wDA8BC;AAzBC;IAFC,IAAA,4BAAU,GAAE;IACZ,IAAA,0BAAQ,GAAE;;4DACW"}
|
|
@@ -13,7 +13,7 @@ exports.SanitizeTextPipe = void 0;
|
|
|
13
13
|
const class_validator_1 = require("class-validator");
|
|
14
14
|
const cleansing_pipe_1 = require("./cleansing-pipe");
|
|
15
15
|
const cleansing_type_enum_1 = require("../enums/cleansing-type.enum");
|
|
16
|
-
const string_util_1 = require("../
|
|
16
|
+
const string_util_1 = require("../utils/string.util");
|
|
17
17
|
class SanitizeTextPipe extends cleansing_pipe_1.CleansingPipe {
|
|
18
18
|
type = cleansing_type_enum_1.CleansingType.SANITIZE_TEXT;
|
|
19
19
|
static TAG_PAIR_REGEXES = [
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitize-text.pipe.js","sourceRoot":"","sources":["../../src/pipes/sanitize-text.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAA6C;AAC7C,qDAAiD;AACjD,sEAA6D;AAC7D,
|
|
1
|
+
{"version":3,"file":"sanitize-text.pipe.js","sourceRoot":"","sources":["../../src/pipes/sanitize-text.pipe.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qDAA6C;AAC7C,qDAAiD;AACjD,sEAA6D;AAC7D,sDAA2D;AAK3D,MAAa,gBAAiB,SAAQ,8BAA6B;IACjE,IAAI,GAAG,mCAAa,CAAC,aAAa,CAAC;IAE3B,MAAM,CAAU,gBAAgB,GAAa;QACnD,QAAQ;QACR,QAAQ;QACR,QAAQ;QACR,OAAO;QACP,QAAQ;QACR,MAAM;QACN,OAAO;QACP,QAAQ;QACR,QAAQ;QACR,UAAU;QACV,MAAM;QACN,MAAM;QACN,OAAO;KACR,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,GAAG,gBAAgB,GAAG,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IAExD,MAAM,CAAU,wBAAwB,GAAa;QAC3D,QAAQ;QACR,QAAQ;QACR,QAAQ;QACR,OAAO;QACP,QAAQ;QACR,MAAM;QACN,OAAO;QACP,QAAQ;QACR,QAAQ;QACR,UAAU;QACV,MAAM;QACN,MAAM;QACN,OAAO;KACR,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,IAAI,GAAG,gBAAgB,EAAE,IAAI,CAAC,CAAC,CAAC;IAElD,MAAM,CAAU,qBAAqB,GAAa;QACxD,SAAS;QACT,SAAS;QACT,QAAQ;QACR,aAAa;QACb,YAAY;QACZ,SAAS;QACT,QAAQ;QACR,UAAU;QACV,UAAU;QACV,SAAS;QACT,UAAU;QACV,WAAW;QACX,SAAS;QACT,YAAY;KACb,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;IAG5C,gBAAgB,CAAW;IAE3B,IAAI,CAAC,KAAa;QAChB,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,CAAC,KAAK,EAAE,CAAC;YACxC,OAAO,KAAK,CAAC;QACf,CAAC;QAED,IAAI,MAAM,GAAG,KAAK,CAAC;QAEnB,KAAK,MAAM,KAAK,IAAI,gBAAgB,CAAC,gBAAgB,EAAE,CAAC;YACtD,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACrC,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,gBAAgB,CAAC,wBAAwB,EAAE,CAAC;YAC9D,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACrC,CAAC;QACD,KAAK,MAAM,KAAK,IAAI,gBAAgB,CAAC,qBAAqB,EAAE,CAAC;YAC3D,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACrC,CAAC;QAED,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAExC,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC3B,MAAM,GAAG,MAAM;iBACZ,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;iBACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;iBACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;iBACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;iBACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAC5B,CAAC;QAED,MAAM,GAAG
,IAAA,iCAAmB,EAAC,MAAM,CAAC,CAAC;QAErC,OAAO,MAAM,CAAC;IAChB,CAAC;;AAvFH,4CAwFC;AAnCC;IADC,IAAA,4BAAU,GAAE;;0DACc"}
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
import type { Page, ScreenshotOptions, PDFOptions } from 'puppeteer-core';
|
|
2
|
-
import { PageService } from '
|
|
3
|
-
import { CookieService } from '
|
|
4
|
-
import { CleansingService } from '
|
|
2
|
+
import { PageService } from './page.service';
|
|
3
|
+
import { CookieService } from './cookie.service';
|
|
4
|
+
import { CleansingService } from './cleansing.service';
|
|
5
5
|
import type { WorkflowDefinition, VariableContext } from '../interfaces/workflow-options';
|
|
6
6
|
import { SelectorMap, ScraperOptions, ScrapeResult, ScrapeAllResult, WorkflowResultTyped } from '../interfaces/types';
|
|
7
7
|
import type { BrowserActionOptions } from '../interfaces/browser-action-options';
|
|
8
|
-
export declare class
|
|
8
|
+
export declare class BrowserActionService {
|
|
9
9
|
private readonly pageService;
|
|
10
10
|
private readonly cookieService;
|
|
11
11
|
private readonly cleansingService;
|
|
@@ -17,14 +17,17 @@ export declare class ActionHelpersService {
|
|
|
17
17
|
takeScreenshot(url: string, path: string, options?: ScreenshotOptions): Promise<Buffer>;
|
|
18
18
|
generatePDF(url: string, path: string, options?: PDFOptions): Promise<Buffer>;
|
|
19
19
|
scrape<T extends SelectorMap>(url: string, selectors: T, options?: ScraperOptions): Promise<ScrapeResult>;
|
|
20
|
+
private parseSelector;
|
|
21
|
+
private buildNavOptions;
|
|
20
22
|
scrapeAll<T extends SelectorMap>(url: string, selectors: T, options?: ScraperOptions): Promise<ScrapeAllResult>;
|
|
21
23
|
private extractAllData;
|
|
22
24
|
private extractAllFromShadowRoot;
|
|
23
25
|
waitForSelector(url: string, selector: string, timeout?: number): Promise<Page>;
|
|
24
|
-
evaluate<T = unknown>(url: string, script: string | (() =>
|
|
25
|
-
|
|
26
|
+
evaluate<T = unknown>(url: string, script: string | (() => unknown)): Promise<T>;
|
|
27
|
+
scrapeWithWorkflow<T = Record<string, unknown>>(url: string, workflow: WorkflowDefinition, variables?: VariableContext): Promise<WorkflowResultTyped<T>>;
|
|
26
28
|
scrapeAllWithWorkflow<T = Record<string, unknown>>(url: string, workflow: WorkflowDefinition, variables?: VariableContext): Promise<WorkflowResultTyped<T>>;
|
|
27
29
|
private executeAction;
|
|
30
|
+
private dispatchAction;
|
|
28
31
|
private evaluateCondition;
|
|
29
32
|
private describeTarget;
|
|
30
33
|
private findElement;
|
|
@@ -11,19 +11,19 @@ var __metadata = (this && this.__metadata) || function (k, v) {
|
|
|
11
11
|
var __param = (this && this.__param) || function (paramIndex, decorator) {
|
|
12
12
|
return function (target, key) { decorator(target, key, paramIndex); }
|
|
13
13
|
};
|
|
14
|
-
var
|
|
14
|
+
var BrowserActionService_1;
|
|
15
15
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
16
|
-
exports.
|
|
16
|
+
exports.BrowserActionService = void 0;
|
|
17
17
|
const common_1 = require("@nestjs/common");
|
|
18
|
-
const page_service_1 = require("
|
|
19
|
-
const cookie_service_1 = require("
|
|
20
|
-
const cleansing_service_1 = require("
|
|
21
|
-
const logger_util_1 = require("
|
|
22
|
-
const delay_util_1 = require("
|
|
23
|
-
const dom_util_1 = require("
|
|
24
|
-
const truncate_log_util_1 = require("
|
|
18
|
+
const page_service_1 = require("./page.service");
|
|
19
|
+
const cookie_service_1 = require("./cookie.service");
|
|
20
|
+
const cleansing_service_1 = require("./cleansing.service");
|
|
21
|
+
const logger_util_1 = require("../utils/logger.util");
|
|
22
|
+
const delay_util_1 = require("../utils/delay.util");
|
|
23
|
+
const dom_util_1 = require("../utils/dom.util");
|
|
24
|
+
const truncate_log_util_1 = require("../utils/truncate-log.util");
|
|
25
25
|
const browser_action_constants_1 = require("../constants/browser-action.constants");
|
|
26
|
-
let
|
|
26
|
+
let BrowserActionService = BrowserActionService_1 = class BrowserActionService {
|
|
27
27
|
pageService;
|
|
28
28
|
cookieService;
|
|
29
29
|
cleansingService;
|
|
@@ -36,7 +36,7 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
36
36
|
this.cookieService = cookieService;
|
|
37
37
|
this.cleansingService = cleansingService;
|
|
38
38
|
this.moduleOptions = moduleOptions;
|
|
39
|
-
this.logger = new logger_util_1.LoggerWithLevel(
|
|
39
|
+
this.logger = new logger_util_1.LoggerWithLevel(BrowserActionService_1.name, this.pageService.getLogLevel());
|
|
40
40
|
this.activeDebugLogMaxLength =
|
|
41
41
|
moduleOptions?.debugLogMaxLength ?? browser_action_constants_1.DEFAULT_DEBUG_LOG_MAX_LENGTH;
|
|
42
42
|
}
|
|
@@ -56,11 +56,15 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
56
56
|
}
|
|
57
57
|
async scrape(url, selectors, options) {
|
|
58
58
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Scraping ${url}`));
|
|
59
|
-
const page = await this.pageService.navigateTo(url);
|
|
59
|
+
const page = await this.pageService.navigateTo(url, this.buildNavOptions(options), options?.cloak);
|
|
60
60
|
const result = {};
|
|
61
|
-
|
|
61
|
+
const evalOne = page['$eval'].bind(page);
|
|
62
|
+
await Promise.all(Object.entries(selectors).map(async ([key, rawSelector]) => {
|
|
63
|
+
const { selector, attribute } = this.parseSelector(rawSelector);
|
|
62
64
|
try {
|
|
63
|
-
const value =
|
|
65
|
+
const value = attribute
|
|
66
|
+
? await evalOne(selector, (el, attr) => el.getAttribute(attr), attribute)
|
|
67
|
+
: await evalOne(selector, (el) => el.textContent);
|
|
64
68
|
if (options?.pipes?.[key]) {
|
|
65
69
|
const pipeInstances = this.getCachedPipeInstances(options.pipes[key]);
|
|
66
70
|
result[key] = this.cleansingService.cleanse(value, pipeInstances);
|
|
@@ -69,23 +73,50 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
69
73
|
result[key] = value;
|
|
70
74
|
}
|
|
71
75
|
}
|
|
72
|
-
catch {
|
|
73
|
-
this.logger.warn(`Failed to scrape ${
|
|
76
|
+
catch (err) {
|
|
77
|
+
this.logger.warn(`Failed to scrape '${key}' (${rawSelector}): ${err instanceof Error ? err.message : String(err)}`);
|
|
74
78
|
}
|
|
75
|
-
}
|
|
79
|
+
}));
|
|
76
80
|
await this.pageService.closePage();
|
|
77
81
|
return result;
|
|
78
82
|
}
|
|
83
|
+
parseSelector(raw) {
|
|
84
|
+
const match = raw.match(/^(.*)@([A-Za-z_][\w-]*)$/);
|
|
85
|
+
if (match && match[1].trim() !== '') {
|
|
86
|
+
return { selector: match[1], attribute: match[2] };
|
|
87
|
+
}
|
|
88
|
+
return { selector: raw };
|
|
89
|
+
}
|
|
90
|
+
buildNavOptions(options) {
|
|
91
|
+
if (!options?.waitUntil && !options?.timeout)
|
|
92
|
+
return undefined;
|
|
93
|
+
return { waitUntil: options.waitUntil, timeout: options.timeout };
|
|
94
|
+
}
|
|
79
95
|
async scrapeAll(url, selectors, options) {
|
|
80
96
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Scraping all elements from ${url}`));
|
|
81
|
-
const page = await this.pageService.navigateTo(url);
|
|
97
|
+
const page = await this.pageService.navigateTo(url, this.buildNavOptions(options), options?.cloak);
|
|
82
98
|
const result = {};
|
|
83
|
-
|
|
84
|
-
|
|
99
|
+
const evalAll = page['$$eval'].bind(page);
|
|
100
|
+
Object.entries(selectors).forEach(([key, selector]) => this.validateSelector(key, selector));
|
|
101
|
+
await Promise.all(Object.entries(selectors).map(async ([key, rawSelector]) => {
|
|
102
|
+
const { selector, attribute } = this.parseSelector(rawSelector);
|
|
85
103
|
try {
|
|
86
104
|
const isXPath = (0, dom_util_1.isXPathSelector)(selector);
|
|
87
105
|
let values;
|
|
88
|
-
if (isXPath) {
|
|
106
|
+
if (isXPath && attribute) {
|
|
107
|
+
values = await page.evaluate((xpathSelector, attr) => {
|
|
108
|
+
const results = document.evaluate(xpathSelector, document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
|
|
109
|
+
const vals = [];
|
|
110
|
+
let node;
|
|
111
|
+
while ((node = results.iterateNext())) {
|
|
112
|
+
vals.push(node instanceof Element
|
|
113
|
+
? node.getAttribute(attr) || ''
|
|
114
|
+
: node?.textContent?.trim() || '');
|
|
115
|
+
}
|
|
116
|
+
return vals;
|
|
117
|
+
}, selector, attribute);
|
|
118
|
+
}
|
|
119
|
+
else if (isXPath) {
|
|
89
120
|
values = await page.evaluate((xpathSelector) => {
|
|
90
121
|
const results = document.evaluate(xpathSelector, document, null, XPathResult.UNORDERED_NODE_ITERATOR_TYPE, null);
|
|
91
122
|
const vals = [];
|
|
@@ -96,8 +127,11 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
96
127
|
return vals;
|
|
97
128
|
}, selector);
|
|
98
129
|
}
|
|
130
|
+
else if (attribute) {
|
|
131
|
+
values = await evalAll(selector, (elements, attr) => elements.map((el) => el.getAttribute(attr) || ''), attribute);
|
|
132
|
+
}
|
|
99
133
|
else {
|
|
100
|
-
values = await
|
|
134
|
+
values = await evalAll(selector, (elements) => elements.map((el) => el.textContent?.trim() || ''));
|
|
101
135
|
}
|
|
102
136
|
if (options?.pipes?.[key]) {
|
|
103
137
|
const pipeInstances = this.getCachedPipeInstances(options.pipes[key]);
|
|
@@ -107,10 +141,10 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
107
141
|
result[key] = values;
|
|
108
142
|
}
|
|
109
143
|
}
|
|
110
|
-
catch {
|
|
111
|
-
this.logger.warn(`Failed to scrape ${
|
|
144
|
+
catch (err) {
|
|
145
|
+
this.logger.warn(`Failed to scrape '${key}' (${rawSelector}): ${err instanceof Error ? err.message : String(err)}`);
|
|
112
146
|
}
|
|
113
|
-
}
|
|
147
|
+
}));
|
|
114
148
|
await this.pageService.closePage();
|
|
115
149
|
return result;
|
|
116
150
|
}
|
|
@@ -206,17 +240,19 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
206
240
|
/**
 * Navigates to `url`, evaluates `script` in the page, closes the page and
 * returns the evaluation result.
 *
 * @param url Address passed to `pageService.navigateTo`.
 * @param script Function or non-function value handed to `page.evaluate`.
 * @returns Whatever `page.evaluate(script)` resolves to.
 */
async evaluate(url, script) {
|
|
207
241
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Evaluating script on ${url}`));
|
|
208
242
|
// No navigation options or cloak argument are passed here.
const page = await this.pageService.navigateTo(url);
|
|
209
|
-
const result =
|
|
243
|
+
// Both branches below make the same call; the `typeof` test does not change
// what is executed at runtime.
const result = typeof script === 'function'
|
|
244
|
+
? await page.evaluate(script)
|
|
245
|
+
: await page.evaluate(script);
|
|
210
246
|
// Only reached when the evaluation above resolves; a rejection propagates
// to the caller without this closePage() call running.
await this.pageService.closePage();
|
|
211
247
|
return result;
|
|
212
248
|
}
|
|
213
|
-
async
|
|
249
|
+
async scrapeWithWorkflow(url, workflow, variables) {
|
|
214
250
|
this.activeDebugLogMaxLength =
|
|
215
251
|
workflow.debugLogMaxLength ??
|
|
216
252
|
this.moduleOptions?.debugLogMaxLength ??
|
|
217
253
|
browser_action_constants_1.DEFAULT_DEBUG_LOG_MAX_LENGTH;
|
|
218
254
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Starting workflow execution for ${url}`));
|
|
219
|
-
const page = await this.pageService.navigateTo(url);
|
|
255
|
+
const page = await this.pageService.navigateTo(url, undefined, workflow.cloak);
|
|
220
256
|
const result = {
|
|
221
257
|
success: false,
|
|
222
258
|
data: {},
|
|
@@ -263,7 +299,7 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
263
299
|
return result;
|
|
264
300
|
}
|
|
265
301
|
/**
 * Thin alias: forwards all three arguments unchanged to `scrapeWithWorkflow`
 * and returns its result.
 *
 * @param url Forwarded as-is.
 * @param workflow Forwarded as-is.
 * @param variables Forwarded as-is.
 * @returns The value resolved by `scrapeWithWorkflow`.
 */
async scrapeAllWithWorkflow(url, workflow, variables) {
|
|
266
|
-
return await this.
|
|
302
|
+
return await this.scrapeWithWorkflow(url, workflow, variables);
|
|
267
303
|
}
|
|
268
304
|
async executeAction(page, action, context) {
|
|
269
305
|
if (action.condition) {
|
|
@@ -278,11 +314,34 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
278
314
|
? `[${action.action}] id="${action.id}"`
|
|
279
315
|
: `[${action.action}]`;
|
|
280
316
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Executing action: ${actionLabel}`));
|
|
317
|
+
const maxRetries = action.options?.retry ?? 0;
|
|
318
|
+
const retryDelay = action.options?.retryDelay ?? 0;
|
|
319
|
+
for (let attempt = 0;; attempt++) {
|
|
320
|
+
try {
|
|
321
|
+
await this.dispatchAction(page, action, value, context);
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
catch (err) {
|
|
325
|
+
if (attempt >= maxRetries)
|
|
326
|
+
throw err;
|
|
327
|
+
this.logger.warn((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, `Action ${actionLabel} failed (attempt ${attempt + 1}/${maxRetries + 1}), retrying: ${err instanceof Error ? err.message : String(err)}`));
|
|
328
|
+
if (retryDelay > 0)
|
|
329
|
+
await (0, delay_util_1.delay)(retryDelay);
|
|
330
|
+
}
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
async dispatchAction(page, action, value, context) {
|
|
281
334
|
switch (action.action) {
|
|
282
|
-
case 'navigate':
|
|
335
|
+
case 'navigate': {
|
|
336
|
+
const navOptions = {};
|
|
337
|
+
if (action.options?.waitUntil)
|
|
338
|
+
navOptions.waitUntil = action.options.waitUntil;
|
|
339
|
+
if (action.options?.timeout)
|
|
340
|
+
navOptions.timeout = action.options.timeout;
|
|
283
341
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, ` navigate → ${value}`));
|
|
284
|
-
await page.goto(value);
|
|
342
|
+
await page.goto(value, Object.keys(navOptions).length ? navOptions : undefined);
|
|
285
343
|
break;
|
|
344
|
+
}
|
|
286
345
|
case 'wait':
|
|
287
346
|
this.logger.debug((0, truncate_log_util_1.truncateLog)(this.activeDebugLogMaxLength, ` wait ${Number(action.value) || 0}ms`));
|
|
288
347
|
await (0, delay_util_1.delay)(Number(action.value) || 0);
|
|
@@ -573,7 +632,7 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
573
632
|
/**
 * Returns the pipe instances for a pipe configuration, building them on first
 * use and reusing the stored result afterwards.
 *
 * @param config Pipe configuration; must be serialisable with `JSON.stringify`.
 * @returns The value stored in `this.pipeCache` for this configuration.
 */
getCachedPipeInstances(config) {
|
|
574
633
|
// The cache key is the JSON text of the config, so two configs share an
// entry only when they serialise to the same string.
const cacheKey = JSON.stringify(config);
|
|
575
634
|
if (!this.pipeCache.has(cacheKey)) {
|
|
576
|
-
this.pipeCache.set(cacheKey, this.cleansingService.
|
|
635
|
+
// Cache miss: build once via the cleansing service and remember the result.
this.pipeCache.set(cacheKey, this.cleansingService.buildPipes(config));
|
|
577
636
|
}
|
|
578
637
|
return this.pipeCache.get(cacheKey);
|
|
579
638
|
}
|
|
@@ -711,13 +770,13 @@ let ActionHelpersService = ActionHelpersService_1 = class ActionHelpersService {
|
|
|
711
770
|
});
|
|
712
771
|
}
|
|
713
772
|
};
|
|
714
|
-
exports.
|
|
715
|
-
exports.
|
|
773
|
+
exports.BrowserActionService = BrowserActionService;
|
|
774
|
+
exports.BrowserActionService = BrowserActionService = BrowserActionService_1 = __decorate([
|
|
716
775
|
(0, common_1.Injectable)({ scope: common_1.Scope.TRANSIENT }),
|
|
717
776
|
__param(3, (0, common_1.Optional)()),
|
|
718
777
|
__param(3, (0, common_1.Inject)(browser_action_constants_1.BROWSER_ACTION_OPTIONS)),
|
|
719
778
|
__metadata("design:paramtypes", [page_service_1.PageService,
|
|
720
779
|
cookie_service_1.CookieService,
|
|
721
780
|
cleansing_service_1.CleansingService, Object])
|
|
722
|
-
],
|
|
723
|
-
//# sourceMappingURL=action
|
|
781
|
+
], BrowserActionService);
|
|
782
|
+
//# sourceMappingURL=browser-action.service.js.map
|