crawlee-one 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -2
- package/dist/cjs/index.d.ts +4 -3
- package/dist/cjs/index.js +4 -3
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/lib/actor/actor.js +1 -1
- package/dist/cjs/lib/actor/actor.js.map +1 -1
- package/dist/cjs/lib/actor/types.d.ts +1 -1
- package/dist/cjs/lib/actor/types.js.map +1 -1
- package/dist/cjs/lib/actorSpec.d.ts +5 -5
- package/dist/cjs/lib/actorSpec.js.map +1 -1
- package/dist/cjs/lib/error/errorHandler.d.ts +1 -1
- package/dist/cjs/lib/error/errorHandler.js.map +1 -1
- package/dist/cjs/lib/integrations/apify.js +7 -3
- package/dist/cjs/lib/integrations/apify.js.map +1 -1
- package/dist/cjs/lib/io/pushRequests.d.ts +4 -3
- package/dist/cjs/lib/io/pushRequests.js +1 -1
- package/dist/cjs/lib/io/pushRequests.js.map +1 -1
- package/dist/cjs/lib/log.d.ts +1 -1
- package/dist/cjs/lib/log.js.map +1 -1
- package/dist/cjs/lib/readme/apify/readme.d.ts +65 -0
- package/dist/cjs/lib/readme/apify/readme.js +534 -0
- package/dist/cjs/lib/readme/apify/readme.js.map +1 -0
- package/dist/cjs/lib/readme/apify/types.d.ts +260 -0
- package/dist/cjs/lib/readme/apify/types.js +54 -0
- package/dist/cjs/lib/readme/apify/types.js.map +1 -0
- package/dist/cjs/lib/router/router.d.ts +67 -0
- package/dist/cjs/lib/router/router.js +180 -0
- package/dist/cjs/lib/router/router.js.map +1 -0
- package/dist/cjs/lib/router/types.d.ts +59 -0
- package/dist/cjs/lib/router/types.js +19 -0
- package/dist/cjs/lib/router/types.js.map +1 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,6 +17,19 @@ Conversely, Crawlee One is NOT suitable for:
|
|
|
17
17
|
|
|
18
18
|
[Read here](./docs/scraping-workflow-summary.md) for the recap of how Crawlee and Apify work.
|
|
19
19
|
|
|
20
|
+
### What can Crawlee One do?
|
|
21
|
+
|
|
22
|
+
Crawlee One supports many common and advanced web scraping use cases. See the [Table of Contents](#table-of-content) for an overview of the use cases.
|
|
23
|
+
|
|
24
|
+
See the section [How to use](#how-to-use) for how Crawlee One looks from the user's perspective.
|
|
25
|
+
|
|
26
|
+
## Pre-requirements
|
|
27
|
+
|
|
28
|
+
- Familiarity with the Apify platform
|
|
29
|
+
- For advanced use cases:
|
|
30
|
+
- Basic familiarity with web scraping
|
|
31
|
+
- Basic familiarity with JavaScript
|
|
32
|
+
|
|
20
33
|
## Use cases
|
|
21
34
|
|
|
22
35
|
Web crawlers written with Crawlee One can be configured via their input to handle the following advanced use cases:
|
|
@@ -34,6 +47,12 @@ Web crawlers written with Crawlee One can be configured via their input to handl
|
|
|
34
47
|
- [11. Capture errors](./docs/playbook-11-errors.md)
|
|
35
48
|
- [12. Source control: Keep scraper configuration in sync](./docs/playbook-12-source-control.md)
|
|
36
49
|
|
|
50
|
+
## How to use
|
|
51
|
+
|
|
52
|
+
[See here](./docs/user-guide.md) for how to use a Crawlee One web scraper through the Apify platform.
|
|
53
|
+
|
|
54
|
+

|
|
55
|
+
|
|
37
56
|
## Library contents
|
|
38
57
|
|
|
39
58
|
Crawlee One includes a set of utility functions for:
|
|
@@ -72,9 +91,9 @@ Crawlee One allows you to configure the following via the input:
|
|
|
72
91
|
- [Logging & Error handling](./docs/reference-input.md#logging--error-handling-advanced)
|
|
73
92
|
- [Integrations (Metamorphing)](./docs/reference-input.md#integrations-metamorphing-advanced)
|
|
74
93
|
|
|
75
|
-
## Example
|
|
94
|
+
## Example projects
|
|
76
95
|
|
|
77
|
-
|
|
96
|
+
- [SKCRIS Scraper](https://github.com/JuroOravec/apify-actor-skcris)
|
|
78
97
|
|
|
79
98
|
---
|
|
80
99
|
|
package/dist/cjs/index.d.ts
CHANGED
|
@@ -15,9 +15,10 @@ export * from './lib/error/sentry';
|
|
|
15
15
|
export * from './lib/migrate/localMigrator';
|
|
16
16
|
export * from './lib/migrate/localState';
|
|
17
17
|
export * from './lib/migrate/types';
|
|
18
|
-
export * from './lib/readme/readme';
|
|
19
|
-
export * from './lib/readme/types';
|
|
20
|
-
export * from './lib/router';
|
|
18
|
+
export * from './lib/readme/apify/readme';
|
|
19
|
+
export * from './lib/readme/apify/types';
|
|
20
|
+
export * from './lib/router/router';
|
|
21
|
+
export * from './lib/router/types';
|
|
21
22
|
export * from './lib/log';
|
|
22
23
|
export * from './lib/test/actor';
|
|
23
24
|
export * from './lib/test/mockApifyClient';
|
package/dist/cjs/index.js
CHANGED
|
@@ -34,9 +34,10 @@ __exportStar(require("./lib/error/sentry"), exports);
|
|
|
34
34
|
__exportStar(require("./lib/migrate/localMigrator"), exports);
|
|
35
35
|
__exportStar(require("./lib/migrate/localState"), exports);
|
|
36
36
|
__exportStar(require("./lib/migrate/types"), exports);
|
|
37
|
-
__exportStar(require("./lib/readme/readme"), exports);
|
|
38
|
-
__exportStar(require("./lib/readme/types"), exports);
|
|
39
|
-
__exportStar(require("./lib/router"), exports);
|
|
37
|
+
__exportStar(require("./lib/readme/apify/readme"), exports);
|
|
38
|
+
__exportStar(require("./lib/readme/apify/types"), exports);
|
|
39
|
+
__exportStar(require("./lib/router/router"), exports);
|
|
40
|
+
__exportStar(require("./lib/router/types"), exports);
|
|
40
41
|
__exportStar(require("./lib/log"), exports);
|
|
41
42
|
__exportStar(require("./lib/test/actor"), exports);
|
|
42
43
|
__exportStar(require("./lib/test/mockApifyClient"), exports);
|
package/dist/cjs/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,2CAAqF;AAA5E,+GAAA,sBAAsB,OAAA;AAAE,iHAAA,wBAAwB,OAAA;AACzD,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,sDAAoC;AACpC,qDAAmC;AACnC
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,2CAAqF;AAA5E,+GAAA,sBAAsB,OAAA;AAAE,iHAAA,wBAAwB,OAAA;AACzD,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,4DAA0C;AAC1C,2DAAyC;AACzC,sDAAoC;AACpC,qDAAmC;AACnC,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC","sourcesContent":["export { createAndRunCrawleeOne, createHttpCrawlerOptions } from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/error/sentry';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/apify/readme';\nexport * from './lib/readme/apify/types';\nexport * from './lib/router/router';\nexport * from './lib/router/types';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\n"]}
|
|
@@ -19,7 +19,7 @@ const pushData_1 = require("../io/pushData");
|
|
|
19
19
|
const dataset_1 = require("../io/dataset");
|
|
20
20
|
const pushRequests_1 = require("../io/pushRequests");
|
|
21
21
|
const apify_1 = require("../integrations/apify");
|
|
22
|
-
const router_1 = require("../router");
|
|
22
|
+
const router_1 = require("../router/router");
|
|
23
23
|
const config_1 = require("../config");
|
|
24
24
|
const log_1 = require("../log");
|
|
25
25
|
const actorClassByType = {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAaiB;AACjB,mCAAgD;AAEhD,+CAA2C;AAI3C,wDAA2D;AAC3D,4CAA8C;AAC9C,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,sCAAgE;AAChE,sCAWmB;AACnB,gCAAmE;AAUnE,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAMhB,KAA4E,EAC5E,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACO,CAAC;IAElC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AAEF;;;;;;;GAOG;AACI,MAAM,sBAAsB,GAAG,CAMpC,IAiCD,EAAiB,EAAE;IAClB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,WAAW,CAAC;IAEnD,MAAM,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,KAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IAEvE,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,aAAa,GAA8D;YAC/E,EAAE;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,yEAAyE;wBACzE,oBAAoB,EAAE,IA
AA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxGW,QAAA,sBAAsB,0BAwGjC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACI,MAAM,gBAAgB,GAAG,CAM9B,MAAmE,EACnB,EAAE;IAClD,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,MAAM,CAAC;IAE9C,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAsB,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,aAAG,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,uBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;IAEnF,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,IAAG,CAAC;IAE/D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAA
K,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;QACL,GAAG;KACJ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACsC,CAAC;IAElD,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAkE;QACvF,EAAE;QACF,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAA2D;QAC/E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,2DAA2D;IAC3D,MAAM,iBAAiB,CAAC,SAA6B,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAvGW,QAAA,gBAAgB,oBAuG3B;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAAY,
GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAM7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAyC,EAAE,EAAE;IAC1E,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,CA
AC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC3F,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAA6B,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC/E,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC/F,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAiC,CAAO,OAAO,EAAE,OAAO,EAAE,EAAE;QACjF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACwB,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAC9C,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGt
C,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAC5B,KAA2D,EAC3D,EAAE;;IACF,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,mCAAmC,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7B;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n Log,\n Request as CrawleeRequest,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport * as Sentry from '@sentry/node';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { setupSentry } from 
'../error/sentry';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultRoute } from '../router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n LoggingActorInput,\n} from '../config';\nimport { logLevelHandlerWrapper, logLevelToCrawlee } from '../log';\nimport type {\n ActorContext,\n ActorDefinition,\n ActorHookContext,\n ActorRouterContext,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input, TIO>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: gotScraping,\n } satisfies ActorHookContext<TIO>;\n\n const hookFn = eval(fnStr);\n if 
(!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Create default configuration for an opinionated Crawlee actor,\n * and run the actor within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport const createAndRunCrawleeOne = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(args: {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n ActorDefinition<Ctx, Labels, Input, TIO>,\n 'router' | 'createCrawler' | 'io'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. 
The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>;\n}): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n } = args;\n\n const { io = apifyIO as any as TIO } = actorConfig;\n\n await setupSentry({ ...sentryOptions, serverName: actorName }, { io });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllActorInputs, TIO> = {\n io,\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createCrawleeOne<Ctx, Labels, Input, TIO>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? 
(actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const createCrawleeOne = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>\n): Promise<ActorContext<Ctx, Labels, Input, TIO>> => {\n const { io = apifyIO as any as TIO } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? 
await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n const { logLevel } = (input ?? {}) as LoggingActorInput;\n const log = new Log({ level: logLevel ? logLevelToCrawlee[logLevel] : undefined });\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io, log });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? 
await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n log,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies ActorContext<Ctx, Labels, Input, TIO>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels, Input>({\n io,\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n // Now that the actor is ready, enqueue the URLs right away\n await scopedPushRequest(startUrls as CrawleeRequest[]);\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? 
await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? 
{}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? 
{}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: ActorContext['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? 
(item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: ActorContext['pushRequests'] = async (entries, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. 
in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (\n actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n actor.log.debug(`Loading start URLs from function`);\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}
|
|
1
|
+
{"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAaiB;AACjB,mCAAgD;AAEhD,+CAA2C;AAI3C,wDAA2D;AAC3D,4CAA8C;AAC9C,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,6CAAuE;AACvE,sCAWmB;AACnB,gCAAmE;AAUnE,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAMhB,KAA4E,EAC5E,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACO,CAAC;IAElC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AAEF;;;;;;;GAOG;AACI,MAAM,sBAAsB,GAAG,CAMpC,IAiCD,EAAiB,EAAE;IAClB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,WAAW,CAAC;IAEnD,MAAM,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,KAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IAEvE,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,aAAa,GAA8D;YAC/E,EAAE;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,yEAAyE;wBACzE,oBAAoB,EAAE,IA
AA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxGW,QAAA,sBAAsB,0BAwGjC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACI,MAAM,gBAAgB,GAAG,CAM9B,MAAmE,EACnB,EAAE;IAClD,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,MAAM,CAAC;IAE9C,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAsB,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,aAAG,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,uBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;IAEnF,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,IAAG,CAAC;IAE/D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAA
K,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;QACL,GAAG;KACJ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACsC,CAAC;IAElD,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAkE;QACvF,EAAE;QACF,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAA2D;QAC/E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,2DAA2D;IAC3D,MAAM,iBAAiB,CAAC,SAA6B,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAvGW,QAAA,gBAAgB,oBAuG3B;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAAY,
GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAM7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAyC,EAAE,EAAE;IAC1E,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,CA
AC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC3F,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAA6B,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC/E,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC/F,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAiC,CAAO,OAAO,EAAE,OAAO,EAAE,EAAE;QACjF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACwB,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAC9C,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGt
C,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAC5B,KAA2D,EAC3D,EAAE;;IACF,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,mCAAmC,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7B;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n Log,\n Request as CrawleeRequest,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport * as Sentry from '@sentry/node';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { setupSentry } from 
'../error/sentry';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultRoute } from '../router/router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n LoggingActorInput,\n} from '../config';\nimport { logLevelHandlerWrapper, logLevelToCrawlee } from '../log';\nimport type {\n ActorContext,\n ActorDefinition,\n ActorHookContext,\n ActorRouterContext,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input, TIO>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: gotScraping,\n } satisfies ActorHookContext<TIO>;\n\n const hookFn = eval(fnStr);\n 
if (!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Create default configuration for an opinionated Crawlee actor,\n * and run the actor within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport const createAndRunCrawleeOne = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(args: {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n ActorDefinition<Ctx, Labels, Input, TIO>,\n 'router' | 'createCrawler' | 'io'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. 
The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>;\n}): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n } = args;\n\n const { io = apifyIO as any as TIO } = actorConfig;\n\n await setupSentry({ ...sentryOptions, serverName: actorName }, { io });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllActorInputs, TIO> = {\n io,\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createCrawleeOne<Ctx, Labels, Input, TIO>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? 
(actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const createCrawleeOne = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>\n): Promise<ActorContext<Ctx, Labels, Input, TIO>> => {\n const { io = apifyIO as any as TIO } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? 
await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n const { logLevel } = (input ?? {}) as LoggingActorInput;\n const log = new Log({ level: logLevel ? logLevelToCrawlee[logLevel] : undefined });\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io, log });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? 
await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n log,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies ActorContext<Ctx, Labels, Input, TIO>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels, Input>({\n io,\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n // Now that the actor is ready, enqueue the URLs right away\n await scopedPushRequest(startUrls as CrawleeRequest[]);\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? 
await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? 
{}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? 
{}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: ActorContext['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? 
(item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: ActorContext['pushRequests'] = async (entries, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. 
in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (\n actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n actor.log.debug(`Loading start URLs from function`);\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}
|
|
@@ -4,7 +4,7 @@ import type { MaybePromise, PickPartial } from '../../utils/types';
|
|
|
4
4
|
import type { CrawlerUrl } from '../../types';
|
|
5
5
|
import type { itemCacheKey, pushData } from '../io/pushData';
|
|
6
6
|
import type { pushRequests } from '../io/pushRequests';
|
|
7
|
-
import type { RouteHandler, RouteMatcher, CrawlerRouterWrapper } from '../router';
|
|
7
|
+
import type { RouteHandler, RouteMatcher, CrawlerRouterWrapper } from '../router/types';
|
|
8
8
|
import type { MetamorphActorInput } from '../config';
|
|
9
9
|
import type { CrawleeOneIO } from '../integrations/types';
|
|
10
10
|
type MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/lib/actor/types.ts"],"names":[],"mappings":"","sourcesContent":["import type {\n BasicCrawler,\n CrawlingContext,\n Log,\n ProxyConfiguration,\n RouterHandler,\n} from 'crawlee';\nimport type { gotScraping } from 'got-scraping';\n\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport type { CrawlerUrl } from '../../types';\nimport type { itemCacheKey, pushData } from '../io/pushData';\nimport type { pushRequests } from '../io/pushRequests';\nimport type { RouteHandler, RouteMatcher, CrawlerRouterWrapper } from '../router';\nimport type { MetamorphActorInput } from '../config';\nimport type { CrawleeOneIO } from '../integrations/types';\n\ntype MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);\n\ntype OrigRunCrawler<T extends CrawlingContext<any, any>> = BasicCrawler<T>['run'];\n\n/** Extended type of `crawler.run()` function */\nexport type RunCrawler<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>> = (\n requests?: CrawlerUrl[],\n options?: Parameters<OrigRunCrawler<Ctx>>[1]\n) => ReturnType<OrigRunCrawler<Ctx>>;\n\n/** Trigger actor metamorph, using actor's inputs as defaults. 
*/\nexport type Metamorph = (overrides?: MetamorphActorInput) => Promise<void>;\n\n/** Context passed to route handlers */\nexport type ActorRouterContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = {\n actor: ActorContext<Ctx, Labels, Input, TIO>;\n};\n\n/** Context passed to user-defined functions passed from input */\nexport type ActorHookContext<TIO extends CrawleeOneIO> = Pick<ActorContext, 'input' | 'state'> & {\n io: TIO;\n itemCacheKey: typeof itemCacheKey;\n sendRequest: typeof gotScraping;\n};\n\nexport interface ActorDefinition<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n /** Client for communicating with cloud/local storage. */\n io: TIO;\n\n // Actor input\n /**\n * Actor input which you can get e.g. via `Actor.getInput()`\n *\n * Input is automatically retrieved if undefined.\n */\n input?: MaybeAsyncFn<Input, [ActorDefinition<Ctx, Labels, Input, TIO>]>;\n /** Validation for the actor input. Should throw error if validation fails. */\n validateInput?: (input: Input | null) => MaybePromise<void>;\n\n // Router setup\n /**\n * Router instance that redirects the request to handlers.\n * @example\n * import { createCheerioRouter } from 'crawlee';\n *\n * ({\n * ...\n * router: createCheerioRouter(),\n * })\n */\n router: MaybeAsyncFn<RouterHandler<Ctx>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>;\n /**\n * Criteria that un-labelled requests are matched against.\n *\n * E.g. 
If `match` function returns truthy value,\n * the request is passed to the `action` function for processing.\n *\n * @example\n * ({\n * ...\n * routes: [{\n * // If match returns true, the request is forwarded to handler\n * // with label JOB_DETAIL.\n * name: 'Job detail',\n * handlerLabel: routeLabels.JOB_DETAIL,\n * match: (url) => isUrlOfJobOffer(url),\n * }, {\n * // Define custom action function:\n * // If match returns true, we replace this request with new one\n * // pointing to new domain.\n * name: 'Main page',\n * handlerLabel: null,\n * match: (url) => url.match(/example\\.com\\/?(?:[?#~]|$)/i),\n * action: async (url, ctx, _, handlers) => {\n * ctx.log.info(`Redirecting to https://www.new-domain.com`);\n * await ctx.crawler.addRequests(['https://www.new-domain.com'], { forefront: true });\n * },\n * }],\n * })\n */\n routes: MaybeAsyncFn<\n RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[],\n [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]\n >;\n /** Handlers for the labelled requests. The object keys are the labels. */\n routeHandlers: MaybeAsyncFn<Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n /**\n * Provides the option to modify or extend all router handlers by wrapping\n * them in these functions.\n *\n * Wrappers are applied from right to left. That means that wrappers `[A, B, C]`\n * will be applied like so `A( B( C( handler ) ) )`.\n *\n * Default `routerWrappers`:\n * ```js\n * {\n * ...\n * routerWrappers: ({ input }) => [\n * logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 
'info'),\n * ],\n * }\n * ```\n */\n routerWrappers?: MaybeAsyncFn<CrawlerRouterWrapper<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>[], [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Proxy setup\n proxy?: MaybeAsyncFn<ProxyConfiguration, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Crawler setup\n createCrawler: (\n actorCtx: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'crawler' | 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n ) => MaybePromise<Ctx['crawler']>;\n}\n\n/** ActorDefinition object where the input is already resolved */\nexport type ActorDefinitionWithInput<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = Omit<ActorDefinition<Ctx, Labels, Input, TIO>, 'input'> & {\n input: Input | null;\n state: Record<string, unknown>;\n};\n\n/** Context available while creating a Crawlee crawler/actor */\nexport interface ActorContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n crawler: Ctx['crawler'];\n /**\n * This function wraps `crawler.run(requests, runOtions)` with additional\n * features:\n * - Automatically metamorph into another actor after the run finishes\n */\n runCrawler: RunCrawler<Ctx>;\n /** Trigger actor metamorph, using actor's inputs as defaults. 
*/\n metamorph: Metamorph;\n /**\n * `Actor.pushData` with extra optional features:\n *\n * - Limit the number of entries pushed to the Dataset based on the Actor input\n * - Transform and filter entries via Actor input.\n * - Add metadata to entries before they are pushed to Dataset.\n * - Set which (nested) properties are personal data optionally redact them for privacy compliance.\n */\n pushData: typeof pushData;\n /**\n * Similar to `Actor.openRequestQueue().addRequests`, but with extra features:\n *\n * - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.\n * - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.\n */\n pushRequests: typeof pushRequests;\n /**\n * A list of resolved Requests to be scraped.\n *\n * This list is a combination of 3 Actor inputs:\n * - `startUrls` - Static list of URLs to scrape.\n * - `startUrlsFromDataset` - From a specific field from a Dataset (e.g. \"dataset123#fieldName\" - Dataset: \"dataset123\", field: \"fieldName\").\n * - `startUrlsFromFunction` - A function that is evaulated to generate the Requests.\n */\n startUrls: CrawlerUrl[];\n proxy?: ProxyConfiguration;\n router: RouterHandler<Ctx>;\n routes: RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[];\n routeHandlers: Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>;\n /** Original config from which this actor context was created */\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>;\n /** Read-only inputs passed to the actor */\n input: Input | null;\n /** Mutable state that is shared across setup and teardown hooks */\n state: Record<string, unknown>;\n /**\n * Instance managing communication with databases - storage & retrieval\n * (Dataset, RequestQueue, KeyValueStore).\n *\n * This is modelled and similar to Apify's `Actor` static class.\n */\n io: TIO;\n log: Log;\n}\n"]}
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/lib/actor/types.ts"],"names":[],"mappings":"","sourcesContent":["import type {\n BasicCrawler,\n CrawlingContext,\n Log,\n ProxyConfiguration,\n RouterHandler,\n} from 'crawlee';\nimport type { gotScraping } from 'got-scraping';\n\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport type { CrawlerUrl } from '../../types';\nimport type { itemCacheKey, pushData } from '../io/pushData';\nimport type { pushRequests } from '../io/pushRequests';\nimport type { RouteHandler, RouteMatcher, CrawlerRouterWrapper } from '../router/types';\nimport type { MetamorphActorInput } from '../config';\nimport type { CrawleeOneIO } from '../integrations/types';\n\ntype MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);\n\ntype OrigRunCrawler<T extends CrawlingContext<any, any>> = BasicCrawler<T>['run'];\n\n/** Extended type of `crawler.run()` function */\nexport type RunCrawler<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>> = (\n requests?: CrawlerUrl[],\n options?: Parameters<OrigRunCrawler<Ctx>>[1]\n) => ReturnType<OrigRunCrawler<Ctx>>;\n\n/** Trigger actor metamorph, using actor's inputs as defaults. 
*/\nexport type Metamorph = (overrides?: MetamorphActorInput) => Promise<void>;\n\n/** Context passed to route handlers */\nexport type ActorRouterContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = {\n actor: ActorContext<Ctx, Labels, Input, TIO>;\n};\n\n/** Context passed to user-defined functions passed from input */\nexport type ActorHookContext<TIO extends CrawleeOneIO> = Pick<ActorContext, 'input' | 'state'> & {\n io: TIO;\n itemCacheKey: typeof itemCacheKey;\n sendRequest: typeof gotScraping;\n};\n\nexport interface ActorDefinition<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n /** Client for communicating with cloud/local storage. */\n io: TIO;\n\n // Actor input\n /**\n * Actor input which you can get e.g. via `Actor.getInput()`\n *\n * Input is automatically retrieved if undefined.\n */\n input?: MaybeAsyncFn<Input, [ActorDefinition<Ctx, Labels, Input, TIO>]>;\n /** Validation for the actor input. Should throw error if validation fails. */\n validateInput?: (input: Input | null) => MaybePromise<void>;\n\n // Router setup\n /**\n * Router instance that redirects the request to handlers.\n * @example\n * import { createCheerioRouter } from 'crawlee';\n *\n * ({\n * ...\n * router: createCheerioRouter(),\n * })\n */\n router: MaybeAsyncFn<RouterHandler<Ctx>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>;\n /**\n * Criteria that un-labelled requests are matched against.\n *\n * E.g. 
If `match` function returns truthy value,\n * the request is passed to the `action` function for processing.\n *\n * @example\n * ({\n * ...\n * routes: [{\n * // If match returns true, the request is forwarded to handler\n * // with label JOB_DETAIL.\n * name: 'Job detail',\n * handlerLabel: routeLabels.JOB_DETAIL,\n * match: (url) => isUrlOfJobOffer(url),\n * }, {\n * // Define custom action function:\n * // If match returns true, we replace this request with new one\n * // pointing to new domain.\n * name: 'Main page',\n * handlerLabel: null,\n * match: (url) => url.match(/example\\.com\\/?(?:[?#~]|$)/i),\n * action: async (url, ctx, _, handlers) => {\n * ctx.log.info(`Redirecting to https://www.new-domain.com`);\n * await ctx.crawler.addRequests(['https://www.new-domain.com'], { forefront: true });\n * },\n * }],\n * })\n */\n routes: MaybeAsyncFn<\n RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[],\n [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]\n >;\n /** Handlers for the labelled requests. The object keys are the labels. */\n routeHandlers: MaybeAsyncFn<Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n /**\n * Provides the option to modify or extend all router handlers by wrapping\n * them in these functions.\n *\n * Wrappers are applied from right to left. That means that wrappers `[A, B, C]`\n * will be applied like so `A( B( C( handler ) ) )`.\n *\n * Default `routerWrappers`:\n * ```js\n * {\n * ...\n * routerWrappers: ({ input }) => [\n * logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 
'info'),\n * ],\n * }\n * ```\n */\n routerWrappers?: MaybeAsyncFn<CrawlerRouterWrapper<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>[], [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Proxy setup\n proxy?: MaybeAsyncFn<ProxyConfiguration, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Crawler setup\n createCrawler: (\n actorCtx: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'crawler' | 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n ) => MaybePromise<Ctx['crawler']>;\n}\n\n/** ActorDefinition object where the input is already resolved */\nexport type ActorDefinitionWithInput<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = Omit<ActorDefinition<Ctx, Labels, Input, TIO>, 'input'> & {\n input: Input | null;\n state: Record<string, unknown>;\n};\n\n/** Context available while creating a Crawlee crawler/actor */\nexport interface ActorContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n crawler: Ctx['crawler'];\n /**\n * This function wraps `crawler.run(requests, runOtions)` with additional\n * features:\n * - Automatically metamorph into another actor after the run finishes\n */\n runCrawler: RunCrawler<Ctx>;\n /** Trigger actor metamorph, using actor's inputs as defaults. 
*/\n metamorph: Metamorph;\n /**\n * `Actor.pushData` with extra optional features:\n *\n * - Limit the number of entries pushed to the Dataset based on the Actor input\n * - Transform and filter entries via Actor input.\n * - Add metadata to entries before they are pushed to Dataset.\n * - Set which (nested) properties are personal data optionally redact them for privacy compliance.\n */\n pushData: typeof pushData;\n /**\n * Similar to `Actor.openRequestQueue().addRequests`, but with extra features:\n *\n * - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.\n * - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.\n */\n pushRequests: typeof pushRequests;\n /**\n * A list of resolved Requests to be scraped.\n *\n * This list is a combination of 3 Actor inputs:\n * - `startUrls` - Static list of URLs to scrape.\n * - `startUrlsFromDataset` - From a specific field from a Dataset (e.g. \"dataset123#fieldName\" - Dataset: \"dataset123\", field: \"fieldName\").\n * - `startUrlsFromFunction` - A function that is evaulated to generate the Requests.\n */\n startUrls: CrawlerUrl[];\n proxy?: ProxyConfiguration;\n router: RouterHandler<Ctx>;\n routes: RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[];\n routeHandlers: Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>;\n /** Original config from which this actor context was created */\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>;\n /** Read-only inputs passed to the actor */\n input: Input | null;\n /** Mutable state that is shared across setup and teardown hooks */\n state: Record<string, unknown>;\n /**\n * Instance managing communication with databases - storage & retrieval\n * (Dataset, RequestQueue, KeyValueStore).\n *\n * This is modelled and similar to Apify's `Actor` static class.\n */\n io: TIO;\n log: Log;\n}\n"]}
|
|
@@ -4,17 +4,17 @@ import type { DatasetPerfStat, ScraperActorSpec, ScraperDataset } from 'actor-sp
|
|
|
4
4
|
*
|
|
5
5
|
* See {@link ScraperActorSpec}
|
|
6
6
|
*/
|
|
7
|
-
export interface
|
|
8
|
-
datasets:
|
|
7
|
+
export interface ApifyScraperActorSpec extends ScraperActorSpec {
|
|
8
|
+
datasets: ApifyScraperDataset[];
|
|
9
9
|
}
|
|
10
10
|
/** Dataset with additional perf stats info for formatting in tables */
|
|
11
|
-
export interface
|
|
12
|
-
perfStats:
|
|
11
|
+
export interface ApifyScraperDataset extends ScraperDataset {
|
|
12
|
+
perfStats: ApifyDatasetPerfStat[];
|
|
13
13
|
/** Specify which perfTable should render this data */
|
|
14
14
|
perfTable: string;
|
|
15
15
|
}
|
|
16
16
|
/** Dataset perf stats with additional info for formatting in tables */
|
|
17
|
-
export interface
|
|
17
|
+
export interface ApifyDatasetPerfStat extends DatasetPerfStat {
|
|
18
18
|
rowId: string;
|
|
19
19
|
colId: string;
|
|
20
20
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actorSpec.js","sourceRoot":"","sources":["../../../src/lib/actorSpec.ts"],"names":[],"mappings":"","sourcesContent":["import type { DatasetPerfStat, ScraperActorSpec, ScraperDataset } from 'actor-spec';\n\n/**\n * Scraper actor spec with additional dataset perf stats info for formatting in tables\n *\n * See {@link ScraperActorSpec}\n */\nexport interface
|
|
1
|
+
{"version":3,"file":"actorSpec.js","sourceRoot":"","sources":["../../../src/lib/actorSpec.ts"],"names":[],"mappings":"","sourcesContent":["import type { DatasetPerfStat, ScraperActorSpec, ScraperDataset } from 'actor-spec';\n\n/**\n * Scraper actor spec with additional dataset perf stats info for formatting in tables\n *\n * See {@link ScraperActorSpec}\n */\nexport interface ApifyScraperActorSpec extends ScraperActorSpec {\n datasets: ApifyScraperDataset[];\n}\n\n/** Dataset with additional perf stats info for formatting in tables */\nexport interface ApifyScraperDataset extends ScraperDataset {\n perfStats: ApifyDatasetPerfStat[];\n /** Specify which perfTable should render this data */\n perfTable: string;\n}\n\n/** Dataset perf stats with additional info for formatting in tables */\nexport interface ApifyDatasetPerfStat extends DatasetPerfStat {\n rowId: string;\n colId: string;\n}\n"]}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { BasicCrawlingContext, CheerioCrawlingContext, CrawlingContext, ErrorHandler, HttpCrawlingContext, JSDOMCrawlingContext, PlaywrightCrawlingContext, PuppeteerCrawlingContext } from 'crawlee';
|
|
2
2
|
import type { MaybePromise, PickRequired } from '../../utils/types';
|
|
3
|
-
import type { RouteHandler, RouterHandlerCtx } from '../router';
|
|
3
|
+
import type { RouteHandler, RouterHandlerCtx } from '../router/types';
|
|
4
4
|
import type { CrawleeOneErrorHandlerInput, CrawleeOneErrorHandlerOptions } from '../integrations/types';
|
|
5
5
|
export type CaptureErrorInput = PickRequired<Partial<CrawleeOneErrorHandlerInput>, 'error'>;
|
|
6
6
|
export type CaptureError = (input: CaptureErrorInput) => MaybePromise<void>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"errorHandler.js","sourceRoot":"","sources":["../../../../src/lib/error/errorHandler.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAUA,qDAAuC;AAUvC,iDAAgD;AAKhD;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAC1B,KAAwB,EACxB,OAAqD,EACrD,EAAE;;IACF,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EACJ,EAAE,GAAG,eAA6C,EAClD,kBAAkB,EAClB,cAAc,GACf,GAAG,OAAO,CAAC;IAEZ,MAAM,GAAG,GAAG,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,mCAAI,IAAI,CAAC;IAErE,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAE9D,iCAAiC;IACjC,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,kBAAkB,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,WAAW,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9F,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,mBAAmB,CACzC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,IAAI,EAAE,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,IAAI,EAAE,GAAG,EAAE,kCAC3D,OAAO,KAAE,EAAE,IACjB,CAAC;IAEF,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;IAErD,yBAAyB;IACzB,IAAI,kBAAkB,EAAE;QACtB,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,4CAA4C,kBAAkB,EAAE,CAAC,CAAC;QAC5E,MAAM,CAAA,gBAAgB,aAAhB,gBAAgB,uBAAhB,gBAAgB,CAAE,QAAQ,CAAC,MAAM,CAAC,CAAA,CAAC;QACzC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,iDAAiD,kBAAkB,EAAE,CAAC,CAAC;KAClF;IAED,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,wCAAwC,CAAC,CAAC;IACrD,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAA,CAAC;IAC1C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAE1D,gEAAgE;IAChE,KAAK,CAAC,wBAAwB,GAAG,IAAI,CAAC;IACtC,sBAAsB;IACtB,MAAM,KAAK,CAAC;AACd,CAAC,CAAA,CAAC;AAzCW,QAAA,YAAY,gBAyCvB;AAEF;;;;GAIG;AACI,MAAM,mBAAmB,GAAG,CAIjC,EAAiE,EACjE,OAAqD,EACrD,EAAE;IACF,MAAM,oBAAoB,GAAiB,CAAC,KAAK,EAAE,EAAE,CAAC,IAAA,oBAAY,EAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAEnF,IAAI;QACF,4FAA4F;QAC5F,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,oBAAoB,EAAE,CAAC,CAAC;KAClD;IAAC,OAAO
,KAAU,EAAE;QACnB,IAAI,CAAC,KAAK,CAAC,wBAAwB,EAAE;YACnC,+DAA+D;YAC/D,MAAM,oBAAoB,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;SACzE;KACF;AACH,CAAC,CAAA,CAAC;AAlBW,QAAA,mBAAmB,uBAkB9B;AAEF;;;;;;;;;;;;;;;GAeG;AACI,MAAM,wBAAwB,GAAG,CAKtC,OAA4F,EAC5F,OAAqD,EACrD,EAAE;IACF,sFAAsF;IACtF,MAAM,cAAc,GAAG,CAAC,GAAqC,EAAE,EAAE;QAC/D,OAAO,IAAA,2BAAmB,EAAC,CAAC,EAAE,YAAY,EAAE,EAAE,EAAE;YAC9C,OAAO,OAAO,iCACR,GAAW;gBACf,0EAA0E;gBAC1E,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;;oBACtB,OAAA,YAAY,CAAC;wBACX,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,GAAG,CAAC,IAAI;wBAC5B,GAAG,EAAE,KAAK,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG;wBACjC,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,GAAG,CAAC,GAAG;qBAC1B,CAAC,CAAA;iBAAA,IACJ,CAAC;QACL,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC,CAAC;IACF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAzBW,QAAA,wBAAwB,4BAyBnC;AAEK,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,4BAA4B,GAAG,CAAkC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAzL,QAAA,4BAA4B,gCAA0I;AAC5K,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,kCAAkC,GAAG,CAAwC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAArM,QAAA,kCAAkC,sCAAgJ;AACxL,MAAM,+BAA+B,GAAG,CAAqC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA/L,QAAA,+BAA+B,mCAA6I;AAClL,MAAM,iCAAiC,GAAG,CAAuC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAnM,QAAA,iCAAiC,qCAA+I;AAE7L;;;;;;;GAOG;AACI,MAAM,kBAAkB,GAAG,CAChC,OAAmE,EAChD,EAAE;IACrB,OAAO,CAAO,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE;;QAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC;QAC7C,IAAA,oBAAY,EACV,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,IAAY,EAAE,EACvC;YACE,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,kBAAkB,EAAE,OAAO,CAAC,kBAAkB;YAC9C,eAAe,EAAE,
MAAA,OAAO,CAAC,eAAe,mCAAI,IAAI;YAChD,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;gBACpC,IAAI,CAAC,OAAO,CAAC,YAAY;oBAAE,OAAO;gBAElC,MAAM,CAAC,gBAAgB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAa,EAAE,CAAC,CAAC;YAC3D,CAAC;SACF,CACF,CAAC;IACJ,CAAC,CAAA,CAAC;AACJ,CAAC,CAAC;AAnBW,QAAA,kBAAkB,sBAmB7B","sourcesContent":["import type {\n BasicCrawlingContext,\n CheerioCrawlingContext,\n CrawlingContext,\n ErrorHandler,\n HttpCrawlingContext,\n JSDOMCrawlingContext,\n PlaywrightCrawlingContext,\n PuppeteerCrawlingContext,\n} from 'crawlee';\nimport * as Sentry from '@sentry/node';\nimport type { Page } from 'playwright';\n\nimport type { MaybePromise, PickRequired } from '../../utils/types';\nimport type { RouteHandler, RouterHandlerCtx } from '../router';\nimport type {\n CrawleeOneErrorHandlerInput,\n CrawleeOneErrorHandlerOptions,\n CrawleeOneIO,\n} from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\n\nexport type CaptureErrorInput = PickRequired<Partial<CrawleeOneErrorHandlerInput>, 'error'>;\nexport type CaptureError = (input: CaptureErrorInput) => MaybePromise<void>;\n\n/**\n * Error handling for CrawleeOne crawlers.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * See https://docs.apify.com/academy/node-js/analyzing-pages-and-fixing-errors#error-reporting\n */\nexport const captureError = async <TEnv extends object = object, TReport extends object = object>(\n input: CaptureErrorInput,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const { error, log: parentLog } = input;\n const {\n io = apifyIO as any as CrawleeOneIO<TEnv, TReport>,\n reportingDatasetId,\n onErrorCapture,\n } = options;\n\n const log = parentLog?.child({ prefix: '[Error capture] ' }) ?? 
null;\n\n log?.error(`ERROR ${error.name}: ${error.message}`, error);\n console.error(`ERROR ${error.name}: ${error.message}`, error);\n\n // Let's create reporting dataset\n // If you already have one, this will continue adding to it\n const reportingDataset = reportingDatasetId ? await io.openDataset(reportingDatasetId) : null;\n const report = await io.generateErrorReport(\n { error, page: input.page ?? null, url: input.url ?? null, log },\n { ...options, io }\n );\n\n log?.error('[Error capture] Error captured', report);\n\n // And we push the report\n if (reportingDatasetId) {\n log?.info(`[Error capture] Pushing error to dataset ${reportingDatasetId}`);\n await reportingDataset?.pushData(report);\n log?.info(`[Error capture] DONE pushing error to dataset ${reportingDatasetId}`);\n }\n\n log?.error('[Error capture] Calling onErrorCapture');\n await onErrorCapture?.({ error, report });\n log?.error('[Error capture] Done calling onErrorCapture');\n\n // @ts-expect-error Tag the error, so we don't capture it twice.\n error._crawleeOneErrorCaptured = true;\n // Propagate the error\n throw error;\n};\n\n/**\n * Error handling for Crawlers as a function wrapper\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const captureErrorWrapper = async <\n TEnv extends object = object,\n TReport extends object = object\n>(\n fn: (input: { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const captureErrorWithArgs: CaptureError = (input) => captureError(input, options);\n\n try {\n // Pass the error capturing function to the wrapped function, so it can trigger it by itself\n await fn({ captureError: captureErrorWithArgs });\n } catch (error: any) {\n if (!error._crawleeOneErrorCaptured) {\n // And if the wrapped function fails, we capture error for them\n await captureErrorWithArgs({ error, url: null, page: null, log: null });\n }\n }\n};\n\n/**\n * Drop-in replacement for regular 
request handler callback for Crawlee route\n * that automatically tracks errors.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * @example\n *\n * router.addDefaultHandler(\n * captureErrorRouteHandler(async (ctx) => {\n * const { page, crawler } = ctx;\n * const url = page.url();\n * ...\n * })\n * );\n */\nexport const captureErrorRouteHandler = <\n Ctx extends CrawlingContext,\n TEnv extends object = object,\n TReport extends object = object\n>(\n handler: (ctx: RouterHandlerCtx<Ctx> & { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n // Wrap the original handler, so we can additionally pass it the captureError function\n const wrapperHandler = (ctx: Parameters<RouteHandler<Ctx>>[0]) => {\n return captureErrorWrapper(({ captureError }) => {\n return handler({\n ...(ctx as any),\n // And automatically feed contextual args (page, url, log) to captureError\n captureError: (input) =>\n captureError({\n error: input.error,\n page: input.page ?? ctx.page,\n url: input.url || ctx.request.url,\n log: input.log ?? 
ctx.log,\n }),\n });\n }, options);\n };\n return wrapperHandler;\n};\n\nexport const basicCaptureErrorRouteHandler = <Ctx extends BasicCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const httpCaptureErrorRouteHandler = <Ctx extends HttpCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const jsdomCaptureErrorRouteHandler = <Ctx extends JSDOMCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const playwrightCaptureErrorRouteHandler = <Ctx extends PlaywrightCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const cheerioCaptureErrorRouteHandler = <Ctx extends CheerioCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const puppeteerCaptureErrorRouteHandler = <Ctx extends PuppeteerCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\n\n/**\n * Create an `ErrorHandler` function that can be assigned to\n * `failedRequestHandler` option of `BasicCrawlerOptions`.\n *\n * The function saves error to a Dataset, and optionally forwards it to Sentry.\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const createErrorHandler = <Ctx extends CrawlingContext>(\n options: CrawleeOneErrorHandlerOptions & { sendToSentry?: boolean }\n): ErrorHandler<Ctx> => {\n return async ({ request, log, page }, error) => {\n const url = request.loadedUrl || request.url;\n captureError(\n { error, url, log, page: page as Page },\n {\n io: options.io,\n reportingDatasetId: options.reportingDatasetId,\n allowScreenshot: 
options.allowScreenshot ?? true,\n onErrorCapture: ({ error, report }) => {\n if (!options.sendToSentry) return;\n\n Sentry.captureException(error, { extra: report as any });\n },\n }\n );\n };\n};\n"]}
|
|
1
|
+
{"version":3,"file":"errorHandler.js","sourceRoot":"","sources":["../../../../src/lib/error/errorHandler.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAUA,qDAAuC;AAUvC,iDAAgD;AAKhD;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAC1B,KAAwB,EACxB,OAAqD,EACrD,EAAE;;IACF,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EACJ,EAAE,GAAG,eAA6C,EAClD,kBAAkB,EAClB,cAAc,GACf,GAAG,OAAO,CAAC;IAEZ,MAAM,GAAG,GAAG,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,KAAK,CAAC,EAAE,MAAM,EAAE,kBAAkB,EAAE,CAAC,mCAAI,IAAI,CAAC;IAErE,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAC3D,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,OAAO,EAAE,EAAE,KAAK,CAAC,CAAC;IAE9D,iCAAiC;IACjC,2DAA2D;IAC3D,MAAM,gBAAgB,GAAG,kBAAkB,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,WAAW,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;IAC9F,MAAM,MAAM,GAAG,MAAM,EAAE,CAAC,mBAAmB,CACzC,EAAE,KAAK,EAAE,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,IAAI,EAAE,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,IAAI,EAAE,GAAG,EAAE,kCAC3D,OAAO,KAAE,EAAE,IACjB,CAAC;IAEF,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;IAErD,yBAAyB;IACzB,IAAI,kBAAkB,EAAE;QACtB,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,4CAA4C,kBAAkB,EAAE,CAAC,CAAC;QAC5E,MAAM,CAAA,gBAAgB,aAAhB,gBAAgB,uBAAhB,gBAAgB,CAAE,QAAQ,CAAC,MAAM,CAAC,CAAA,CAAC;QACzC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,iDAAiD,kBAAkB,EAAE,CAAC,CAAC;KAClF;IAED,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,wCAAwC,CAAC,CAAC;IACrD,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAA,CAAC;IAC1C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAE1D,gEAAgE;IAChE,KAAK,CAAC,wBAAwB,GAAG,IAAI,CAAC;IACtC,sBAAsB;IACtB,MAAM,KAAK,CAAC;AACd,CAAC,CAAA,CAAC;AAzCW,QAAA,YAAY,gBAyCvB;AAEF;;;;GAIG;AACI,MAAM,mBAAmB,GAAG,CAIjC,EAAiE,EACjE,OAAqD,EACrD,EAAE;IACF,MAAM,oBAAoB,GAAiB,CAAC,KAAK,EAAE,EAAE,CAAC,IAAA,oBAAY,EAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAEnF,IAAI;QACF,4FAA4F;QAC5F,MAAM,EAAE,CAAC,EAAE,YAAY,EAAE,oBAAoB,EAAE,CAAC,CAAC;KAClD;IAAC,OAAO
,KAAU,EAAE;QACnB,IAAI,CAAC,KAAK,CAAC,wBAAwB,EAAE;YACnC,+DAA+D;YAC/D,MAAM,oBAAoB,CAAC,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;SACzE;KACF;AACH,CAAC,CAAA,CAAC;AAlBW,QAAA,mBAAmB,uBAkB9B;AAEF;;;;;;;;;;;;;;;GAeG;AACI,MAAM,wBAAwB,GAAG,CAKtC,OAA4F,EAC5F,OAAqD,EACrD,EAAE;IACF,sFAAsF;IACtF,MAAM,cAAc,GAAG,CAAC,GAAqC,EAAE,EAAE;QAC/D,OAAO,IAAA,2BAAmB,EAAC,CAAC,EAAE,YAAY,EAAE,EAAE,EAAE;YAC9C,OAAO,OAAO,iCACR,GAAW;gBACf,0EAA0E;gBAC1E,YAAY,EAAE,CAAC,KAAK,EAAE,EAAE;;oBACtB,OAAA,YAAY,CAAC;wBACX,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,IAAI,EAAE,MAAA,KAAK,CAAC,IAAI,mCAAI,GAAG,CAAC,IAAI;wBAC5B,GAAG,EAAE,KAAK,CAAC,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG;wBACjC,GAAG,EAAE,MAAA,KAAK,CAAC,GAAG,mCAAI,GAAG,CAAC,GAAG;qBAC1B,CAAC,CAAA;iBAAA,IACJ,CAAC;QACL,CAAC,EAAE,OAAO,CAAC,CAAC;IACd,CAAC,CAAC;IACF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAzBW,QAAA,wBAAwB,4BAyBnC;AAEK,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,4BAA4B,GAAG,CAAkC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAzL,QAAA,4BAA4B,gCAA0I;AAC5K,MAAM,6BAA6B,GAAG,CAAmC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA3L,QAAA,6BAA6B,iCAA2I;AAC9K,MAAM,kCAAkC,GAAG,CAAwC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAArM,QAAA,kCAAkC,sCAAgJ;AACxL,MAAM,+BAA+B,GAAG,CAAqC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAA/L,QAAA,+BAA+B,mCAA6I;AAClL,MAAM,iCAAiC,GAAG,CAAuC,GAAG,IAAsD,EAAE,EAAE,CAAC,IAAA,gCAAwB,EAAM,GAAG,IAAI,CAAC,CAAC,CAAC,kBAAkB;AAAnM,QAAA,iCAAiC,qCAA+I;AAE7L;;;;;;;GAOG;AACI,MAAM,kBAAkB,GAAG,CAChC,OAAmE,EAChD,EAAE;IACrB,OAAO,CAAO,EAAE,OAAO,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,KAAK,EAAE,EAAE;;QAC7C,MAAM,GAAG,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,CAAC;QAC7C,IAAA,oBAAY,EACV,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,IAAY,EAAE,EACvC;YACE,EAAE,EAAE,OAAO,CAAC,EAAE;YACd,kBAAkB,EAAE,OAAO,CAAC,kBAAkB;YAC9C,eAAe,EAAE,
MAAA,OAAO,CAAC,eAAe,mCAAI,IAAI;YAChD,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE,EAAE;gBACpC,IAAI,CAAC,OAAO,CAAC,YAAY;oBAAE,OAAO;gBAElC,MAAM,CAAC,gBAAgB,CAAC,KAAK,EAAE,EAAE,KAAK,EAAE,MAAa,EAAE,CAAC,CAAC;YAC3D,CAAC;SACF,CACF,CAAC;IACJ,CAAC,CAAA,CAAC;AACJ,CAAC,CAAC;AAnBW,QAAA,kBAAkB,sBAmB7B","sourcesContent":["import type {\n BasicCrawlingContext,\n CheerioCrawlingContext,\n CrawlingContext,\n ErrorHandler,\n HttpCrawlingContext,\n JSDOMCrawlingContext,\n PlaywrightCrawlingContext,\n PuppeteerCrawlingContext,\n} from 'crawlee';\nimport * as Sentry from '@sentry/node';\nimport type { Page } from 'playwright';\n\nimport type { MaybePromise, PickRequired } from '../../utils/types';\nimport type { RouteHandler, RouterHandlerCtx } from '../router/types';\nimport type {\n CrawleeOneErrorHandlerInput,\n CrawleeOneErrorHandlerOptions,\n CrawleeOneIO,\n} from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\n\nexport type CaptureErrorInput = PickRequired<Partial<CrawleeOneErrorHandlerInput>, 'error'>;\nexport type CaptureError = (input: CaptureErrorInput) => MaybePromise<void>;\n\n/**\n * Error handling for CrawleeOne crawlers.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * See https://docs.apify.com/academy/node-js/analyzing-pages-and-fixing-errors#error-reporting\n */\nexport const captureError = async <TEnv extends object = object, TReport extends object = object>(\n input: CaptureErrorInput,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const { error, log: parentLog } = input;\n const {\n io = apifyIO as any as CrawleeOneIO<TEnv, TReport>,\n reportingDatasetId,\n onErrorCapture,\n } = options;\n\n const log = parentLog?.child({ prefix: '[Error capture] ' }) ?? 
null;\n\n log?.error(`ERROR ${error.name}: ${error.message}`, error);\n console.error(`ERROR ${error.name}: ${error.message}`, error);\n\n // Let's create reporting dataset\n // If you already have one, this will continue adding to it\n const reportingDataset = reportingDatasetId ? await io.openDataset(reportingDatasetId) : null;\n const report = await io.generateErrorReport(\n { error, page: input.page ?? null, url: input.url ?? null, log },\n { ...options, io }\n );\n\n log?.error('[Error capture] Error captured', report);\n\n // And we push the report\n if (reportingDatasetId) {\n log?.info(`[Error capture] Pushing error to dataset ${reportingDatasetId}`);\n await reportingDataset?.pushData(report);\n log?.info(`[Error capture] DONE pushing error to dataset ${reportingDatasetId}`);\n }\n\n log?.error('[Error capture] Calling onErrorCapture');\n await onErrorCapture?.({ error, report });\n log?.error('[Error capture] Done calling onErrorCapture');\n\n // @ts-expect-error Tag the error, so we don't capture it twice.\n error._crawleeOneErrorCaptured = true;\n // Propagate the error\n throw error;\n};\n\n/**\n * Error handling for Crawlers as a function wrapper\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const captureErrorWrapper = async <\n TEnv extends object = object,\n TReport extends object = object\n>(\n fn: (input: { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n const captureErrorWithArgs: CaptureError = (input) => captureError(input, options);\n\n try {\n // Pass the error capturing function to the wrapped function, so it can trigger it by itself\n await fn({ captureError: captureErrorWithArgs });\n } catch (error: any) {\n if (!error._crawleeOneErrorCaptured) {\n // And if the wrapped function fails, we capture error for them\n await captureErrorWithArgs({ error, url: null, page: null, log: null });\n }\n }\n};\n\n/**\n * Drop-in replacement for regular 
request handler callback for Crawlee route\n * that automatically tracks errors.\n *\n * By default, error reports are saved to Apify Dataset.\n *\n * @example\n *\n * router.addDefaultHandler(\n * captureErrorRouteHandler(async (ctx) => {\n * const { page, crawler } = ctx;\n * const url = page.url();\n * ...\n * })\n * );\n */\nexport const captureErrorRouteHandler = <\n Ctx extends CrawlingContext,\n TEnv extends object = object,\n TReport extends object = object\n>(\n handler: (ctx: RouterHandlerCtx<Ctx> & { captureError: CaptureError }) => MaybePromise<void>,\n options: CrawleeOneErrorHandlerOptions<TEnv, TReport>\n) => {\n // Wrap the original handler, so we can additionally pass it the captureError function\n const wrapperHandler = (ctx: Parameters<RouteHandler<Ctx>>[0]) => {\n return captureErrorWrapper(({ captureError }) => {\n return handler({\n ...(ctx as any),\n // And automatically feed contextual args (page, url, log) to captureError\n captureError: (input) =>\n captureError({\n error: input.error,\n page: input.page ?? ctx.page,\n url: input.url || ctx.request.url,\n log: input.log ?? 
ctx.log,\n }),\n });\n }, options);\n };\n return wrapperHandler;\n};\n\nexport const basicCaptureErrorRouteHandler = <Ctx extends BasicCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const httpCaptureErrorRouteHandler = <Ctx extends HttpCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const jsdomCaptureErrorRouteHandler = <Ctx extends JSDOMCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const playwrightCaptureErrorRouteHandler = <Ctx extends PlaywrightCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const cheerioCaptureErrorRouteHandler = <Ctx extends CheerioCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\nexport const puppeteerCaptureErrorRouteHandler = <Ctx extends PuppeteerCrawlingContext>(...args: Parameters<typeof captureErrorRouteHandler<Ctx>>) => captureErrorRouteHandler<Ctx>(...args); // prettier-ignore\n\n/**\n * Create an `ErrorHandler` function that can be assigned to\n * `failedRequestHandler` option of `BasicCrawlerOptions`.\n *\n * The function saves error to a Dataset, and optionally forwards it to Sentry.\n *\n * By default, error reports are saved to Apify Dataset.\n */\nexport const createErrorHandler = <Ctx extends CrawlingContext>(\n options: CrawleeOneErrorHandlerOptions & { sendToSentry?: boolean }\n): ErrorHandler<Ctx> => {\n return async ({ request, log, page }, error) => {\n const url = request.loadedUrl || request.url;\n captureError(\n { error, url, log, page: page as Page },\n {\n io: options.io,\n reportingDatasetId: options.reportingDatasetId,\n allowScreenshot: 
options.allowScreenshot ?? true,\n onErrorCapture: ({ error, report }) => {\n if (!options.sendToSentry) return;\n\n Sentry.captureException(error, { extra: report as any });\n },\n }\n );\n };\n};\n"]}
|
|
@@ -87,10 +87,14 @@ exports.apifyIO = {
|
|
|
87
87
|
};
|
|
88
88
|
}),
|
|
89
89
|
openRequestQueue: (...args) => __awaiter(void 0, void 0, void 0, function* () {
|
|
90
|
-
|
|
90
|
+
const queue = yield apify_1.Actor.openRequestQueue(...args);
|
|
91
91
|
const clear = () => __awaiter(void 0, void 0, void 0, function* () {
|
|
92
|
-
|
|
93
|
-
|
|
92
|
+
let req;
|
|
93
|
+
do {
|
|
94
|
+
req = yield queue.fetchNextRequest();
|
|
95
|
+
if (req)
|
|
96
|
+
yield queue.markRequestHandled(req);
|
|
97
|
+
} while (req);
|
|
94
98
|
});
|
|
95
99
|
return {
|
|
96
100
|
addRequests: (...args) => queue.addRequests(...args),
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"apify.js","sourceRoot":"","sources":["../../../../src/lib/integrations/apify.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,iCAAwC;AACxC,qCAAsF;AAuCtF,MAAM,wBAAwB,GAA6C,CACzE,KAAK,EACL,OAAO,EACP,EAAE;IACF,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EAAE,EAAE,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IAExC,oEAAoE;IACpE,wCAAwC;IACxC,+CAA+C;IAC/C,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,sBAAsB,EAAE,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC;IAEnF,MAAM,WAAW,GAAG,oCAAoC,OAAO,SAAS,UAAU,EAAE,CAAC;IAErF,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,SAAS,YAAY,EAAE,CAAC;IAEpC,IAAI,cAAc,GAAkB,IAAI,CAAC;IACzC,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,OAAO,GAAkB,GAAG,aAAH,GAAG,cAAH,GAAG,GAAI,IAAI,CAAC;IACzC,IAAI,IAAI,IAAI,eAAe,EAAE;QAC3B,OAAO,GAAG,OAAO,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;QAChC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACrC,MAAM,yBAAe,CAAC,YAAY,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAClD,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAC1C,0EAA0E;QAC1E,cAAc,GAAG,6CAA6C,OAAO,YAAY,GAAG,2BAA2B,CAAC;QAChH,gBAAgB,GAAG,6CAA6C,OAAO,YAAY,GAAG,4BAA4B,CAAC;KACpH;IAED,4BAA4B;IAC5B,MAAM,MAAM,GAAG;QACb,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,KAAK,CAAC,IAAI;QACrB,YAAY,EAAE,KAAK,CAAC,QAAQ,EAAE;QAE9B,OAAO;QACP,gBAAgB;QAChB,cAAc;KACY,CAAC;IAE7B,OAAO,MAAM,CAAC;AAChB,CAAC,CAAA,CAAC;AAEF,MAAM,0BAA0B,GAAG,CAA8B,GAAQ,EAAE,EAAE;;IAC3E,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,aAAK,CAAC,MAAM,EAAE,CAAC;IAC/C,MAAM,WAAW,GACf,OAAO,IAAI,IAAI,IAAI,UAAU,IAAI,IAAI;QACnC,CAAC,CAAC,oCAAoC,OAAO,SAAS,UAAU,EAAE;QAClE,CAAC,CAAC,IAAI,CAAC;IACX,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE3C,MAAM,QAAQ,GAAG;QACf,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,GAAG,CAAC,EAAE;QACjB,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,EAAE,mCAAI,IAAI;QAEjC,WAAW,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,GAAG,mCAAI,IAAI;QACpC,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,SAAS,mCAAI,IAAI;QAExC,WAAW,EAAE,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,SAAS;QAC/C,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,UAAU;KACX,CAAC;IAE
/B,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC;AAEF;;;;GAIG;AACU,QAAA,OAAO,GAAsB;IACxC,WAAW,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC7B,MAAM,OAAO,GAAG,MAAM,aAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,GAAS,EAAE,8DAAC,OAAA,MAAA,MAAA,CAAC,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC,0CAAE,SAAS,mCAAI,IAAI,CAAA,GAAA,CAAC;QAC9E,MAAM,QAAQ,GAAkC,CAAO,OAAO,EAAE,EAAE;YAChE,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,OAAO,iCAC/B,OAAO,KACV,SAAS,EAAE,IAAI,IACf,CAAC;YACH,OAAO,MAAM,CAAC,KAAK,CAAC;QACtB,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;YACxC,QAAQ;YACR,YAAY;SACb,CAAC;IACJ,CAAC,CAAA;IACD,gBAAgB,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAClC,IAAI,KAAK,GAAG,MAAM,aAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC,CAAC;QAClD,MAAM,KAAK,GAAG,GAAS,EAAE;YACvB,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,GAAG,MAAM,aAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC,CAAC;QAChD,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,WAAW,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC;YACpD,kBAAkB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,kBAAkB,CAAC,GAAG,IAAI,CAAC;YAClE,gBAAgB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC;YAC9D,cAAc,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,GAAG,IAAI,CAAC;YAC1D,UAAU,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;YAClD,YAAY,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC;YACtD,IAAI,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACtC,KAAK;SACN,CAAC;IACJ,CAAC,CAAA;IACD,iBAAiB,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QACnC,MAAM,KAAK,GAAG,MAAM,aAAK,CAAC,iBAAiB,CAAC,GAAG,IAAI,CAAC,CAAC;QACrD,MAAM,KAAK,GAAG,GAAS,EAAE;YACvB,MAAM,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;QAC7D,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;YAC9C,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;YAC9C,IAAI,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACtC,KAAK;SA
CN,CAAC;IACJ,CAAC,CAAA;IACD,MAAM,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC1C,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;IAC9C,YAAY,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC9B,MAAM,aAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAA;IACD,wBAAwB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC;IAC/D,+BAA+B,EAAE,CAAO,KAAU,EAAE,EAAE;QACpD,OAAO,OAAO,CAAC,GAAG,CAAC,gBAAgB;YACjC,CAAC,CAAC,MAAM,aAAK,CAAC,wBAAwB,CAAC,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,KAAK,CAAC;YACpD,CAAC,CAAC,SAAS,CAAC;IAChB,CAAC,CAAA;IACD,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB;IACxD,mBAAmB,EAAE,wBAAwB;IAC7C,qBAAqB,EAAE,0BAA0B;CACtB,CAAC","sourcesContent":["import { Actor, ApifyEnv } from 'apify';\nimport { CrawlingContext, Request as CrawleeRequest, playwrightUtils } from 'crawlee';\n\nimport type { CrawleeOneDataset, CrawleeOneIO } from './types';\n\nexport interface ApifyErrorReport {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: string;\n errorName: string;\n errorMessage: string;\n pageUrl: string | null;\n pageHtmlSnapshot: string | null;\n pageScreenshot: string | null;\n}\n\nexport interface ApifyEntryMetadata {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: string | null;\n contextId: string;\n requestId: string | null;\n\n /** The URL given to the crawler */\n originalUrl: string | null;\n /** The URL given to the crawler after possible redirects */\n loadedUrl: string | null;\n\n /** ISO datetime string that indicates the time when the request has been processed. 
*/\n dateHandled: string;\n numberOfRetries: number;\n}\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport type ApifyCrawleeOneIO = CrawleeOneIO<ApifyEnv, ApifyErrorReport, ApifyEntryMetadata>;\n\nconst generateApifyErrorReport: ApifyCrawleeOneIO['generateErrorReport'] = async (\n input,\n options\n) => {\n const { error, page, url, log } = input;\n const { io, allowScreenshot } = options;\n\n // storeId is ID of current key-value store, where we save snapshots\n // We can also capture actor and run IDs\n // to have easy access in the reporting dataset\n const { actorId, actorRunId, defaultKeyValueStoreId: storeId } = await io.getEnv();\n\n const actorRunUrl = `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`;\n\n const randomNumber = Math.random();\n const key = `ERROR-${randomNumber}`;\n\n let pageScreenshot: string | null = null;\n let pageHtmlSnapshot: string | null = null;\n let pageUrl: string | null = url ?? null;\n if (page && allowScreenshot) {\n pageUrl = pageUrl || page.url();\n log?.info('Capturing page snapshot');\n await playwrightUtils.saveSnapshot(page, { key });\n log?.info('DONE capturing page snapshot');\n // You will have to adjust the keys if you save them in a non-standard way\n pageScreenshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.jpg?disableRedirect=true`;\n pageHtmlSnapshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.html?disableRedirect=true`;\n }\n\n // We create a report object\n const report = {\n actorId,\n actorRunId,\n actorRunUrl,\n errorName: error.name,\n errorMessage: error.toString(),\n\n pageUrl,\n pageHtmlSnapshot,\n pageScreenshot,\n } satisfies ApifyErrorReport;\n\n return report;\n};\n\nconst generateApifyEntryMetadata = <Ctx extends CrawlingContext>(ctx: Ctx) => {\n const { actorId, actorRunId } = Actor.getEnv();\n const actorRunUrl =\n actorId != null && actorRunId != null\n ? 
`https://console.apify.com/actors/${actorId}/runs/${actorRunId}`\n : null;\n const handledAt = new Date().toISOString();\n\n const metadata = {\n actorId,\n actorRunId,\n actorRunUrl,\n contextId: ctx.id,\n requestId: ctx.request.id ?? null,\n\n originalUrl: ctx.request.url ?? null,\n loadedUrl: ctx.request.loadedUrl ?? null,\n\n dateHandled: ctx.request.handledAt || handledAt,\n numberOfRetries: ctx.request.retryCount,\n } satisfies ApifyEntryMetadata;\n\n return metadata;\n};\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport const apifyIO: ApifyCrawleeOneIO = {\n openDataset: async (...args) => {\n const dataset = await Actor.openDataset(...args);\n const getItemCount = async () => (await dataset.getInfo())?.itemCount ?? null;\n const getItems: CrawleeOneDataset['getItems'] = async (options) => {\n const result = await dataset.getData({\n ...options,\n skipEmpty: true,\n });\n return result.items;\n };\n\n return {\n pushData: dataset.pushData.bind(dataset),\n getItems,\n getItemCount,\n };\n },\n openRequestQueue: async (...args) => {\n let queue = await Actor.openRequestQueue(...args);\n const clear = async () => {\n await queue.drop();\n queue = await Actor.openRequestQueue(...args);\n };\n\n return {\n addRequests: (...args) => queue.addRequests(...args),\n markRequestHandled: (...args) => queue.markRequestHandled(...args),\n fetchNextRequest: (...args) => queue.fetchNextRequest(...args),\n reclaimRequest: (...args) => queue.reclaimRequest(...args),\n isFinished: (...args) => queue.isFinished(...args),\n handledCount: (...args) => queue.handledCount(...args),\n drop: (...args) => queue.drop(...args),\n clear,\n };\n },\n openKeyValueStore: async (...args) => {\n const store = await Actor.openKeyValueStore(...args);\n const clear = async () => {\n await store.forEachKey((key) => store.setValue(key, null));\n };\n\n return {\n getValue: (...args) => store.getValue(...args),\n setValue: (...args) => 
store.setValue(...args),\n drop: (...args) => store.drop(...args),\n clear,\n };\n },\n getEnv: (...args) => Actor.getEnv(...args),\n getInput: (...args) => Actor.getInput(...args),\n runInContext: async (...args) => {\n await Actor.main(...args);\n },\n triggerDownstreamCrawler: (...args) => Actor.metamorph(...args),\n createDefaultProxyConfiguration: async (input: any) => {\n return process.env.APIFY_IS_AT_HOME\n ? await Actor.createProxyConfiguration(input?.proxy)\n : undefined;\n },\n isTelemetryEnabled: () => !!process.env.APIFY_IS_AT_HOME,\n generateErrorReport: generateApifyErrorReport,\n generateEntryMetadata: generateApifyEntryMetadata,\n} satisfies ApifyCrawleeOneIO;\n"]}
|
|
1
|
+
{"version":3,"file":"apify.js","sourceRoot":"","sources":["../../../../src/lib/integrations/apify.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,iCAAwC;AACxC,qCAAsF;AAuCtF,MAAM,wBAAwB,GAA6C,CACzE,KAAK,EACL,OAAO,EACP,EAAE;IACF,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,KAAK,CAAC;IACxC,MAAM,EAAE,EAAE,EAAE,eAAe,EAAE,GAAG,OAAO,CAAC;IAExC,oEAAoE;IACpE,wCAAwC;IACxC,+CAA+C;IAC/C,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,sBAAsB,EAAE,OAAO,EAAE,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC;IAEnF,MAAM,WAAW,GAAG,oCAAoC,OAAO,SAAS,UAAU,EAAE,CAAC;IAErF,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IACnC,MAAM,GAAG,GAAG,SAAS,YAAY,EAAE,CAAC;IAEpC,IAAI,cAAc,GAAkB,IAAI,CAAC;IACzC,IAAI,gBAAgB,GAAkB,IAAI,CAAC;IAC3C,IAAI,OAAO,GAAkB,GAAG,aAAH,GAAG,cAAH,GAAG,GAAI,IAAI,CAAC;IACzC,IAAI,IAAI,IAAI,eAAe,EAAE;QAC3B,OAAO,GAAG,OAAO,IAAI,IAAI,CAAC,GAAG,EAAE,CAAC;QAChC,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACrC,MAAM,yBAAe,CAAC,YAAY,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;QAClD,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,IAAI,CAAC,8BAA8B,CAAC,CAAC;QAC1C,0EAA0E;QAC1E,cAAc,GAAG,6CAA6C,OAAO,YAAY,GAAG,2BAA2B,CAAC;QAChH,gBAAgB,GAAG,6CAA6C,OAAO,YAAY,GAAG,4BAA4B,CAAC;KACpH;IAED,4BAA4B;IAC5B,MAAM,MAAM,GAAG;QACb,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,KAAK,CAAC,IAAI;QACrB,YAAY,EAAE,KAAK,CAAC,QAAQ,EAAE;QAE9B,OAAO;QACP,gBAAgB;QAChB,cAAc;KACY,CAAC;IAE7B,OAAO,MAAM,CAAC;AAChB,CAAC,CAAA,CAAC;AAEF,MAAM,0BAA0B,GAAG,CAA8B,GAAQ,EAAE,EAAE;;IAC3E,MAAM,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,aAAK,CAAC,MAAM,EAAE,CAAC;IAC/C,MAAM,WAAW,GACf,OAAO,IAAI,IAAI,IAAI,UAAU,IAAI,IAAI;QACnC,CAAC,CAAC,oCAAoC,OAAO,SAAS,UAAU,EAAE;QAClE,CAAC,CAAC,IAAI,CAAC;IACX,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE3C,MAAM,QAAQ,GAAG;QACf,OAAO;QACP,UAAU;QACV,WAAW;QACX,SAAS,EAAE,GAAG,CAAC,EAAE;QACjB,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,EAAE,mCAAI,IAAI;QAEjC,WAAW,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,GAAG,mCAAI,IAAI;QACpC,SAAS,EAAE,MAAA,GAAG,CAAC,OAAO,CAAC,SAAS,mCAAI,IAAI;QAExC,WAAW,EAAE,GAAG,CAAC,OAAO,CAAC,SAAS,IAAI,SAAS;QAC/C,eAAe,EAAE,GAAG,CAAC,OAAO,CAAC,UAAU;KACX,CAAC;IAE
/B,OAAO,QAAQ,CAAC;AAClB,CAAC,CAAC;AAEF;;;;GAIG;AACU,QAAA,OAAO,GAAsB;IACxC,WAAW,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC7B,MAAM,OAAO,GAAG,MAAM,aAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,GAAS,EAAE,8DAAC,OAAA,MAAA,MAAA,CAAC,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC,0CAAE,SAAS,mCAAI,IAAI,CAAA,GAAA,CAAC;QAC9E,MAAM,QAAQ,GAAkC,CAAO,OAAO,EAAE,EAAE;YAChE,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,OAAO,iCAC/B,OAAO,KACV,SAAS,EAAE,IAAI,IACf,CAAC;YACH,OAAO,MAAM,CAAC,KAAK,CAAC;QACtB,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC;YACxC,QAAQ;YACR,YAAY;SACb,CAAC;IACJ,CAAC,CAAA;IACD,gBAAgB,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAClC,MAAM,KAAK,GAAG,MAAM,aAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC,CAAC;QACpD,MAAM,KAAK,GAAG,GAAS,EAAE;YACvB,IAAI,GAA0B,CAAC;YAC/B,GAAG;gBACD,GAAG,GAAG,MAAM,KAAK,CAAC,gBAAgB,EAAE,CAAC;gBACrC,IAAI,GAAG;oBAAE,MAAM,KAAK,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC;aAC9C,QAAQ,GAAG,EAAE;QAChB,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,WAAW,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,IAAI,CAAC;YACpD,kBAAkB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,kBAAkB,CAAC,GAAG,IAAI,CAAC;YAClE,gBAAgB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,IAAI,CAAC;YAC9D,cAAc,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,GAAG,IAAI,CAAC;YAC1D,UAAU,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,CAAC,GAAG,IAAI,CAAC;YAClD,YAAY,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC;YACtD,IAAI,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACtC,KAAK;SACN,CAAC;IACJ,CAAC,CAAA;IACD,iBAAiB,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QACnC,MAAM,KAAK,GAAG,MAAM,aAAK,CAAC,iBAAiB,CAAC,GAAG,IAAI,CAAC,CAAC;QACrD,MAAM,KAAK,GAAG,GAAS,EAAE;YACvB,MAAM,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;QAC7D,CAAC,CAAA,CAAC;QAEF,OAAO;YACL,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;YAC9C,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;YAC9C,IAAI,EAAE,CAAC,GAAG,
IAAI,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;YACtC,KAAK;SACN,CAAC;IACJ,CAAC,CAAA;IACD,MAAM,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,MAAM,CAAC,GAAG,IAAI,CAAC;IAC1C,QAAQ,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,QAAQ,CAAC,GAAG,IAAI,CAAC;IAC9C,YAAY,EAAE,CAAO,GAAG,IAAI,EAAE,EAAE;QAC9B,MAAM,aAAK,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;IAC5B,CAAC,CAAA;IACD,wBAAwB,EAAE,CAAC,GAAG,IAAI,EAAE,EAAE,CAAC,aAAK,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC;IAC/D,+BAA+B,EAAE,CAAO,KAAU,EAAE,EAAE;QACpD,OAAO,OAAO,CAAC,GAAG,CAAC,gBAAgB;YACjC,CAAC,CAAC,MAAM,aAAK,CAAC,wBAAwB,CAAC,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,KAAK,CAAC;YACpD,CAAC,CAAC,SAAS,CAAC;IAChB,CAAC,CAAA;IACD,kBAAkB,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB;IACxD,mBAAmB,EAAE,wBAAwB;IAC7C,qBAAqB,EAAE,0BAA0B;CACtB,CAAC","sourcesContent":["import { Actor, ApifyEnv } from 'apify';\nimport { CrawlingContext, Request as CrawleeRequest, playwrightUtils } from 'crawlee';\n\nimport type { CrawleeOneDataset, CrawleeOneIO } from './types';\n\nexport interface ApifyErrorReport {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: string;\n errorName: string;\n errorMessage: string;\n pageUrl: string | null;\n pageHtmlSnapshot: string | null;\n pageScreenshot: string | null;\n}\n\nexport interface ApifyEntryMetadata {\n actorId: string | null;\n actorRunId: string | null;\n actorRunUrl: string | null;\n contextId: string;\n requestId: string | null;\n\n /** The URL given to the crawler */\n originalUrl: string | null;\n /** The URL given to the crawler after possible redirects */\n loadedUrl: string | null;\n\n /** ISO datetime string that indicates the time when the request has been processed. 
*/\n dateHandled: string;\n numberOfRetries: number;\n}\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport type ApifyCrawleeOneIO = CrawleeOneIO<ApifyEnv, ApifyErrorReport, ApifyEntryMetadata>;\n\nconst generateApifyErrorReport: ApifyCrawleeOneIO['generateErrorReport'] = async (\n input,\n options\n) => {\n const { error, page, url, log } = input;\n const { io, allowScreenshot } = options;\n\n // storeId is ID of current key-value store, where we save snapshots\n // We can also capture actor and run IDs\n // to have easy access in the reporting dataset\n const { actorId, actorRunId, defaultKeyValueStoreId: storeId } = await io.getEnv();\n\n const actorRunUrl = `https://console.apify.com/actors/${actorId}/runs/${actorRunId}`;\n\n const randomNumber = Math.random();\n const key = `ERROR-${randomNumber}`;\n\n let pageScreenshot: string | null = null;\n let pageHtmlSnapshot: string | null = null;\n let pageUrl: string | null = url ?? null;\n if (page && allowScreenshot) {\n pageUrl = pageUrl || page.url();\n log?.info('Capturing page snapshot');\n await playwrightUtils.saveSnapshot(page, { key });\n log?.info('DONE capturing page snapshot');\n // You will have to adjust the keys if you save them in a non-standard way\n pageScreenshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.jpg?disableRedirect=true`;\n pageHtmlSnapshot = `https://api.apify.com/v2/key-value-stores/${storeId}/records/${key}.html?disableRedirect=true`;\n }\n\n // We create a report object\n const report = {\n actorId,\n actorRunId,\n actorRunUrl,\n errorName: error.name,\n errorMessage: error.toString(),\n\n pageUrl,\n pageHtmlSnapshot,\n pageScreenshot,\n } satisfies ApifyErrorReport;\n\n return report;\n};\n\nconst generateApifyEntryMetadata = <Ctx extends CrawlingContext>(ctx: Ctx) => {\n const { actorId, actorRunId } = Actor.getEnv();\n const actorRunUrl =\n actorId != null && actorRunId != null\n ? 
`https://console.apify.com/actors/${actorId}/runs/${actorRunId}`\n : null;\n const handledAt = new Date().toISOString();\n\n const metadata = {\n actorId,\n actorRunId,\n actorRunUrl,\n contextId: ctx.id,\n requestId: ctx.request.id ?? null,\n\n originalUrl: ctx.request.url ?? null,\n loadedUrl: ctx.request.loadedUrl ?? null,\n\n dateHandled: ctx.request.handledAt || handledAt,\n numberOfRetries: ctx.request.retryCount,\n } satisfies ApifyEntryMetadata;\n\n return metadata;\n};\n\n/**\n * Integration between CrawleeOne and Apify.\n *\n * This is the default integration.\n */\nexport const apifyIO: ApifyCrawleeOneIO = {\n openDataset: async (...args) => {\n const dataset = await Actor.openDataset(...args);\n const getItemCount = async () => (await dataset.getInfo())?.itemCount ?? null;\n const getItems: CrawleeOneDataset['getItems'] = async (options) => {\n const result = await dataset.getData({\n ...options,\n skipEmpty: true,\n });\n return result.items;\n };\n\n return {\n pushData: dataset.pushData.bind(dataset),\n getItems,\n getItemCount,\n };\n },\n openRequestQueue: async (...args) => {\n const queue = await Actor.openRequestQueue(...args);\n const clear = async () => {\n let req: CrawleeRequest | null;\n do {\n req = await queue.fetchNextRequest();\n if (req) await queue.markRequestHandled(req);\n } while (req);\n };\n\n return {\n addRequests: (...args) => queue.addRequests(...args),\n markRequestHandled: (...args) => queue.markRequestHandled(...args),\n fetchNextRequest: (...args) => queue.fetchNextRequest(...args),\n reclaimRequest: (...args) => queue.reclaimRequest(...args),\n isFinished: (...args) => queue.isFinished(...args),\n handledCount: (...args) => queue.handledCount(...args),\n drop: (...args) => queue.drop(...args),\n clear,\n };\n },\n openKeyValueStore: async (...args) => {\n const store = await Actor.openKeyValueStore(...args);\n const clear = async () => {\n await store.forEachKey((key) => store.setValue(key, null));\n };\n\n return {\n 
getValue: (...args) => store.getValue(...args),\n setValue: (...args) => store.setValue(...args),\n drop: (...args) => store.drop(...args),\n clear,\n };\n },\n getEnv: (...args) => Actor.getEnv(...args),\n getInput: (...args) => Actor.getInput(...args),\n runInContext: async (...args) => {\n await Actor.main(...args);\n },\n triggerDownstreamCrawler: (...args) => Actor.metamorph(...args),\n createDefaultProxyConfiguration: async (input: any) => {\n return process.env.APIFY_IS_AT_HOME\n ? await Actor.createProxyConfiguration(input?.proxy)\n : undefined;\n },\n isTelemetryEnabled: () => !!process.env.APIFY_IS_AT_HOME,\n generateErrorReport: generateApifyErrorReport,\n generateEntryMetadata: generateApifyEntryMetadata,\n} satisfies ApifyCrawleeOneIO;\n"]}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { Log,
|
|
1
|
+
import { Log, RequestQueueOperationOptions } from 'crawlee';
|
|
2
|
+
import type { CrawlerUrl } from '../../types';
|
|
2
3
|
import type { CrawleeOneIO } from '../integrations/types';
|
|
3
|
-
export interface PushRequestsOptions<T extends
|
|
4
|
+
export interface PushRequestsOptions<T extends Exclude<CrawlerUrl, string> = Exclude<CrawlerUrl, string>> {
|
|
4
5
|
io?: CrawleeOneIO<any, any>;
|
|
5
6
|
log?: Log;
|
|
6
7
|
/**
|
|
@@ -36,4 +37,4 @@ export interface PushRequestsOptions<T extends CrawleeRequest = CrawleeRequest>
|
|
|
36
37
|
* - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.
|
|
37
38
|
* - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.
|
|
38
39
|
*/
|
|
39
|
-
export declare const pushRequests: <T extends
|
|
40
|
+
export declare const pushRequests: <T extends import("crawlee").RequestOptions<import("crawlee").Dictionary> | import("crawlee").Request<import("crawlee").Dictionary>>(oneOrManyItems: T | T[], options?: PushRequestsOptions<T> | undefined) => Promise<unknown[]>;
|
|
@@ -11,8 +11,8 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.pushRequests = void 0;
|
|
13
13
|
const crawlee_1 = require("crawlee");
|
|
14
|
-
const requestQueue_1 = require("./requestQueue");
|
|
15
14
|
const apify_1 = require("../integrations/apify");
|
|
15
|
+
const requestQueue_1 = require("./requestQueue");
|
|
16
16
|
const shortenToSize = (entries, maxCount, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
17
17
|
const { requestQueueId, log } = options !== null && options !== void 0 ? options : {};
|
|
18
18
|
const queueName = requestQueueId ? `"${requestQueueId}"` : 'DEFAULT';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pushRequests.js","sourceRoot":"","sources":["../../../../src/lib/io/pushRequests.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,
|
|
1
|
+
{"version":3,"file":"pushRequests.js","sourceRoot":"","sources":["../../../../src/lib/io/pushRequests.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAA4D;AAI5D,iDAAgD;AAChD,iDAAyD;AAoCzD,MAAM,aAAa,GAAG,CACpB,OAAY,EACZ,QAAgB,EAChB,OAAmE,EACnE,EAAE;IACF,MAAM,EAAE,cAAc,EAAE,GAAG,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAE9C,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,cAAc,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;IAErE,MAAM,WAAW,GAAG,IAAA,sCAAuB,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE/D,uDAAuD;IACvD,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;IAC1C,IAAI,MAAM,EAAE;QACV,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,iBAAiB,SAAS,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAChI,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,8EAA8E;IAC9E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/D,IAAI,aAAa,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE;QAC3C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,iBAAiB,SAAS,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAChI,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAEF;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAC1B,cAAuB,EACvB,OAAgC,EAChC,EAAE;IACF,MAAM,EACJ,EAAE,GAAG,eAAuB,EAC5B,GAAG,GAAG,IAAI,aAAG,EAAE,EACf,QAAQ,EACR,SAAS,EACT,MAAM,EACN,cAAc,EACd,YAAY,GACb,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAElB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACpF,MAAM,KAAK,GACT,QAAQ,IAAI,IAAI;QACd,CAAC,CAAC,MAAM,aAAa,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC,SAAS,CAAC;IAEhB,GAAG,CAAC,KAAK,CAAC,qBAAqB,KAAK,CAAC,MAAM,oBAAoB,CAAC,CAAC,CAAC,kBAAkB;IAEpF,MAAM,aAAa,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAO,UAAU,EAAE,IAAI,EAAE,EAAE;QAClE,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC;QAE7B,MAAM,eAAe,GAAG,SAAS,CAAC,CAAC,CAAC,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACjE,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnE,IAAI,YAAY;YAAE,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE5C,OAAO,GAAG,CAAC;IACb,CAAC,CAAA,EAAE,OAAO,CAAC,OAAO,CAAC,EAAe,CAA
C,CAAC,CAAC;IAErC,wCAAwC;IACxC,GAAG,CAAC,IAAI,CAAC,WAAW,aAAa,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;IAC3D,MAAM,QAAQ,CAAC,WAAW,CAAC,aAAsB,EAAE,YAAY,CAAC,CAAC;IACjE,GAAG,CAAC,IAAI,CAAC,gBAAgB,aAAa,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAEnE,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAxCW,QAAA,YAAY,gBAwCvB","sourcesContent":["import { Log, RequestQueueOperationOptions } from 'crawlee';\n\nimport type { CrawlerUrl } from '../../types';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { requestQueueSizeMonitor } from './requestQueue';\n\nexport interface PushRequestsOptions<\n T extends Exclude<CrawlerUrl, string> = Exclude<CrawlerUrl, string>\n> {\n io?: CrawleeOneIO<any, any>;\n log?: Log;\n /**\n * If set, only at most this many requests will be added to the RequestQueue.\n *\n * The count is determined from the RequestQueue that's used for the crawler run.\n *\n * This means that if `maxCount` is set to 50, but the\n * associated RequestQueue already handled 40 requests, then only 10 new requests\n * will be processed.\n */\n maxCount?: number;\n /**\n * Option to freely transform a request before pushing it to the RequestQueue.\n *\n * This serves mainly to allow users to transform the requests from actor input UI.\n */\n transform?: (req: T) => any;\n /**\n * Option to filter a request before pushing it to the RequestQueue.\n *\n * This serves mainly to allow users to filter the requests from actor input UI.\n */\n filter?: (req: T) => any;\n /** ID of the RequestQueue to which the data should be pushed */\n requestQueueId?: string;\n\n // Pass-through options\n queueOptions?: RequestQueueOperationOptions;\n}\n\nconst shortenToSize = async <T>(\n entries: T[],\n maxCount: number,\n options?: { io?: CrawleeOneIO; requestQueueId?: string; log?: Log }\n) => {\n const { requestQueueId, log } = options ?? {};\n\n const queueName = requestQueueId ? 
`\"${requestQueueId}\"` : 'DEFAULT';\n\n const sizeMonitor = requestQueueSizeMonitor(maxCount, options);\n\n // Ignore incoming entries if the queue is already full\n const isFull = await sizeMonitor.isFull();\n if (isFull) {\n log?.warning(`RequestQueue (${queueName}) is already full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n // Show warning when only part of the incoming requests made it into the queue\n const slicedEntries = await sizeMonitor.shortenToSize(entries);\n if (slicedEntries.length !== entries.length) {\n log?.warning(`RequestQueue (${queueName}) has become full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n return slicedEntries;\n};\n\n/**\n * Similar to `Actor.openRequestQueue().addRequests`, but with extra features:\n *\n * - Data can be sent elsewhere, not just to Apify. This is set by the `io` options. By default data is sent using Apify (cloud/local).\n * - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.\n * - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.\n */\nexport const pushRequests = async <T extends Exclude<CrawlerUrl, string>>(\n oneOrManyItems: T | T[],\n options?: PushRequestsOptions<T>\n) => {\n const {\n io = apifyIO as CrawleeOneIO,\n log = new Log(),\n maxCount,\n transform,\n filter,\n requestQueueId,\n queueOptions,\n } = options ?? {};\n\n const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];\n const items =\n maxCount != null\n ? await shortenToSize(manyItems, maxCount, { io, requestQueueId, log })\n : manyItems;\n\n log.debug(`Preparing to push ${items.length} requests to queue`); // prettier-ignore\n\n const adjustedItems = await items.reduce(async (aggPromise, item) => {\n const agg = await aggPromise;\n\n const transformedItem = transform ? 
await transform(item) : item;\n const passedFilter = filter ? await filter(transformedItem) : true;\n\n if (passedFilter) agg.push(transformedItem);\n\n return agg;\n }, Promise.resolve([] as unknown[]));\n\n // Push requests to primary RequestQueue\n log.info(`Pushing ${adjustedItems.length} requests to queue`);\n const reqQueue = await io.openRequestQueue(requestQueueId);\n await reqQueue.addRequests(adjustedItems as any[], queueOptions);\n log.info(`Done pushing ${adjustedItems.length} requests to queue`);\n\n return adjustedItems;\n};\n"]}
|
package/dist/cjs/lib/log.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { type CrawlingContext, LogLevel as CrawleeLogLevel } from 'crawlee';
|
|
2
2
|
import type { ArrVal } from '../utils/types';
|
|
3
|
-
import type { CrawlerRouterWrapper } from './router';
|
|
3
|
+
import type { CrawlerRouterWrapper } from './router/types';
|
|
4
4
|
export declare const LOG_LEVEL: readonly ["debug", "info", "warn", "error", "off"];
|
|
5
5
|
export type LogLevel = ArrVal<typeof LOG_LEVEL>;
|
|
6
6
|
/** Map log levels of `crawlee-one` to log levels of `crawlee` */
|
package/dist/cjs/lib/log.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"log.js","sourceRoot":"","sources":["../../../src/lib/log.ts"],"names":[],"mappings":";;;AAAA,qCAA4E;AAK/D,QAAA,SAAS,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAU,CAAC,CAAC,kBAAkB;AAG/F,iEAAiE;AACpD,QAAA,iBAAiB,GAAsC;IAClE,GAAG,EAAE,kBAAe,CAAC,GAAG;IACxB,KAAK,EAAE,kBAAe,CAAC,KAAK;IAC5B,IAAI,EAAE,kBAAe,CAAC,IAAI;IAC1B,IAAI,EAAE,kBAAe,CAAC,OAAO;IAC7B,KAAK,EAAE,kBAAe,CAAC,KAAK;CAC7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACI,MAAM,sBAAsB,GAAG,CAIpC,QAAkB,EACkB,EAAE;IACtC,OAAO,CAAC,OAAO,EAAE,EAAE;QACjB,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,EAAE,EAAE;YACtB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;YACjD,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,yBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC9C,OAAO,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC;QAC/B,CAAC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC,CAAC;AAbW,QAAA,sBAAsB,0BAajC","sourcesContent":["import { type CrawlingContext, LogLevel as CrawleeLogLevel } from 'crawlee';\n\nimport type { ArrVal } from '../utils/types';\nimport type { CrawlerRouterWrapper } from './router';\n\nexport const LOG_LEVEL = ['debug', 'info', 'warn', 'error', 'off'] as const; // prettier-ignore\nexport type LogLevel = ArrVal<typeof LOG_LEVEL>;\n\n/** Map log levels of `crawlee-one` to log levels of `crawlee` */\nexport const logLevelToCrawlee: Record<LogLevel, CrawleeLogLevel> = {\n off: CrawleeLogLevel.OFF,\n debug: CrawleeLogLevel.DEBUG,\n info: CrawleeLogLevel.INFO,\n warn: CrawleeLogLevel.WARNING,\n error: CrawleeLogLevel.ERROR,\n};\n\n/**\n * Wrapper for Crawlee route handler that configures log level.\n *\n *\n * Usage with Crawlee's `RouterHandler.addDefaultHandler`\n * ```ts\n * const wrappedHandler = logLevelHandlerWrapper('debug')(handler)\n * await router.addDefaultHandler<Ctx>(wrappedHandler);\n * ```\n *\n * Usage with Crawlee's `RouterHandler.addHandler`\n * ```ts\n * const wrappedHandler = logLevelHandlerWrapper('error')(handler)\n * await router.addHandler<Ctx>(wrappedHandler);\n * ```\n *\n * Usage with `createCrawleeOne`\n * 
```ts\n * const actor = await createCrawleeOne<CheerioCrawlingContext>({\n * validateInput,\n * router: createCheerioRouter(),\n * routes,\n * routeHandlers: ({ input }) => createHandlers(input!),\n * routerWrappers: ({ input }) => [\n * logLevelHandlerWrapper<CheerioCrawlingContext<any, any>>(input?.logLevel ?? 'info'),\n * ],\n * createCrawler: ({ router, input }) => createCrawler({ router, input, crawlerConfig }),\n * });\n * ```\n */\nexport const logLevelHandlerWrapper = <\n T extends CrawlingContext,\n RouterCtx extends Record<string, any> = Record<string, any>\n>(\n logLevel: LogLevel\n): CrawlerRouterWrapper<T, RouterCtx> => {\n return (handler) => {\n return (ctx, ...args) => {\n ctx.log.info(`Setting log level to ${logLevel}`);\n ctx.log.setLevel(logLevelToCrawlee[logLevel]);\n return handler(ctx, ...args);\n };\n };\n};\n"]}
|
|
1
|
+
{"version":3,"file":"log.js","sourceRoot":"","sources":["../../../src/lib/log.ts"],"names":[],"mappings":";;;AAAA,qCAA4E;AAK/D,QAAA,SAAS,GAAG,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,CAAU,CAAC,CAAC,kBAAkB;AAG/F,iEAAiE;AACpD,QAAA,iBAAiB,GAAsC;IAClE,GAAG,EAAE,kBAAe,CAAC,GAAG;IACxB,KAAK,EAAE,kBAAe,CAAC,KAAK;IAC5B,IAAI,EAAE,kBAAe,CAAC,IAAI;IAC1B,IAAI,EAAE,kBAAe,CAAC,OAAO;IAC7B,KAAK,EAAE,kBAAe,CAAC,KAAK;CAC7B,CAAC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACI,MAAM,sBAAsB,GAAG,CAIpC,QAAkB,EACkB,EAAE;IACtC,OAAO,CAAC,OAAO,EAAE,EAAE;QACjB,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,EAAE,EAAE;YACtB,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,wBAAwB,QAAQ,EAAE,CAAC,CAAC;YACjD,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,yBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC;YAC9C,OAAO,OAAO,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,CAAC;QAC/B,CAAC,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC,CAAC;AAbW,QAAA,sBAAsB,0BAajC","sourcesContent":["import { type CrawlingContext, LogLevel as CrawleeLogLevel } from 'crawlee';\n\nimport type { ArrVal } from '../utils/types';\nimport type { CrawlerRouterWrapper } from './router/types';\n\nexport const LOG_LEVEL = ['debug', 'info', 'warn', 'error', 'off'] as const; // prettier-ignore\nexport type LogLevel = ArrVal<typeof LOG_LEVEL>;\n\n/** Map log levels of `crawlee-one` to log levels of `crawlee` */\nexport const logLevelToCrawlee: Record<LogLevel, CrawleeLogLevel> = {\n off: CrawleeLogLevel.OFF,\n debug: CrawleeLogLevel.DEBUG,\n info: CrawleeLogLevel.INFO,\n warn: CrawleeLogLevel.WARNING,\n error: CrawleeLogLevel.ERROR,\n};\n\n/**\n * Wrapper for Crawlee route handler that configures log level.\n *\n *\n * Usage with Crawlee's `RouterHandler.addDefaultHandler`\n * ```ts\n * const wrappedHandler = logLevelHandlerWrapper('debug')(handler)\n * await router.addDefaultHandler<Ctx>(wrappedHandler);\n * ```\n *\n * Usage with Crawlee's `RouterHandler.addHandler`\n * ```ts\n * const wrappedHandler = logLevelHandlerWrapper('error')(handler)\n * await router.addHandler<Ctx>(wrappedHandler);\n * ```\n *\n * Usage with 
`createCrawleeOne`\n * ```ts\n * const actor = await createCrawleeOne<CheerioCrawlingContext>({\n * validateInput,\n * router: createCheerioRouter(),\n * routes,\n * routeHandlers: ({ input }) => createHandlers(input!),\n * routerWrappers: ({ input }) => [\n * logLevelHandlerWrapper<CheerioCrawlingContext<any, any>>(input?.logLevel ?? 'info'),\n * ],\n * createCrawler: ({ router, input }) => createCrawler({ router, input, crawlerConfig }),\n * });\n * ```\n */\nexport const logLevelHandlerWrapper = <\n T extends CrawlingContext,\n RouterCtx extends Record<string, any> = Record<string, any>\n>(\n logLevel: LogLevel\n): CrawlerRouterWrapper<T, RouterCtx> => {\n return (handler) => {\n return (ctx, ...args) => {\n ctx.log.info(`Setting log level to ${logLevel}`);\n ctx.log.setLevel(logLevelToCrawlee[logLevel]);\n return handler(ctx, ...args);\n };\n };\n};\n"]}
|