crawlee-one 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +5 -0
  2. package/dist/cjs/composer.d.ts +0 -0
  3. package/dist/cjs/composer.js +93 -0
  4. package/dist/cjs/composer.js.map +1 -0
  5. package/dist/cjs/index.d.ts +3 -2
  6. package/dist/cjs/index.js +3 -5
  7. package/dist/cjs/index.js.map +1 -1
  8. package/dist/cjs/lib/actor/actor.d.ts +15 -20
  9. package/dist/cjs/lib/actor/actor.js +47 -29
  10. package/dist/cjs/lib/actor/actor.js.map +1 -1
  11. package/dist/cjs/lib/actor/types.d.ts +37 -22
  12. package/dist/cjs/lib/actor/types.js.map +1 -1
  13. package/dist/cjs/lib/config.d.ts +6 -6
  14. package/dist/cjs/lib/config.js +3 -3
  15. package/dist/cjs/lib/config.js.map +1 -1
  16. package/dist/cjs/lib/error/errorHandler.d.ts +22 -15
  17. package/dist/cjs/lib/error/errorHandler.js +10 -32
  18. package/dist/cjs/lib/error/errorHandler.js.map +1 -1
  19. package/dist/cjs/lib/integrations/types.d.ts +6 -4
  20. package/dist/cjs/lib/integrations/types.js.map +1 -1
  21. package/dist/cjs/lib/log.d.ts +3 -3
  22. package/dist/cjs/lib/log.js +1 -1
  23. package/dist/cjs/lib/log.js.map +1 -1
  24. package/dist/cjs/lib/router/router.d.ts +35 -12
  25. package/dist/cjs/lib/router/router.js +52 -10
  26. package/dist/cjs/lib/router/router.js.map +1 -1
  27. package/dist/cjs/lib/router/types.d.ts +21 -14
  28. package/dist/cjs/lib/router/types.js +7 -0
  29. package/dist/cjs/lib/router/types.js.map +1 -1
  30. package/dist/cjs/lib/telemetry/sentry.d.ts +3 -0
  31. package/dist/cjs/lib/telemetry/sentry.js +71 -0
  32. package/dist/cjs/lib/telemetry/sentry.js.map +1 -0
  33. package/dist/cjs/lib/telemetry/types.d.ts +28 -0
  34. package/dist/cjs/lib/telemetry/types.js +3 -0
  35. package/dist/cjs/lib/telemetry/types.js.map +1 -0
  36. package/dist/cjs/types.d.ts +25 -6
  37. package/dist/cjs/types.js.map +1 -1
  38. package/package.json +1 -1
package/README.md CHANGED
@@ -53,6 +53,10 @@ Web crawlers written with Crawlee One can be configured via their input to handl
53
53
 
54
54
  ![](./docs/user-guide-input-ui-open.png)
55
55
 
56
+ ## How to write a Crawlee One crawler
57
+
58
+ // TODO
59
+
56
60
  ## Library contents
57
61
 
58
62
  Crawlee One includes a set of utility functions for:
@@ -94,6 +98,7 @@ Crawlee One allows you to configure the following via the input:
94
98
  ## Example projects
95
99
 
96
100
  - [SKCRIS Scraper](https://github.com/JuroOravec/apify-actor-skcris)
101
+ - [Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk)
97
102
 
98
103
  ---
99
104
 
File without changes
@@ -0,0 +1,93 @@
1
+ "use strict";
2
+ // @ts-nocheck
3
+ // interface ComposerCrawlerDef {
4
+ // }
5
+ // ```
6
+ // crawler mainCrawler {
7
+ // type: playwright
8
+ // datasetId: '45678'
9
+ // errorDatasetId: '098765'
10
+ // options: {
11
+ // ...
12
+ // }
13
+ // }
14
+ // route detailPage {
15
+ // // NOTE: If `match` is a regex, the regex is compared against the URL
16
+ // match: /[\W]profesia\.sk\/praca\/zoznam-[a-z0-9-]+\/?(?:[?#~]|$)/i
17
+ // handler: detailPageHandler
18
+ // }
19
+ // ```
20
+ // As JS:
21
+ // ```js
22
+ // import { detailPageHandler } from './handlers';
23
+ // const scraperDef = {
24
+ // crawlers: {
25
+ // mainCrawler {
26
+ // type: playwright
27
+ // datasetId: '45678'
28
+ // errorDatasetId: '098765'
29
+ // options: {
30
+ // ...
31
+ // },
32
+ // }
33
+ // },
34
+ // routes: {
35
+ // detailPage: {
36
+ // match: /[\W]profesia\.sk\/praca\/zoznam-[a-z0-9-]+\/?(?:[?#~]|$)/i,
37
+ // handler: detailPageHandler,
38
+ // }
39
+ // }
40
+ // };
41
+ // ```
42
+ // NOTES:
43
+ // - Enum with available route labels would be extracted from this definition.
44
+ // - If there is only 1 crawler defined, all routes use that. If there are more crawlers,
45
+ // they should define which crawler it relates to.
46
+ // ```
47
+ // crawler mainCrawler {
48
+ // type: playwright
49
+ // datasetId: '45678'
50
+ // errorDatasetId: '098765'
51
+ // options: {
52
+ // ...
53
+ // }
54
+ // ...
55
+ // }
56
+ // crawler productDetailsCrawler {
57
+ // type: cheerio
58
+ // datasetId: '45678'
59
+ // requestQueueId: 'abcdef'
60
+ // options: {
61
+ // ...
62
+ // }
63
+ // }
64
+ // requestQueue extraQueue {
65
+ // ...
66
+ // }
67
+ // dataset mainDataset {
68
+ // datasetId: '45678'
69
+ // ...
70
+ // }
71
+ // keyValueStore mainStore {
72
+ // datasetId: 'xyz'
73
+ // ...
74
+ // }
75
+ // route detailPage {
76
+ // crawler: 'productDetailsCrawler',
77
+ // // NOTE: If `match` is a regex, the regex is compared against the URL
78
+ // match: /[\W]profesia\.sk\/praca\/zoznam-[a-z0-9-]+\/?(?:[?#~]|$)/i
79
+ // handler: detailPageHandler,
80
+ // }
81
+ // route listing {
82
+ // crawler: 'mainCrawler',
83
+ // // Note: route object name is the 'label' by default, but label can be overridden
84
+ // label: 'DETAIL_PAGE'
85
+ // // NOTE: Otherwise `match` is a function that returns true/false
86
+ // match: async (url, ctx, route, handlers) => {
87
+ // const dom = cheerioPortadom(ctx.$.root(), url);
88
+ // const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;
89
+ // return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;
90
+ // },
91
+ // }
92
+ // ```
93
+ //# sourceMappingURL=composer.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"composer.js","sourceRoot":"","sources":["../../src/composer.ts"],"names":[],"mappings":";AAAA,cAAc;AACd,iCAAiC;AAEjC,IAAI;AAEJ,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,qBAAqB;AACrB,0EAA0E;AAC1E,uEAAuE;AACvE,+BAA+B;AAC/B,IAAI;AACJ,MAAM;AAEN,SAAS;AACT,QAAQ;AACR,kDAAkD;AAElD,uBAAuB;AACvB,gBAAgB;AAChB,oBAAoB;AACpB,yBAAyB;AACzB,2BAA2B;AAC3B,iCAAiC;AACjC,mBAAmB;AACnB,cAAc;AACd,WAAW;AACX,QAAQ;AACR,OAAO;AACP,cAAc;AACd,oBAAoB;AACpB,4EAA4E;AAC5E,oCAAoC;AACpC,QAAQ;AACR,MAAM;AACN,KAAK;AACL,MAAM;AAEN,SAAS;AACT,8EAA8E;AAC9E,wFAAwF;AACxF,oDAAoD;AAEpD,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,QAAQ;AACR,IAAI;AAEJ,kCAAkC;AAClC,kBAAkB;AAClB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,4BAA4B;AAC5B,QAAQ;AACR,IAAI;AAEJ,wBAAwB;AACxB,uBAAuB;AACvB,QAAQ;AACR,IAAI;AAEJ,4BAA4B;AAC5B,qBAAqB;AACrB,QAAQ;AACR,IAAI;AAEJ,qBAAqB;AACrB,sCAAsC;AACtC,0EAA0E;AAC1E,uEAAuE;AACvE,gCAAgC;AAChC,IAAI;AAEJ,kBAAkB;AAClB,4BAA4B;AAC5B,qFAAqF;AACrF,yBAAyB;AACzB,qEAAqE;AACrE,kDAAkD;AAClD,sDAAsD;AACtD,+FAA+F;AAC/F,gEAAgE;AAChE,OAAO;AACP,IAAI;AACJ,MAAM","sourcesContent":["// @ts-nocheck\n// interface ComposerCrawlerDef {\n\n// }\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// }\n\n// route detailPage {\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler\n// }\n// ```\n\n// As JS:\n// ```js\n// import { detailPageHandler } from './handlers';\n\n// const scraperDef = {\n// crawlers: {\n// mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// },\n// }\n// },\n// routes: {\n// detailPage: {\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i,\n// handler: 
detailPageHandler,\n// }\n// }\n// };\n// ```\n\n// NOTES:\n// - Enum with available route labels would be extracted from this definition.\n// - If there is only 1 crawler defined, all routes use that. If there is more crawlers,\n// they should define which crawler it relates to.\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// ...\n// }\n\n// crawler productDetailsCrawler {\n// type: cheerio\n// datasetId: '45678'\n// requestQueueId: 'abcdef'\n// options: {\n// ...\n// }\n// }\n\n// requestQueue extraQueue {\n// ...\n// }\n\n// dataset mainDataset {\n// datasetId: '45678'\n// ...\n// }\n\n// keyValueStore mainStore {\n// datasetId: 'xyz'\n// ...\n// }\n\n// route detailPage {\n// crawler: 'productDetailsCrawler',\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler,\n// }\n\n// route listing {\n// crawler: 'mainCrawler',\n// // Note: route object name is the 'label' by default, but label can be overriden\n// label: 'DETAIL_PAGE'\n// // NOTE: Otherwise `match` is a function that returns true/false\n// match: async (url, ctx, route, handlers) => {\n// const dom = cheerioPortadom(ctx.$.root(), url);\n// const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;\n// return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;\n// },\n// }\n// ```\n"]}
@@ -1,4 +1,4 @@
1
- export { createAndRunCrawleeOne, createHttpCrawlerOptions } from './lib/actor/actor';
1
+ export * from './lib/actor/actor';
2
2
  export * from './lib/actor/types';
3
3
  export * from './lib/actorSpec';
4
4
  export * from './lib/config';
@@ -11,7 +11,6 @@ export * from './lib/actions/domUtils';
11
11
  export * from './lib/actions/page';
12
12
  export * from './lib/actions/scrapeListing';
13
13
  export * from './lib/error/errorHandler';
14
- export * from './lib/error/sentry';
15
14
  export * from './lib/migrate/localMigrator';
16
15
  export * from './lib/migrate/localState';
17
16
  export * from './lib/migrate/types';
@@ -25,3 +24,5 @@ export * from './lib/test/mockApifyClient';
25
24
  export type { CrawlerUrl, CrawlerType } from './types';
26
25
  export * from './lib/integrations/apify';
27
26
  export * from './lib/integrations/types';
27
+ export * from './lib/telemetry/types';
28
+ export * from './lib/telemetry/sentry';
package/dist/cjs/index.js CHANGED
@@ -14,10 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.createHttpCrawlerOptions = exports.createAndRunCrawleeOne = void 0;
18
- var actor_1 = require("./lib/actor/actor");
19
- Object.defineProperty(exports, "createAndRunCrawleeOne", { enumerable: true, get: function () { return actor_1.createAndRunCrawleeOne; } });
20
- Object.defineProperty(exports, "createHttpCrawlerOptions", { enumerable: true, get: function () { return actor_1.createHttpCrawlerOptions; } });
17
+ __exportStar(require("./lib/actor/actor"), exports);
21
18
  __exportStar(require("./lib/actor/types"), exports);
22
19
  __exportStar(require("./lib/actorSpec"), exports);
23
20
  __exportStar(require("./lib/config"), exports);
@@ -30,7 +27,6 @@ __exportStar(require("./lib/actions/domUtils"), exports);
30
27
  __exportStar(require("./lib/actions/page"), exports);
31
28
  __exportStar(require("./lib/actions/scrapeListing"), exports);
32
29
  __exportStar(require("./lib/error/errorHandler"), exports);
33
- __exportStar(require("./lib/error/sentry"), exports);
34
30
  __exportStar(require("./lib/migrate/localMigrator"), exports);
35
31
  __exportStar(require("./lib/migrate/localState"), exports);
36
32
  __exportStar(require("./lib/migrate/types"), exports);
@@ -43,4 +39,6 @@ __exportStar(require("./lib/test/actor"), exports);
43
39
  __exportStar(require("./lib/test/mockApifyClient"), exports);
44
40
  __exportStar(require("./lib/integrations/apify"), exports);
45
41
  __exportStar(require("./lib/integrations/types"), exports);
42
+ __exportStar(require("./lib/telemetry/types"), exports);
43
+ __exportStar(require("./lib/telemetry/sentry"), exports);
46
44
  //# sourceMappingURL=index.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,2CAAqF;AAA5E,+GAAA,sBAAsB,OAAA;AAAE,iHAAA,wBAAwB,OAAA;AACzD,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,4DAA0C;AAC1C,2DAAyC;AACzC,sDAAoC;AACpC,qDAAmC;AACnC,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC","sourcesContent":["export { createAndRunCrawleeOne, createHttpCrawlerOptions } from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/error/sentry';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/apify/readme';\nexport * from './lib/readme/apify/types';\nexport * from './lib/router/router';\nexport * from './lib/router/types';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\n"]}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,oDAAkC;AAClC,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,4DAA0C;AAC1C,2DAAyC;AACzC,sDAAoC;AACpC,qDAAmC;AACnC,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC;AACzC,wDAAsC;AACtC,yDAAuC","sourcesContent":["export * from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/apify/readme';\nexport * from './lib/readme/apify/types';\nexport * from './lib/router/router';\nexport * from './lib/router/types';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\nexport * from './lib/telemetry/types';\nexport * from './lib/telemetry/sentry';\n"]}
@@ -1,53 +1,48 @@
1
1
  /// <reference types="lodash" />
2
2
  import { BasicCrawler, CrawlingContext, BasicCrawlerOptions } from 'crawlee';
3
- import * as Sentry from '@sentry/node';
4
3
  import type { CrawlerMeta, CrawlerType } from '../../types';
5
4
  import type { MaybePromise, PickPartial } from '../../utils/types';
6
5
  import type { CrawleeOneIO } from '../integrations/types';
7
- import type { ActorContext, ActorDefinition } from './types';
6
+ import type { CrawleeOneTelemetry } from '../telemetry/types';
7
+ import type { CrawleeOneActorCtx, CrawleeOneActorDef } from './types';
8
8
  /**
9
- * Create default configuration for an opinionated Crawlee actor,
10
- * and run the actor within Apify's `Actor.main()` context.
9
+ * Options available when creating default configuration for an opinionated Crawlee actor,
10
+ * which is then run within Apify's `Actor.main()` context.
11
11
  *
12
12
  * Apify context can be replaced with custom implementation using the `actorConfig.io` option.
13
13
  *
14
14
  * Read more about what this actor does at {@link createCrawleeOne}.
15
15
  */
16
- export declare const createAndRunCrawleeOne: <TCrawlerType extends CrawlerType, Ctx extends CrawlerMeta<TCrawlerType, any>["context"] = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO<object, object, object> = CrawleeOneIO<object, object, object>>(args: {
16
+ export interface RunCrawleeOneOptions<TCrawlerType extends CrawlerType, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO = CrawleeOneIO, Telem extends CrawleeOneTelemetry<any, any> = CrawleeOneTelemetry<any, any>, Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>> {
17
17
  /** String identifying the actor class, e.g. `'cheerio'` */
18
18
  actorType: TCrawlerType;
19
19
  actorName: string;
20
20
  /** Config passed to the {@link createCrawleeOne} */
21
- actorConfig: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, "io" | "router" | "createCrawler">;
21
+ actorConfig: PickPartial<CrawleeOneActorDef<Labels, Input, TIO, Telem, Ctx>, 'router' | 'createCrawler' | 'io' | 'telemetry'>;
22
22
  /**
23
23
  * If using default `createCrawler` implementation, these are crawler options
24
24
  * that may be overridden by user input.
25
25
  */
26
- crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>["options"] | undefined;
26
+ crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];
27
27
  /**
28
28
  * If using default `createCrawler` implementation, these are crawler options
29
29
  * that will override user input.
30
30
  *
31
31
  * This is useful for testing env.
32
32
  */
33
- crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>["options"] | undefined;
34
- /**
35
- * Sentry configuration. If using default `createCrawler` implementation,
36
- * failed requests are optionally reported to Sentry.
37
- *
38
- * To disable Sentry, set `"enabled": false`.
39
- */
40
- sentryOptions?: Sentry.NodeOptions | undefined;
33
+ crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];
41
34
  /**
42
35
  * Callback with the created actor. The callback is called within
43
36
  * the `Actor.main()` context.
44
37
  */
45
- onActorReady?: ((actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>) | undefined;
46
- }) => Promise<void>;
38
+ onActorReady?: (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>) => MaybePromise<void>;
39
+ }
47
40
  /**
48
- * Create opinionated Crawlee crawler that uses router for handling requests.
41
+ * Create an opinionated Crawlee crawler that uses a router for handling requests, and run it within Apify's `Actor.main()` context.
42
+ *
43
+ * Apify context can be replaced with custom implementation using the `actorConfig.io` option.
49
44
  *
50
- * This is a quality-of-life function that does the following for you:
45
+ * This function does the following for you:
51
46
  *
52
47
  * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.
53
48
  *
@@ -72,7 +67,7 @@ export declare const createAndRunCrawleeOne: <TCrawlerType extends CrawlerType,
72
67
  * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom
73
68
  * implementation using the `io` option.
74
69
  */
75
- export declare const createCrawleeOne: <Ctx extends CrawlingContext<unknown, import("crawlee").Dictionary> = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO<object, object, object> = CrawleeOneIO<object, object, object>>(config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, "io">) => Promise<ActorContext<Ctx, Labels, Input, TIO>>;
70
+ export declare const runCrawleeOne: <TType extends CrawlerType, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO<object, object, object> = CrawleeOneIO<object, object, object>, Telem extends CrawleeOneTelemetry<any, any, CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>> = CrawleeOneTelemetry<any, any, CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>>, Ctx extends CrawlerMeta<TType, any>["context"] = CrawlerMeta<TType, any>["context"]>(args: RunCrawleeOneOptions<TType, Labels, Input, TIO, Telem, Ctx>) => Promise<void>;
76
71
  /** Given the actor input, create common crawler options. */
77
72
  export declare const createHttpCrawlerOptions: <TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, Input extends Record<string, any> = Record<string, any>>({ input, defaults, overrides, }: {
78
73
  /** Actor input */
@@ -9,12 +9,11 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
9
9
  });
10
10
  };
11
11
  Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.createHttpCrawlerOptions = exports.createCrawleeOne = exports.createAndRunCrawleeOne = void 0;
12
+ exports.createHttpCrawlerOptions = exports.runCrawleeOne = void 0;
13
13
  const crawlee_1 = require("crawlee");
14
14
  const lodash_1 = require("lodash");
15
15
  const got_scraping_1 = require("got-scraping");
16
16
  const errorHandler_1 = require("../error/errorHandler");
17
- const sentry_1 = require("../error/sentry");
18
17
  const pushData_1 = require("../io/pushData");
19
18
  const dataset_1 = require("../io/dataset");
20
19
  const pushRequests_1 = require("../io/pushRequests");
@@ -31,7 +30,7 @@ const actorClassByType = {
31
30
  puppeteer: crawlee_1.PuppeteerCrawler,
32
31
  };
33
32
  const isRouter = (r) => {
34
- return !!(r.addHandler && r.addDefaultHandler);
33
+ return !!((r === null || r === void 0 ? void 0 : r.addHandler) && (r === null || r === void 0 ? void 0 : r.addDefaultHandler));
35
34
  };
36
35
  const isFunc = (f) => {
37
36
  return typeof f === 'function';
@@ -53,27 +52,48 @@ const genHookFn = (actor, fnStr) => {
53
52
  return (...args) => __awaiter(void 0, void 0, void 0, function* () { return hookFn(...args, hookCtx); });
54
53
  };
55
54
  /**
56
- * Create default configuration for an opinionated Crawlee actor,
57
- * and run the actor within Apify's `Actor.main()` context.
55
+ * Create an opinionated Crawlee crawler that uses a router for handling requests, and run it within Apify's `Actor.main()` context.
58
56
  *
59
57
  * Apify context can be replaced with custom implementation using the `actorConfig.io` option.
60
58
  *
61
- * Read more about what this actor does at {@link createCrawleeOne}.
59
+ * This function does the following for you:
60
+ *
61
+ * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.
62
+ *
63
+ * 2) Get Actor input from `Actor.getInput` if not given.
64
+ *
65
+ * 3) (Optional) Validate Actor input
66
+ *
67
+ * 4) Set up router such that requests that reach default route are
68
+ * redirected to labelled routes based on which item from "routes" they match.
69
+ *
70
+ * 5) Register all route handlers for you.
71
+ *
72
+ * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.
73
+ * if you want to add a field to the context object, or handle errors
74
+ * from a single place.
75
+ *
76
+ * 7) (Optional) Support transformation and filtering of (scraped) entries,
77
+ * configured via Actor input.
78
+ *
79
+ * 8) (Optional) Support Actor metamorphing, configured via Actor input.
80
+ *
81
+ * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom
82
+ * implementation using the `io` option.
62
83
  */
63
- const createAndRunCrawleeOne = (args) => __awaiter(void 0, void 0, void 0, function* () {
64
- const { actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, sentryOptions, onActorReady, } = args;
65
- const { io = apify_1.apifyIO } = actorConfig;
66
- yield (0, sentry_1.setupSentry)(Object.assign(Object.assign({}, sentryOptions), { serverName: actorName }), { io });
84
+ const runCrawleeOne = (args) => __awaiter(void 0, void 0, void 0, function* () {
85
+ const { actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, onActorReady, } = args;
86
+ const { io = apify_1.apifyIO, telemetry } = actorConfig;
67
87
  // See docs:
68
88
  // - https://docs.apify.com/sdk/js/
69
89
  // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk
70
90
  // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk
71
91
  yield io.runInContext(() => __awaiter(void 0, void 0, void 0, function* () {
72
92
  var _a, _b, _c;
93
+ yield (telemetry === null || telemetry === void 0 ? void 0 : telemetry.setup({ actorType, actorName, actorConfig: Object.assign(Object.assign({}, actorConfig), { io }) }));
73
94
  const actorDefaults = {
74
- io,
75
95
  router: crawlee_1.Router.create(),
76
- routerWrappers: ({ input }) => {
96
+ routeHandlerWrappers: ({ input }) => {
77
97
  var _a;
78
98
  return [
79
99
  (0, log_1.logLevelHandlerWrapper)((_a = input === null || input === void 0 ? void 0 : input.logLevel) !== null && _a !== void 0 ? _a : 'info'),
@@ -85,25 +105,27 @@ const createAndRunCrawleeOne = (args) => __awaiter(void 0, void 0, void 0, funct
85
105
  input,
86
106
  defaults: crawlerConfigDefaults,
87
107
  overrides: Object.assign({ requestHandler: router, proxyConfiguration: proxy,
88
- // Capture errors in a separate (Apify) Dataset and pass errors to Sentry
108
+ // Capture errors in a separate (Apify) Dataset and pass errors to telemetry
89
109
  failedRequestHandler: (0, errorHandler_1.createErrorHandler)({
90
110
  io,
91
111
  reportingDatasetId: (_a = input === null || input === void 0 ? void 0 : input.errorReportingDatasetId) !== null && _a !== void 0 ? _a : 'REPORTING',
92
- sendToSentry: (_b = input === null || input === void 0 ? void 0 : input.errorSendToSentry) !== null && _b !== void 0 ? _b : true,
112
+ sendToTelemetry: (_b = input === null || input === void 0 ? void 0 : input.errorSendToTelemetry) !== null && _b !== void 0 ? _b : true,
113
+ onSendErrorToTelemetry: telemetry === null || telemetry === void 0 ? void 0 : telemetry.onSendErrorToTelemetry,
93
114
  }) }, crawlerConfigOverrides),
94
115
  });
95
116
  const CrawlerClass = actorClassByType[actorType];
96
117
  return new CrawlerClass(options);
97
118
  },
98
- routes: [],
99
- routeHandlers: {},
100
119
  };
101
- const actor = yield (0, exports.createCrawleeOne)(Object.assign(Object.assign({}, actorConfig), { io, router: (_a = actorConfig.router) !== null && _a !== void 0 ? _a : actorDefaults.router, routerWrappers: (_b = actorConfig.routerWrappers) !== null && _b !== void 0 ? _b : actorDefaults.routerWrappers, createCrawler: (_c = actorConfig.createCrawler) !== null && _c !== void 0 ? _c : actorDefaults.createCrawler }));
120
+ const actor = yield createCrawleeOne(Object.assign(Object.assign({}, actorConfig), { io, router: (_a = actorConfig.router) !== null && _a !== void 0 ? _a : actorDefaults.router, routeHandlerWrappers: (_b = actorConfig.routeHandlerWrappers) !== null && _b !== void 0 ? _b : actorDefaults.routeHandlerWrappers, createCrawler: (_c = actorConfig.createCrawler) !== null && _c !== void 0 ? _c : actorDefaults.createCrawler }));
102
121
  yield (onActorReady === null || onActorReady === void 0 ? void 0 : onActorReady(actor));
103
122
  }), { statusMessage: 'Crawling finished!' });
104
123
  });
105
- exports.createAndRunCrawleeOne = createAndRunCrawleeOne;
124
+ exports.runCrawleeOne = runCrawleeOne;
106
125
  /**
126
+ * NOTE: If you want to run a scraper, see {@link runCrawleeOne}. This is a lower-level
127
+ * function that should be used only if you want to override the default behaviour of runCrawleeOne.
128
+ *
107
129
  * Create opinionated Crawlee crawler that uses router for handling requests.
108
130
  *
109
131
  * This is a quality-of-life function that does the following for you:
@@ -132,7 +154,7 @@ exports.createAndRunCrawleeOne = createAndRunCrawleeOne;
132
154
  * implementation using the `io` option.
133
155
  */
134
156
  const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function* () {
135
- const { io = apify_1.apifyIO } = config;
157
+ const { io = apify_1.apifyIO, telemetry } = config;
136
158
  // Mutable state that is available to the actor hooks
137
159
  const state = {};
138
160
  // Initialize actor inputs
@@ -161,10 +183,11 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
161
183
  : yield config.router(getConfig());
162
184
  const routes = isFunc(config.routes) ? yield config.routes(getConfig()) : config.routes; // prettier-ignore
163
185
  const routeHandlers = isFunc(config.routeHandlers) ? yield config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore
164
- const routerWrappers = isFunc(config.routerWrappers) ? yield config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore
186
+ const routeHandlerWrappers = isFunc(config.routeHandlerWrappers) ? yield config.routeHandlerWrappers(getConfig()) : config.routeHandlerWrappers; // prettier-ignore
165
187
  // Create Crawlee crawler
166
188
  const getActorCtx = () => ({
167
189
  io,
190
+ telemetry,
168
191
  router,
169
192
  routes,
170
193
  routeHandlers,
@@ -188,26 +211,21 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
188
211
  // Extra data that we make available to the route handlers
189
212
  const routerContext = { actor, pushData: scopedPushData };
190
213
  // Set up router
191
- yield (0, router_1.setupDefaultRoute)({
214
+ yield (0, router_1.setupDefaultHandlers)({
192
215
  io,
193
216
  router,
194
- routerWrappers,
217
+ routeHandlerWrappers,
195
218
  routerContext,
196
219
  routes,
197
220
  routeHandlers,
198
221
  input,
199
222
  });
200
- yield (0, router_1.registerHandlers)({
201
- router,
202
- routerWrappers,
203
- routerContext,
204
- routeHandlers,
205
- });
223
+ // Register labelled handlers
224
+ yield (0, router_1.registerHandlers)(router, routeHandlers, { routerContext, handlerWrappers: routeHandlerWrappers });
206
225
  // Now that the actor is ready, enqueue the URLs right away
207
226
  yield scopedPushRequest(startUrls);
208
227
  return actor;
209
228
  });
210
- exports.createCrawleeOne = createCrawleeOne;
211
229
  const resolveInput = (input, state, options) => __awaiter(void 0, void 0, void 0, function* () {
212
230
  var _d;
213
231
  const { io = apify_1.apifyIO } = options !== null && options !== void 0 ? options : {};
@@ -1 +1 @@
1
- {"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAaiB;AACjB,mCAAgD;AAEhD,+CAA2C;AAI3C,wDAA2D;AAC3D,4CAA8C;AAC9C,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,6CAAuE;AACvE,sCAWmB;AACnB,gCAAmE;AAUnE,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAMhB,KAA4E,EAC5E,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACO,CAAC;IAElC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AAEF;;;;;;;GAOG;AACI,MAAM,sBAAsB,GAAG,CAMpC,IAiCD,EAAiB,EAAE;IAClB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,WAAW,CAAC;IAEnD,MAAM,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,KAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IAEvE,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,aAAa,GAA8D;YAC/E,EAAE;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,yEAAyE;wBACzE,oBAAoB,EAAE,
IAAA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxGW,QAAA,sBAAsB,0BAwGjC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACI,MAAM,gBAAgB,GAAG,CAM9B,MAAmE,EACnB,EAAE;IAClD,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,MAAM,CAAC;IAE9C,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAsB,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,aAAG,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,uBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;IAEnF,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,IAAG,CAAC;IAE/D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,K
AAK,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;QACL,GAAG;KACJ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACsC,CAAC;IAElD,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAkE;QACvF,EAAE;QACF,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAA2D;QAC/E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,2DAA2D;IAC3D,MAAM,iBAAiB,CAAC,SAA6B,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAvGW,QAAA,gBAAgB,oBAuG3B;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAA
Y,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAM7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAyC,EAAE,EAAE;IAC1E,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,
CAAC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC3F,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAA6B,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC/E,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC/F,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAiC,CAAO,OAAO,EAAE,OAAO,EAAE,EAAE;QACjF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACwB,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAC9C,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CA
GtC,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAC5B,KAA2D,EAC3D,EAAE;;IACF,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,mCAAmC,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,IAAI,UAAU;YAAE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7C;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n Log,\n Request as CrawleeRequest,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport * as Sentry from '@sentry/node';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { 
setupSentry } from '../error/sentry';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultRoute } from '../router/router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n LoggingActorInput,\n} from '../config';\nimport { logLevelHandlerWrapper, logLevelToCrawlee } from '../log';\nimport type {\n ActorContext,\n ActorDefinition,\n ActorHookContext,\n ActorRouterContext,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input, TIO>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: gotScraping,\n } satisfies ActorHookContext<TIO>;\n\n const 
hookFn = eval(fnStr);\n if (!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Create default configuration for an opinionated Crawlee actor,\n * and run the actor within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport const createAndRunCrawleeOne = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(args: {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n ActorDefinition<Ctx, Labels, Input, TIO>,\n 'router' | 'createCrawler' | 'io'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. 
The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>;\n}): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n } = args;\n\n const { io = apifyIO as any as TIO } = actorConfig;\n\n await setupSentry({ ...sentryOptions, serverName: actorName }, { io });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllActorInputs, TIO> = {\n io,\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createCrawleeOne<Ctx, Labels, Input, TIO>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? 
(actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const createCrawleeOne = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>\n): Promise<ActorContext<Ctx, Labels, Input, TIO>> => {\n const { io = apifyIO as any as TIO } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? 
await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n const { logLevel } = (input ?? {}) as LoggingActorInput;\n const log = new Log({ level: logLevel ? logLevelToCrawlee[logLevel] : undefined });\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io, log });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? 
await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n log,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies ActorContext<Ctx, Labels, Input, TIO>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels, Input>({\n io,\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n // Now that the actor is ready, enqueue the URLs right away\n await scopedPushRequest(startUrls as CrawleeRequest[]);\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? 
await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? 
{}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? 
{}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: ActorContext['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? 
(item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: ActorContext['pushRequests'] = async (entries, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. 
in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (\n actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n actor.log.debug(`Loading start URLs from function`);\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n if (urlsFromFn) urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}
1
+ {"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAaiB;AACjB,mCAAgD;AAChD,+CAA2C;AAI3C,wDAA2D;AAC3D,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,6CAA0E;AAC1E,sCAWmB;AACnB,gCAAmE;AAWnE,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAC,CAAC,CAAmB,aAAnB,CAAC,uBAAD,CAAC,CAAoB,UAAU,MAAK,CAAmB,aAAnB,CAAC,uBAAD,CAAC,CAAoB,iBAAiB,CAAA,CAAC,CAAC;AACzF,CAAC,CAAC;AAEF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAIhB,KAAoF,EACpF,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACe,CAAC;IAE1C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AA6CF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6BG;AACI,MAAM,aAAa,GAAG,CAQ3B,IAAiE,EAClD,EAAE;IACjB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,SAAS,EAAE,GAAG,WAAW,CAAC;IAE9D,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,CAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,KAAK,CAAC,EAAE,SAAS,EAAE,SAAS,EAAE,WAAW,kCAAO,WAAW,KAAE,EAAE,GAAE,EAAE,CAAC,CAAA,CAAC;QAEtF,MAAM,aAAa,GAGf;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,oBAAoB,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBACnC,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAA4C;oBAClF,KAAK;oBACL,
QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,4EAA4E;wBAC5E,oBAAoB,EAAE,IAAA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,eAAe,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,oBAAoB,mCAAI,IAAI;4BACpD,sBAAsB,EAAE,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,sBAAsB;yBAC1D,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;SACF,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,gBAAgB,iCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,oBAAoB,EAClB,MAAA,WAAW,CAAC,oBAAoB,mCAAK,aAAa,CAAC,oBAA4B,EACjF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxEW,QAAA,aAAa,iBAwExB;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AACH,MAAM,gBAAgB,GAAG,CAOvB,MAA6E,EAChB,EAAE;IAC/D,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,SAAS,EAAE,GAAG,MAAM,CAAC;IAEzD,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAsB,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,aAAG,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,uBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;IAEnF,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,IAAG,CAAC;IAE/D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM
,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,oBAAoB,GAAG,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,oBAAoB,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,kBAAkB;IAEnK,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,SAAS;QACT,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;QACL,GAAG;KACJ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACmD,CAAC;IAE/D,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,6BAAoB,EAKxB;QACA,EAAE;QACF,MAAM;QACN,oBAAoB;QACpB,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IAEH,6BAA6B;IAC7B,MAAM,IAAA,yBAAgB,EACpB,MAAM,EACN,aAAa,EACb,EAAE,aAAa,EAAE,eAAe,EAAE,oBAAoB,EAAE,CACzD,CAAC;IAEF,2DAA2D;IAC3D,MAAM,iBAAiB,CAAC,SAA6B,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,
GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAAY,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAO7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAA+C,EAAE,EAAE;IAChF,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iB
AAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,CAAC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAC3B,KAAiE,EACjE,EAAE;;IACF,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAAmC,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QACrF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAC/B,KAAiE,EACjE,EAAE;;IACF,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAuC,CAAO,OAAO,EAAE,OAAO,EAAE,EAAE;QACvF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACw
B,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAC9C,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGtC,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAC5B,KAAiE,EACjE,EAAE;;IACF,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,mCAAmC,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,IAAI,UAAU;YAAE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7C;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n Log,\n Request as CrawleeRequest,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, 
PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultHandlers } from '../router/router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n LoggingActorInput,\n} from '../config';\nimport { logLevelHandlerWrapper, logLevelToCrawlee } from '../log';\nimport type { CrawleeOneTelemetry } from '../telemetry/types';\nimport type {\n CrawleeOneActorCtx,\n CrawleeOneActorDef,\n CrawleeOneHookCtx,\n CrawleeOneActorRouterCtx,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler)?.addHandler && (r as RouterHandler)?.addDefaultHandler);\n};\n\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<CrawleeOneActorCtx<any, Input, TIO, any, any>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: 
gotScraping,\n } satisfies CrawleeOneHookCtx<Input, TIO>;\n\n const hookFn = eval(fnStr);\n if (!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Options available when creating default configuration for an opinionated Crawlee actor,\n * which is then run within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport interface RunCrawleeOneOptions<\n TCrawlerType extends CrawlerType,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO,\n Telem extends CrawleeOneTelemetry<any, any> = CrawleeOneTelemetry<any, any>,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>\n> {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n CrawleeOneActorDef<Labels, Input, TIO, Telem, Ctx>,\n 'router' | 'createCrawler' | 'io' | 'telemetry'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Callback with the created actor. 
The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>) => MaybePromise<void>;\n}\n\n/**\n * Create opinionated Crawlee crawler that uses, and run it within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * This function does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. 
calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const runCrawleeOne = async <\n TType extends CrawlerType,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO,\n Telem extends CrawleeOneTelemetry<any, any> = CrawleeOneTelemetry<any, any>,\n Ctx extends CrawlerMeta<TType, any>['context'] = CrawlerMeta<TType, any>['context']\n>(\n args: RunCrawleeOneOptions<TType, Labels, Input, TIO, Telem, Ctx>\n): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n onActorReady,\n } = args;\n\n const { io = apifyIO as any as TIO, telemetry } = actorConfig;\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n await telemetry?.setup({ actorType, actorName, actorConfig: { ...actorConfig, io } });\n\n const actorDefaults: Pick<\n CrawleeOneActorDef<Labels, Input & AllActorInputs, TIO, Telem, Ctx>,\n 'router' | 'routeHandlerWrappers' | 'createCrawler'\n > = {\n router: Router.create<Ctx>(),\n routeHandlerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<CrawlerMeta<TType, any>['options'], Input>({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to telemetry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToTelemetry: input?.errorSendToTelemetry ?? 
true,\n onSendErrorToTelemetry: telemetry?.onSendErrorToTelemetry,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n };\n\n const actor = await createCrawleeOne<Labels, Input, TIO, Telem, Ctx>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routeHandlerWrappers:\n actorConfig.routeHandlerWrappers ?? (actorDefaults.routeHandlerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? (actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * NOTE: If you want to run a scraper, see {@link runCrawleeOne}. This is lower-level\n * function that should be used only if you want to override the default behaviour of runCrawleeOne.\n *\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. 
calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nconst createCrawleeOne = async <\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO,\n Telem extends CrawleeOneTelemetry<any, any> = CrawleeOneTelemetry<any, any>,\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>\n>(\n config: PickPartial<CrawleeOneActorDef<Labels, Input, TIO, Telem, Ctx>, 'io'>\n): Promise<CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>> => {\n const { io = apifyIO as any as TIO, telemetry } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n const { logLevel } = (input ?? {}) as LoggingActorInput;\n const log = new Log({ level: logLevel ? logLevelToCrawlee[logLevel] : undefined });\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io, log });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? 
await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routeHandlerWrappers = isFunc(config.routeHandlerWrappers) ? await config.routeHandlerWrappers(getConfig()) : config.routeHandlerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n telemetry,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n log,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultHandlers<\n Ctx,\n CrawleeOneActorRouterCtx<Ctx, Labels, Input, TIO, Telem>,\n Labels,\n Input\n >({\n io,\n router,\n routeHandlerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n\n // Register labelled handlers\n await registerHandlers<Ctx, CrawleeOneActorRouterCtx<Ctx, Labels, Input, TIO, Telem>, Labels>(\n router,\n routeHandlers,\n { routerContext, handlerWrappers: routeHandlerWrappers }\n );\n\n // Now that the actor is ready, enqueue the URLs right away\n await scopedPushRequest(startUrls as CrawleeRequest[]);\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as 
CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO,\n Telem extends CrawleeOneTelemetry<any, any> = CrawleeOneTelemetry<any, any>\n>(\n actor: Omit<\n CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? 
{}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<CrawleeOneActorCtx, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? 
{}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (\n actor: Pick<CrawleeOneActorCtx, 'input' | 'state' | 'io' | 'log'>\n) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: CrawleeOneActorCtx['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? 
(item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (\n actor: Pick<CrawleeOneActorCtx, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: CrawleeOneActorCtx['pushRequests'] = async (entries, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. 
in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (\n actor: Pick<CrawleeOneActorCtx, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n actor.log.debug(`Loading start URLs from function`);\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n if (urlsFromFn) urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}