crawlee-one 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +81 -0
  2. package/dist/cjs/cli/cli.d.ts +1 -0
  3. package/dist/cjs/cli/cli.js +61 -0
  4. package/dist/cjs/cli/cli.js.map +1 -0
  5. package/dist/cjs/cli/index.d.ts +2 -0
  6. package/dist/cjs/cli/index.js +6 -0
  7. package/dist/cjs/cli/index.js.map +1 -0
  8. package/dist/cjs/index.d.ts +24 -0
  9. package/dist/cjs/index.js +43 -0
  10. package/dist/cjs/index.js.map +1 -0
  11. package/dist/cjs/lib/actions/dom.d.ts +102 -0
  12. package/dist/cjs/lib/actions/dom.js +743 -0
  13. package/dist/cjs/lib/actions/dom.js.map +1 -0
  14. package/dist/cjs/lib/actions/domUtils.d.ts +42 -0
  15. package/dist/cjs/lib/actions/domUtils.js +126 -0
  16. package/dist/cjs/lib/actions/domUtils.js.map +1 -0
  17. package/dist/cjs/lib/actions/page.d.ts +69 -0
  18. package/dist/cjs/lib/actions/page.js +205 -0
  19. package/dist/cjs/lib/actions/page.js.map +1 -0
  20. package/dist/cjs/lib/actions/scrapeListing.d.ts +78 -0
  21. package/dist/cjs/lib/actions/scrapeListing.js +242 -0
  22. package/dist/cjs/lib/actions/scrapeListing.js.map +1 -0
  23. package/dist/cjs/lib/actor/actor.d.ts +90 -0
  24. package/dist/cjs/lib/actor/actor.js +306 -0
  25. package/dist/cjs/lib/actor/actor.js.map +1 -0
  26. package/dist/cjs/lib/actor/types.d.ts +162 -0
  27. package/dist/cjs/lib/actor/types.js +3 -0
  28. package/dist/cjs/lib/actor/types.js.map +1 -0
  29. package/dist/cjs/lib/actor.d.ts +189 -0
  30. package/dist/cjs/lib/actor.js +225 -0
  31. package/dist/cjs/lib/actor.js.map +1 -0
  32. package/dist/cjs/lib/actorSpec.d.ts +20 -0
  33. package/dist/cjs/lib/actorSpec.js +3 -0
  34. package/dist/cjs/lib/actorSpec.js.map +1 -0
  35. package/dist/cjs/lib/config.d.ts +561 -0
  36. package/dist/cjs/lib/config.js +707 -0
  37. package/dist/cjs/lib/config.js.map +1 -0
  38. package/dist/cjs/lib/dataset/maxCount.d.ts +30 -0
  39. package/dist/cjs/lib/dataset/maxCount.js +55 -0
  40. package/dist/cjs/lib/dataset/maxCount.js.map +1 -0
  41. package/dist/cjs/lib/dataset/pushData.d.ts +123 -0
  42. package/dist/cjs/lib/dataset/pushData.js +182 -0
  43. package/dist/cjs/lib/dataset/pushData.js.map +1 -0
  44. package/dist/cjs/lib/dataset.d.ts +98 -0
  45. package/dist/cjs/lib/dataset.js +122 -0
  46. package/dist/cjs/lib/dataset.js.map +1 -0
  47. package/dist/cjs/lib/dom.d.ts +78 -0
  48. package/dist/cjs/lib/dom.js +243 -0
  49. package/dist/cjs/lib/dom.js.map +1 -0
  50. package/dist/cjs/lib/error/errorHandler.d.ts +112 -0
  51. package/dist/cjs/lib/error/errorHandler.js +164 -0
  52. package/dist/cjs/lib/error/errorHandler.js.map +1 -0
  53. package/dist/cjs/lib/error/sentry.d.ts +11 -0
  54. package/dist/cjs/lib/error/sentry.js +60 -0
  55. package/dist/cjs/lib/error/sentry.js.map +1 -0
  56. package/dist/cjs/lib/integrations/apify.d.ts +67 -0
  57. package/dist/cjs/lib/integrations/apify.js +106 -0
  58. package/dist/cjs/lib/integrations/apify.js.map +1 -0
  59. package/dist/cjs/lib/integrations/types.d.ts +274 -0
  60. package/dist/cjs/lib/integrations/types.js +3 -0
  61. package/dist/cjs/lib/integrations/types.js.map +1 -0
  62. package/dist/cjs/lib/io/dataset.d.ts +67 -0
  63. package/dist/cjs/lib/io/dataset.js +86 -0
  64. package/dist/cjs/lib/io/dataset.js.map +1 -0
  65. package/dist/cjs/lib/io/maxCount.d.ts +30 -0
  66. package/dist/cjs/lib/io/maxCount.js +55 -0
  67. package/dist/cjs/lib/io/maxCount.js.map +1 -0
  68. package/dist/cjs/lib/io/pushData.d.ts +124 -0
  69. package/dist/cjs/lib/io/pushData.js +193 -0
  70. package/dist/cjs/lib/io/pushData.js.map +1 -0
  71. package/dist/cjs/lib/io/pushRequests.d.ts +38 -0
  72. package/dist/cjs/lib/io/pushRequests.js +63 -0
  73. package/dist/cjs/lib/io/pushRequests.js.map +1 -0
  74. package/dist/cjs/lib/io/requestQueue.d.ts +28 -0
  75. package/dist/cjs/lib/io/requestQueue.js +40 -0
  76. package/dist/cjs/lib/io/requestQueue.js.map +1 -0
  77. package/dist/cjs/lib/log.d.ts +38 -0
  78. package/dist/cjs/lib/log.js +54 -0
  79. package/dist/cjs/lib/log.js.map +1 -0
  80. package/dist/cjs/lib/migrate/localMigrator.d.ts +10 -0
  81. package/dist/cjs/lib/migrate/localMigrator.js +57 -0
  82. package/dist/cjs/lib/migrate/localMigrator.js.map +1 -0
  83. package/dist/cjs/lib/migrate/localState.d.ts +7 -0
  84. package/dist/cjs/lib/migrate/localState.js +43 -0
  85. package/dist/cjs/lib/migrate/localState.js.map +1 -0
  86. package/dist/cjs/lib/migrate/types.d.ts +6 -0
  87. package/dist/cjs/lib/migrate/types.js +3 -0
  88. package/dist/cjs/lib/migrate/types.js.map +1 -0
  89. package/dist/cjs/lib/readme/readme.d.ts +65 -0
  90. package/dist/cjs/lib/readme/readme.js +534 -0
  91. package/dist/cjs/lib/readme/readme.js.map +1 -0
  92. package/dist/cjs/lib/readme/types.d.ts +260 -0
  93. package/dist/cjs/lib/readme/types.js +54 -0
  94. package/dist/cjs/lib/readme/types.js.map +1 -0
  95. package/dist/cjs/lib/router.d.ts +132 -0
  96. package/dist/cjs/lib/router.js +165 -0
  97. package/dist/cjs/lib/router.js.map +1 -0
  98. package/dist/cjs/lib/scraper/scrapeListing.d.ts +78 -0
  99. package/dist/cjs/lib/scraper/scrapeListing.js +242 -0
  100. package/dist/cjs/lib/scraper/scrapeListing.js.map +1 -0
  101. package/dist/cjs/lib/test/actor.d.ts +21 -0
  102. package/dist/cjs/lib/test/actor.js +56 -0
  103. package/dist/cjs/lib/test/actor.js.map +1 -0
  104. package/dist/cjs/lib/test/mockApifyClient.d.ts +32 -0
  105. package/dist/cjs/lib/test/mockApifyClient.js +176 -0
  106. package/dist/cjs/lib/test/mockApifyClient.js.map +1 -0
  107. package/dist/cjs/types.d.ts +31 -0
  108. package/dist/cjs/types.js +3 -0
  109. package/dist/cjs/types.js.map +1 -0
  110. package/dist/cjs/utils/async.d.ts +19 -0
  111. package/dist/cjs/utils/async.js +74 -0
  112. package/dist/cjs/utils/async.js.map +1 -0
  113. package/dist/cjs/utils/error.d.ts +1 -0
  114. package/dist/cjs/utils/error.js +10 -0
  115. package/dist/cjs/utils/error.js.map +1 -0
  116. package/dist/cjs/utils/format.d.ts +9 -0
  117. package/dist/cjs/utils/format.js +19 -0
  118. package/dist/cjs/utils/format.js.map +1 -0
  119. package/dist/cjs/utils/package.d.ts +15 -0
  120. package/dist/cjs/utils/package.js +25 -0
  121. package/dist/cjs/utils/package.js.map +1 -0
  122. package/dist/cjs/utils/types.d.ts +6 -0
  123. package/dist/cjs/utils/types.js +9 -0
  124. package/dist/cjs/utils/types.js.map +1 -0
  125. package/dist/cjs/utils/url.d.ts +9 -0
  126. package/dist/cjs/utils/url.js +32 -0
  127. package/dist/cjs/utils/url.js.map +1 -0
  128. package/dist/cjs/utils/valueMonitor.d.ts +31 -0
  129. package/dist/cjs/utils/valueMonitor.js +91 -0
  130. package/dist/cjs/utils/valueMonitor.js.map +1 -0
  131. package/package.json +85 -0
@@ -0,0 +1,189 @@
1
+ /// <reference types="lodash" />
2
+ import { Actor } from 'apify';
3
+ import { BasicCrawler, CrawlingContext, RouterHandler, ProxyConfiguration, BasicCrawlerOptions } from 'crawlee';
4
+ import * as Sentry from '@sentry/node';
5
+ import type { CrawlerMeta, CrawlerType } from '../types';
6
+ import type { MaybePromise } from '../utils/types';
7
+ import { RouteHandler, CrawlerRouterWrapper, RouteMatcher } from './router';
8
+ import { MetamorphActorInput } from './config';
9
+ import { itemCacheKey, pushData } from './dataset/pushData';
10
+ type MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);
11
+ export interface ActorDefinition<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>> {
12
+ /**
13
+ * Actor input which you can get e.g. via `Actor.getInput()`
14
+ *
15
+ * Input is automatically retrieved if undefined.
16
+ */
17
+ input?: MaybeAsyncFn<Input, [ActorDefinition<Ctx, Labels, Input>]>;
18
+ /** Validation for the actor input. Should throw error if validation fails. */
19
+ validateInput?: (input: Input | null) => MaybePromise<void>;
20
+ /**
21
+ * Router instance that redirects the request to handlers.
22
+ * @example
23
+ * import { createCheerioRouter } from 'crawlee';
24
+ *
25
+ * ({
26
+ * ...
27
+ * router: createCheerioRouter(),
28
+ * })
29
+ */
30
+ router: MaybeAsyncFn<RouterHandler<Ctx>, [ActorDefinitionWithInput<Ctx, Labels, Input>]>;
31
+ /**
32
+ * Criteria that un-labelled requests are matched against.
33
+ *
34
+ * E.g. If `match` function returns truthy value,
35
+ * the request is passed to the `action` function for processing.
36
+ *
37
+ * @example
38
+ * ({
39
+ * ...
40
+ * routes: [{
41
+ * // If match returns true, the request is forwarded to handler
42
+ * // with label JOB_DETAIL.
43
+ * name: 'Job detail',
44
+ * handlerLabel: routeLabels.JOB_DETAIL,
45
+ * match: (url) => isUrlOfJobOffer(url),
46
+ * }, {
47
+ * // Define custom action function:
48
+ * // If match returns true, we replace this request with new one
49
+ * // pointing to new domain.
50
+ * name: 'Main page',
51
+ * handlerLabel: null,
52
+ * match: (url) => url.match(/example\.com\/?(?:[?#~]|$)/i),
53
+ * action: async (url, ctx, _, handlers) => {
54
+ * ctx.log.info(`Redirecting to https://www.new-domain.com`);
55
+ * await ctx.crawler.addRequests(['https://www.new-domain.com'], { forefront: true });
56
+ * },
57
+ * }],
58
+ * })
59
+ */
60
+ routes: MaybeAsyncFn<RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>[], [
61
+ ActorDefinitionWithInput<Ctx, Labels, Input>
62
+ ]>;
63
+ /** Handlers for the labelled requests. The object keys are the labels. */
64
+ routeHandlers: MaybeAsyncFn<Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input>>>, [ActorDefinitionWithInput<Ctx, Labels, Input>]>;
65
+ /**
66
+ * Provides the option to modify or extend all router handlers by wrapping
67
+ * them in these functions.
68
+ *
69
+ * Wrappers are applied from right to left. That means that wrappers `[A, B, C]`
70
+ * will be applied like so `A( B( C( handler ) ) )`.
71
+ */
72
+ routerWrappers?: MaybeAsyncFn<CrawlerRouterWrapper<Ctx, ActorRouterContext<Ctx, Labels, Input>>[], [ActorDefinitionWithInput<Ctx, Labels, Input>]>;
73
+ proxy?: MaybeAsyncFn<ProxyConfiguration, [ActorDefinitionWithInput<Ctx, Labels, Input>]>;
74
+ createCrawler: (actorCtx: Omit<ActorContext<Ctx, Labels, Input>, 'crawler' | 'runCrawler' | 'metamorph' | 'pushData'>) => MaybePromise<Ctx['crawler']>;
75
+ }
76
+ /** ActorDefinition object where the input is already resolved */
77
+ export type ActorDefinitionWithInput<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>> = Omit<ActorDefinition<Ctx, Labels, Input>, 'input'> & {
78
+ input: Input | null;
79
+ state: Record<string, unknown>;
80
+ };
81
+ /** Context available while creating an Apify/Crawlee crawler */
82
+ export interface ActorContext<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>> {
83
+ crawler: Ctx['crawler'];
84
+ /**
85
+ * This function wraps `crawler.run(requests, runOptions)` with additional
86
+ * features:
87
+ * - Automatically metamorph into another actor after the run finishes
88
+ */
89
+ runCrawler: RunCrawler<Ctx>;
90
+ /** Trigger actor metamorph, using actor's inputs as defaults. */
91
+ metamorph: Metamorph;
92
+ pushData: typeof pushData;
93
+ proxy?: ProxyConfiguration;
94
+ router: RouterHandler<Ctx>;
95
+ routes: RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>[];
96
+ routeHandlers: Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input>>>;
97
+ /** Original config from which this actor context was created */
98
+ config: ActorDefinition<Ctx, Labels, Input>;
99
+ /** Read-only inputs passed to the actor */
100
+ input: Input | null;
101
+ /** Mutable state that is shared across setup and teardown hooks */
102
+ state: Record<string, unknown>;
103
+ }
104
+ type OrigRunCrawler<T extends CrawlingContext<any, any>> = BasicCrawler<T>['run'];
105
+ /** Extended type of `crawler.run()` function */
106
+ export type RunCrawler<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>> = (requests?: Parameters<OrigRunCrawler<Ctx>>[0], options?: Parameters<OrigRunCrawler<Ctx>>[1]) => ReturnType<OrigRunCrawler<Ctx>>;
107
+ /** Trigger actor metamorph, using actor's inputs as defaults. */
108
+ export type Metamorph = (overrides?: MetamorphActorInput) => Promise<void>;
109
+ /** Context passed to user-defined functions passed from input */
110
+ export type ActorHookContext = Pick<ActorContext, 'input' | 'state'> & {
111
+ Actor: typeof Actor;
112
+ itemCacheKey: typeof itemCacheKey;
113
+ };
114
+ /** Context passed to route handlers */
115
+ export type ActorRouterContext<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>> = {
116
+ actor: ActorContext<Ctx, Labels, Input>;
117
+ };
118
+ /**
119
+ * Create opinionated Apify crawler that uses router for handling requests.
120
+ *
121
+ * This is a quality-of-life function that does the following for you:
122
+ *
123
+ * 1) TypeScript - Ensure all components use the same Crawler / CrawlerContext.
124
+ *
125
+ * 2) Get Actor input from `Actor.getInput` if not given.
126
+ *
127
+ * 3) (Optional) Validate Actor input
128
+ *
129
+ * 4) Set up router such that requests that reach default route are
130
+ * redirected to labelled routes based on the "routes" items.
131
+ *
132
+ * 5) Register all route handlers for you.
133
+ *
134
+ * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.
135
+ * if you want to add a field to the context object, or handle errors
136
+ * from a single place.
137
+ */
138
+ export declare const createApifyActor: <Ctx extends CrawlingContext<unknown, import("crawlee").Dictionary> = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>>(config: ActorDefinition<Ctx, Labels, Input>) => Promise<ActorContext<Ctx, Labels, Input>>;
139
+ /**
140
+ * Create default configuration for an Apify actor
141
+ * and run the actor within the `Actor.main()` context.
142
+ */
143
+ export declare const createAndRunApifyActor: <TCrawlerType extends CrawlerType, Ctx extends CrawlerMeta<TCrawlerType, any>["context"] = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>>({ actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, sentryOptions, onActorReady, }: {
144
+ /** String identifying the actor class, e.g. `'cheerio'` */
145
+ actorType: TCrawlerType;
146
+ actorName: string;
147
+ /** Config passed to the {@link createApifyActor} */
148
+ actorConfig: Omit<ActorDefinition<Ctx, Labels, Input>, "router" | "createCrawler"> & Partial<Pick<ActorDefinition<Ctx, Labels, Input>, "router" | "createCrawler">>;
149
+ /**
150
+ * If using default `createCrawler` implementation, these are crawler options
151
+ * that may be overridden by user input.
152
+ */
153
+ crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>["options"] | undefined;
154
+ /**
155
+ * If using default `createCrawler` implementation, these are crawler options
156
+ * that will override user input.
157
+ *
158
+ * This is useful for testing env.
159
+ */
160
+ crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>["options"] | undefined;
161
+ /**
162
+ * Sentry configuration. If using default `createCrawler` implementation,
163
+ * failed requests are optionally reported to Sentry.
164
+ *
165
+ * To disable Sentry, set `"enabled": false`.
166
+ */
167
+ sentryOptions?: Sentry.NodeOptions | undefined;
168
+ /**
169
+ * Callback with the created actor. The callback is called within
170
+ * the `Actor.main()` context.
171
+ */
172
+ onActorReady?: ((actor: ActorContext<Ctx, Labels, Input>) => MaybePromise<void>) | undefined;
173
+ }) => Promise<void>;
174
+ /** Given the actor input, create common crawler options. */
175
+ export declare const createHttpCrawlerOptions: <TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, Input extends Record<string, any> = Record<string, any>>({ input, defaults, overrides, }: {
176
+ /** Actor input */
177
+ input: Input | null;
178
+ /**
179
+ * Default config options set by us. These may be overridden
180
+ * by values from actor input (set by user).
181
+ */
182
+ defaults?: TOpts | undefined;
183
+ /**
184
+ * These config options will overwrite both the default and user
185
+ * options. This is useful for hard-setting values e.g. in tests.
186
+ */
187
+ overrides?: TOpts | undefined;
188
+ }) => Partial<TOpts> & import("lodash").Dictionary<TOpts["requestHandler"] | TOpts["handleRequestFunction"] | TOpts["requestList"] | TOpts["requestQueue"] | TOpts["requestHandlerTimeoutSecs"] | TOpts["handleRequestTimeoutSecs"] | TOpts["errorHandler"] | TOpts["failedRequestHandler"] | TOpts["handleFailedRequestFunction"] | TOpts["maxRequestRetries"] | TOpts["maxRequestsPerCrawl"] | TOpts["autoscaledPoolOptions"] | TOpts["minConcurrency"] | TOpts["maxConcurrency"] | TOpts["maxRequestsPerMinute"] | TOpts["keepAlive"] | TOpts["useSessionPool"] | TOpts["sessionPoolOptions"] | TOpts["loggingInterval"] | TOpts["log"]>;
189
+ export {};
@@ -0,0 +1,225 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) { // Reuse an already-defined `this.__awaiter` if there is one; otherwise define the generator-to-promise driver used by the `__awaiter(..., function* () {...})` calls in this file.
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); } // Pass `P` instances through unchanged; wrap anything else in a `P` that resolves to it.
4
+ return new (P || (P = Promise))(function (resolve, reject) { // `P` falls back to the built-in Promise when the caller passes a falsy value.
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } } // Resume the generator with a resolved value; a synchronous throw rejects the outer promise.
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } } // Throw the rejection into the generator so its own try/catch can handle it.
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); } // Done: settle with the return value. Otherwise wait on the yielded value and continue.
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next()); // Instantiate the generator with the given `this`/arguments and run it up to its first `yield`.
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.createHttpCrawlerOptions = exports.createAndRunApifyActor = exports.createApifyActor = void 0;
13
+ const apify_1 = require("apify");
14
+ const crawlee_1 = require("crawlee");
15
+ const lodash_1 = require("lodash");
16
+ const router_1 = require("./router");
17
+ const config_1 = require("./config");
18
+ const errorHandler_1 = require("./error/errorHandler");
19
+ const sentry_1 = require("./error/sentry");
20
+ const log_1 = require("./log");
21
+ const pushData_1 = require("./dataset/pushData");
22
/**
 * Duck-type check: does `r` look like a router instance?
 *
 * A value counts as a router when it exposes both `addHandler` and
 * `addDefaultHandler`. Nullish values are rejected up front so that the
 * property reads below can never throw.
 *
 * @param r Candidate value (router instance, factory function, or anything else).
 * @returns `true` when both members are present and truthy, `false` otherwise.
 */
const isRouter = (r) => {
    // `null` / `undefined` have no properties; reading from them would throw a TypeError.
    if (r == null)
        return false;
    return !!(r.addHandler && r.addDefaultHandler);
};
25
/**
 * Tell whether the given value is callable.
 *
 * @param f Any value.
 * @returns `true` when `typeof f` is `'function'`.
 */
const isFunc = (f) => typeof f === 'function';
28
/**
 * Run a function that was defined as a string via Actor input.
 *
 * The string is evaluated, and the resulting function is called with `args`
 * followed by a hook context object (`Actor`, `input`, `state`, `itemCacheKey`).
 *
 * SECURITY(review): `fnStr` is executed with `eval`, so whoever controls the
 * Actor input can run arbitrary code in this process. Only feed this with
 * input from a trusted source.
 *
 * @param actor Object exposing the actor's `input` and `state`.
 * @param fnStr Source text that must evaluate to a function. Falsy values are a no-op.
 * @param args Positional arguments passed to the hook before the hook context.
 * @returns Promise of whatever the hook returns (or resolves to);
 *          `undefined` when `fnStr` is falsy.
 * @throws {TypeError} When `fnStr` does not evaluate to a function.
 */
const evalInputHook = (actor, fnStr, args = []) => __awaiter(void 0, void 0, void 0, function* () {
    if (!fnStr)
        return;
    const hookCtx = {
        Actor: apify_1.Actor,
        input: actor.input,
        state: actor.state,
        itemCacheKey: pushData_1.itemCacheKey,
    };
    // Direct `eval` on purpose: see the SECURITY note in the doc comment.
    const hookFn = eval(fnStr);
    if (typeof hookFn !== 'function') {
        throw new TypeError(`Input hook must evaluate to a function, got ${typeof hookFn}`);
    }
    // Return the hook's result. Callers that use hooks as transform/filter
    // callbacks need the value; without the `return` they only ever see `undefined`.
    return yield hookFn(...args, hookCtx);
});
41
/**
 * Build an opinionated Apify actor whose requests are dispatched by a router.
 *
 * Steps performed, in order:
 *
 * 1) Resolve the actor input: use `config.input` (calling it when it is a
 *    function), or fall back to `Actor.getInput()` when it is not given.
 *    The resolved input is frozen.
 *
 * 2) (Optional) Run `config.validateInput` on the resolved input.
 *
 * 3) Resolve the proxy, router, routes, route handlers and router wrappers,
 *    calling each one when it was supplied as an initialization function.
 *
 * 4) Create the crawler via `config.createCrawler`.
 *
 * 5) Attach the scoped `runCrawler`, `metamorph` and `pushData` helpers.
 *
 * 6) Set up the default route and register all route handlers on the router.
 *
 * @param config Actor definition.
 * @returns Promise of the assembled actor context.
 */
const createApifyActor = (config) => __awaiter(void 0, void 0, void 0, function* () {
    // --- Actor input ---
    let resolvedInput;
    if (!config.input) {
        resolvedInput = yield apify_1.Actor.getInput();
    }
    else if (isFunc(config.input)) {
        resolvedInput = yield config.input(Object.assign({}, config));
    }
    else {
        resolvedInput = config.input;
    }
    const input = Object.freeze(resolvedInput);
    if (config.validateInput) {
        yield config.validateInput(input);
    }
    // Mutable state that is available to the actor hooks
    const state = {};
    // Argument handed to every option that was given as an initialization function
    const getConfig = () => Object.assign({}, config, { input, state });
    // --- Proxy ---
    let proxy;
    if (config.proxy == null) {
        // No proxy configured: create one from the input, but only when
        // the APIFY_IS_AT_HOME env var is set.
        proxy = process.env.APIFY_IS_AT_HOME
            ? yield apify_1.Actor.createProxyConfiguration(input == null ? undefined : input.proxy)
            : undefined;
    }
    else if (isFunc(config.proxy)) {
        proxy = yield config.proxy(getConfig());
    }
    else {
        proxy = config.proxy;
    }
    // --- Router and routes ---
    // The router check comes first: anything that is not a router is treated as a factory.
    let router;
    if (isRouter(config.router)) {
        router = config.router;
    }
    else {
        router = yield config.router(getConfig());
    }
    let routes = config.routes;
    if (isFunc(config.routes)) {
        routes = yield config.routes(getConfig());
    }
    let routeHandlers = config.routeHandlers;
    if (isFunc(config.routeHandlers)) {
        routeHandlers = yield config.routeHandlers(getConfig());
    }
    let routerWrappers = config.routerWrappers;
    if (isFunc(config.routerWrappers)) {
        routerWrappers = yield config.routerWrappers(getConfig());
    }
    // --- Crawlee crawler ---
    const getActorCtx = () => ({ router, routes, routeHandlers, proxy, config, input, state });
    const crawler = yield config.createCrawler(getActorCtx());
    // --- Actor (our custom entity) ---
    const preActor = Object.assign({ crawler }, getActorCtx());
    const runCrawler = createScopedCrawlerRun(preActor);
    const metamorph = createScopedMetamorph(preActor);
    const scopedPushData = createScopedPushData(preActor);
    const actor = Object.assign({}, preActor, {
        crawler,
        runCrawler,
        metamorph,
        pushData: scopedPushData,
    });
    // Extra data that we make available to the route handlers
    const routerContext = { actor, pushData: scopedPushData };
    // --- Router wiring ---
    yield (0, router_1.setupDefaultRoute)({ router, routerWrappers, routerContext, routes, routeHandlers });
    yield (0, router_1.registerHandlers)({ router, routerWrappers, routerContext, routeHandlers });
    return actor;
});
119
+ exports.createApifyActor = createApifyActor;
120
/**
 * Build a `metamorph` function bound to the given actor.
 *
 * The returned function merges the caller's `overrides` over the actor input
 * (overrides win) and calls `Actor.metamorph` with the resulting
 * `metamorphActorId`, `metamorphActorInput` and `metamorphActorBuild`.
 * It is a no-op when no `metamorphActorId` ends up being set.
 *
 * @param actor Object exposing the actor's `input`.
 * @returns Async function `(overrides?) => Promise<void>`.
 */
const createScopedMetamorph = (actor) => {
    const metamorph = (overrides) => __awaiter(void 0, void 0, void 0, function* () {
        const actorInput = actor.input;
        const fallbackValues = actorInput == null ? {} : actorInput;
        const settings = (0, lodash_1.defaults)({}, overrides, fallbackValues); // prettier-ignore
        const targetActorId = settings.metamorphActorId;
        const targetActorInput = settings.metamorphActorInput;
        const targetActorBuild = settings.metamorphActorBuild;
        // Metamorph is triggered only if a target actor was set
        if (!targetActorId) {
            return;
        }
        yield apify_1.Actor.metamorph(targetActorId, targetActorInput, { build: targetActorBuild });
    });
    return metamorph;
};
132
/**
 * Build a `pushData` wrapper whose options are pre-populated from the actor input.
 *
 * Input fields map to `pushData` options as follows:
 * `includePersonalData` → `showPrivate`, `outputPickFields` → `pickKeys`,
 * `outputRenameFields` → `remapKeys`, `outputTransform` → `transform`,
 * `outputFilter` → `filter`, `outputDatasetIdOrName` → `datasetIdOrName`,
 * `outputCacheStoreIdOrName` → `cacheStoreIdOrName`,
 * `outputCachePrimaryKeys` → `cachePrimaryKeys`,
 * `outputCacheActionOnResult` → `cacheActionOnResult`.
 * Options passed explicitly by the caller are assigned over these.
 *
 * @param actor Object exposing the actor's `input` (and whatever `evalInputHook` reads).
 * @returns Function `(entries, ctx, options) => pushData(...)`.
 */
const createScopedPushData = (actor) => {
    // Turn a hook given as source text into a per-item callback; nothing to call when it is unset.
    const toItemHook = (hookSource) => {
        if (!hookSource) {
            return undefined;
        }
        return (item) => evalInputHook(actor, hookSource, [item]);
    };
    const scopedPushData = (entries, ctx, options) => {
        const actorInput = actor.input;
        const source = actorInput == null ? {} : actorInput;
        const optionsFromInput = {
            showPrivate: source.includePersonalData,
            pickKeys: source.outputPickFields,
            remapKeys: source.outputRenameFields,
            transform: toItemHook(source.outputTransform),
            filter: toItemHook(source.outputFilter),
            datasetIdOrName: source.outputDatasetIdOrName,
            cacheStoreIdOrName: source.outputCacheStoreIdOrName,
            cachePrimaryKeys: source.outputCachePrimaryKeys,
            cacheActionOnResult: source.outputCacheActionOnResult,
        };
        const mergedOptions = Object.assign(optionsFromInput, options);
        return (0, pushData_1.pushData)(entries, ctx, mergedOptions);
    };
    return scopedPushData;
};
142
/**
 * Build a function that wraps `crawler.run(requests, runOptions)` with extra steps
 * driven by the actor input:
 * - Drop the cache key-value store first, when `outputCacheStoreIdOrName` is set
 *   and `outputCacheActionOnResult` is `'overwrite'`
 * - Run the `outputTransformBefore` / `outputFilterBefore` hooks before the crawl
 * - Run the `outputTransformAfter` / `outputFilterAfter` hooks after the crawl
 * - Call the scoped metamorph after the run finishes
 *
 * The input fields are read once, when this wrapper is created.
 *
 * @param actor Object exposing the actor's `input` and `crawler`.
 * @returns Async function `(requests?, options?)` resolving to the result of `crawler.run`.
 */
const createScopedCrawlerRun = (actor) => {
    const actorInput = actor.input;
    const source = actorInput == null ? {} : actorInput;
    const hooksBeforeRun = [source.outputTransformBefore, source.outputFilterBefore];
    const hooksAfterRun = [source.outputTransformAfter, source.outputFilterAfter];
    const cacheStoreIdOrName = source.outputCacheStoreIdOrName;
    const cacheAction = source.outputCacheActionOnResult;
    const metamorph = createScopedMetamorph(actor);
    const runCrawler = (requests, options) => __awaiter(void 0, void 0, void 0, function* () {
        // Clear cache if it was set from the input
        if (cacheStoreIdOrName && cacheAction === 'overwrite') {
            const store = yield apify_1.Actor.openKeyValueStore(cacheStoreIdOrName);
            yield store.drop();
        }
        for (const hookSource of hooksBeforeRun) {
            yield evalInputHook(actor, hookSource);
        }
        const runRes = yield actor.crawler.run(requests, options);
        for (const hookSource of hooksAfterRun) {
            yield evalInputHook(actor, hookSource);
        }
        // Trigger metamorph if it was set from the input
        yield metamorph();
        return runRes;
    });
    return runCrawler;
};
168
+ const actorClassByType = { // Maps an `actorType` string to the crawler class that the default `createCrawler` instantiates.
169
+ basic: crawlee_1.BasicCrawler,
170
+ http: crawlee_1.HttpCrawler,
171
+ cheerio: crawlee_1.CheerioCrawler,
172
+ jsdom: crawlee_1.JSDOMCrawler,
173
+ playwright: crawlee_1.PlaywrightCrawler,
174
+ puppeteer: crawlee_1.PuppeteerCrawler,
175
+ };
176
/**
 * Create default configuration for an Apify actor
 * and run the actor within the `Actor.main()` context.
 *
 * Defaults that apply when `actorConfig` does not provide its own:
 * - `router`: a fresh `Router.create()`
 * - `routerWrappers`: a log-level wrapper using `input.logLevel` (default `'info'`)
 * - `createCrawler`: instantiates the crawler class matching `actorType`, with
 *   options built by `createHttpCrawlerOptions` and a failed-request handler
 *   from `createErrorHandler`
 *
 * @param options.actorType String identifying the crawler class, e.g. `'cheerio'`.
 * @param options.actorName Passed to Sentry as `serverName`.
 * @param options.actorConfig Config passed to `createApifyActor`.
 * @param options.crawlerConfigDefaults Crawler options that may be overridden by input
 *        (default `createCrawler` only).
 * @param options.crawlerConfigOverrides Crawler options that override input
 *        (default `createCrawler` only).
 * @param options.sentryOptions Options forwarded to `setupSentry`.
 * @param options.onActorReady Callback invoked with the created actor, inside `Actor.main()`.
 * @returns Promise that settles when `Actor.main()` settles.
 * @throws {TypeError} From the default `createCrawler` when `actorType` is not a known crawler type.
 */
const createAndRunApifyActor = ({ actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, sentryOptions, onActorReady, }) => __awaiter(void 0, void 0, void 0, function* () {
    // `serverName` is assigned last, so it always comes from `actorName`.
    (0, sentry_1.setupSentry)(Object.assign(Object.assign({}, sentryOptions), { serverName: actorName }));
    // See docs:
    // - https://docs.apify.com/sdk/js/
    // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk
    // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk
    yield apify_1.Actor.main(() => __awaiter(void 0, void 0, void 0, function* () {
        var _a, _b, _c;
        const actorDefaults = {
            router: crawlee_1.Router.create(),
            routerWrappers: ({ input }) => {
                var _a;
                return [
                    (0, log_1.logLevelHandlerWrapper)((_a = input === null || input === void 0 ? void 0 : input.logLevel) !== null && _a !== void 0 ? _a : 'info'),
                ];
            },
            createCrawler: ({ router, proxy, input }) => {
                var _a, _b;
                // Fail fast with a readable message instead of `new undefined(...)`.
                // The own-property check keeps inherited keys such as "constructor"
                // from being mistaken for a crawler type.
                const CrawlerClass = Object.prototype.hasOwnProperty.call(actorClassByType, actorType)
                    ? actorClassByType[actorType]
                    : undefined;
                if (!CrawlerClass) {
                    throw new TypeError(`Unknown actorType "${String(actorType)}". Expected one of: ${Object.keys(actorClassByType).join(', ')}`);
                }
                const options = (0, exports.createHttpCrawlerOptions)({
                    input,
                    defaults: crawlerConfigDefaults,
                    // `crawlerConfigOverrides` is assigned last, so it wins over the three keys set here.
                    overrides: Object.assign({ requestHandler: router, proxyConfiguration: proxy,
                        // Capture errors as a separate Apify/Actor dataset and pass errors to Sentry
                        failedRequestHandler: (0, errorHandler_1.createErrorHandler)({
                            reportingDatasetId: (_a = input === null || input === void 0 ? void 0 : input.errorReportingDatasetId) !== null && _a !== void 0 ? _a : 'REPORTING',
                            sendToSentry: (_b = input === null || input === void 0 ? void 0 : input.errorSendToSentry) !== null && _b !== void 0 ? _b : true,
                        }) }, crawlerConfigOverrides),
                });
                return new CrawlerClass(options);
            },
            // NOTE(review): these two defaults are not read by the merge below.
            routes: [],
            routeHandlers: {},
        };
        const actor = yield (0, exports.createApifyActor)(Object.assign(Object.assign({}, actorConfig), { router: (_a = actorConfig.router) !== null && _a !== void 0 ? _a : actorDefaults.router, routerWrappers: (_b = actorConfig.routerWrappers) !== null && _b !== void 0 ? _b : actorDefaults.routerWrappers, createCrawler: (_c = actorConfig.createCrawler) !== null && _c !== void 0 ? _c : actorDefaults.createCrawler }));
        yield (onActorReady === null || onActorReady === void 0 ? void 0 : onActorReady(actor));
    }), { statusMessage: 'Crawling finished!' });
});
218
+ exports.createAndRunApifyActor = createAndRunApifyActor;
219
/**
 * Given the actor input, create common crawler options.
 *
 * Three layers are merged, later ones winning over earlier ones:
 * `defaults`, then the fields of `input` whose keys appear in `crawlerInput`,
 * then `overrides`. Fields whose value is `undefined` are dropped from each
 * layer before merging, so they never overwrite a value from an earlier layer.
 *
 * @param options.input Actor input (may be `null`).
 * @param options.defaults Options that input values may replace.
 * @param options.overrides Options that replace both defaults and input values.
 * @returns A new object with the merged options.
 */
const createHttpCrawlerOptions = ({ input, defaults, overrides, }) => {
    const isUnset = (field) => field === undefined;
    const withoutUnset = (layer) => (0, lodash_1.omitBy)(layer, isUnset);
    const defaultLayer = withoutUnset(defaults == null ? {} : defaults);
    const crawlerInputKeys = Object.keys(config_1.crawlerInput);
    const inputLayer = withoutUnset((0, lodash_1.pick)(input == null ? {} : input, crawlerInputKeys));
    const overrideLayer = withoutUnset(overrides == null ? {} : overrides);
    return Object.assign({}, defaultLayer, inputLayer, overrideLayer);
};
224
+ exports.createHttpCrawlerOptions = createHttpCrawlerOptions;
225
+ //# sourceMappingURL=actor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../src/lib/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,iCAA8B;AAC9B,qCAYiB;AACjB,mCAAgD;AAKhD,qCAMkB;AAClB,qCAQkB;AAClB,uDAA0D;AAC1D,2CAA6C;AAC7C,+BAA+C;AAC/C,iDAA4D;AAI5D,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,aAAa,GAAG,CAKpB,KAAgE,EAChE,KAAc,EACd,OAAc,EAAE,EAChB,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO;IAEnB,MAAM,OAAO,GAAG;QACd,KAAK,EAAL,aAAK;QACL,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;KACc,CAAC;IAE7B,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,MAAM,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;AACjC,CAAC,CAAA,CAAC;AAqJF;;;;;;;;;;;;;;;;;;;GAmBG;AACI,MAAM,gBAAgB,GAAG,CAK9B,MAA2C,EACA,EAAE;IAC7C,0BAA0B;IAC1B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CACzB,MAAM,CAAC,KAAK;QACV,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,mBAAM,MAAM,EAAG;YACnC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,aAAK,CAAC,QAAQ,EAAS,CAClC,CAAC;IAEF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,IAAG,CAAC;IAEtD,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB;QAClD,CAAC,CAAC,MAAM,aAAK,CAAC,wBAAwB,CAAC,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,KAAK,CAAC;QACpD,CAAC,CAAC,SAAS,CAAC;IAChB,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MA
AM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;IAC3F,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IAEtD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,GACkB,CAAC;IAE7C,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAsD;QAC3E,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;KACd,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAAsD;QAC1E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAjFW,QAAA,gBAAgB,oBAiF3B;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAkC,EAAE,EAAE;IACnE,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,aAAK,CAAC,SAAS,CAAC,gBAAgB,EAAE,mBAAmB,EAAE,EAAE,KAAK,EAAE,mBAAmB,EAAE,CAAC,CAAC;IAC/F,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAA4C,EAAE,EAAE;IAC5E,MAAM,cAAc,GAAoB,CAAC,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;;QAChE,MAAM,EACJ,mBAAmB,EACnB,eAAe,EACf,YAAY,EACZ,qBAAqB,EACrB,gBAAgB,EAChB,kBAAkB,EAClB,wBAAwB,EACxB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;QAEhE,MAAM,aAAa,mBACjB,WAAW,EAAE,mBAAmB,EACh
C,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,eAAe,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EAClG,MAAM,EAAE,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,aAAa,CAAC,KAAK,EAAE,YAAY,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,EACzF,eAAe,EAAE,qBAAqB,EACtC,kBAAkB,EAAE,wBAAwB,EAC5C,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACX,CAAC;QAEF,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAK7B,KAAsF,EACtF,EAAE;;IACF,MAAM,EACJ,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,wBAAwB,EACxB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAqB,CAAC;IAE5C,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;QAC9D,2CAA2C;QAC3C,IAAI,wBAAwB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACzE,MAAM,KAAK,GAAG,MAAM,aAAK,CAAC,iBAAiB,CAAC,wBAAwB,CAAC,CAAC;YACtE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,aAAa,CAAC,KAAK,EAAE,qBAAqB,CAAC,CAAC;QAClD,MAAM,aAAa,CAAC,KAAK,EAAE,kBAAkB,CAAC,CAAC;QAE/C,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,aAAa,CAAC,KAAK,EAAE,oBAAoB,CAAC,CAAC;QACjD,MAAM,aAAa,CAAC,KAAK,EAAE,iBAAiB,CAAC,CAAC;QAE9C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAS7E;;;GAGG;AACI,MAAM,sBAAsB,GAAG,CAKpC,EACA,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GAgCb,EAAiB,EAAE;IAClB,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,IAAG,CAAC;IAEzD,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,aAAK,CAAC,IAAI,CACd,GAAS,EAAE;;QACT,MAAM,aAAa,GAAoD;YACrE,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,E
AAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,6EAA6E;wBAC7E,oBAAoB,EAAE,IAAA,iCAAkB,EAAC;4BACvC,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AA9FW,QAAA,sBAAsB,0BA8FjC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGtC,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC","sourcesContent":["import { Actor } from 'apify';\nimport {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n ProxyConfiguration,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 
'lodash';\nimport * as Sentry from '@sentry/node';\n\nimport type { CrawlerMeta, CrawlerType } from '../types';\nimport type { MaybePromise } from '../utils/types';\nimport {\n RouteHandler,\n CrawlerRouterWrapper,\n RouteMatcher,\n registerHandlers,\n setupDefaultRoute,\n} from './router';\nimport {\n CrawlerConfigActorInput,\n LoggingActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n ProxyActorInput,\n crawlerInput,\n} from './config';\nimport { createErrorHandler } from './error/errorHandler';\nimport { setupSentry } from './error/sentry';\nimport { logLevelHandlerWrapper } from './log';\nimport { itemCacheKey, pushData } from './dataset/pushData';\n\ntype MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst evalInputHook = async <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input>, 'input' | 'state'>,\n fnStr?: string,\n args: any[] = []\n) => {\n if (!fnStr) return;\n\n const hookCtx = {\n Actor,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n } satisfies ActorHookContext;\n\n const hookFn = eval(fnStr);\n await hookFn(...args, hookCtx);\n};\n\nexport interface ActorDefinition<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n> {\n // Actor input\n /**\n * Actor input which you can get e.g. 
via `Actor.getInput()`\n *\n * Input is automatically retrieved if undefined.\n */\n input?: MaybeAsyncFn<Input, [ActorDefinition<Ctx, Labels, Input>]>;\n /** Validation for the actor input. Should throw error if validation fails. */\n validateInput?: (input: Input | null) => MaybePromise<void>;\n\n // Router setup\n /**\n * Router instance that redirects the request to handlers.\n * @example\n * import { createCheerioRouter } from 'crawlee';\n *\n * ({\n * ...\n * router: createCheerioRouter(),\n * })\n */\n router: MaybeAsyncFn<RouterHandler<Ctx>, [ActorDefinitionWithInput<Ctx, Labels, Input>]>;\n /**\n * Criteria that un-labelled requests are matched against.\n *\n * E.g. If `match` function returns truthy value,\n * the request is passed to the `action` function for processing.\n *\n * @example\n * ({\n * ...\n * routes: [{\n * // If match returns true, the request is forwarded to handler\n * // with label JOB_DETAIL.\n * name: 'Job detail',\n * handlerLabel: routeLabels.JOB_DETAIL,\n * match: (url) => isUrlOfJobOffer(url),\n * }, {\n * // Define custom action function:\n * // If match returns true, we replace this request with new one\n * // pointing to new domain.\n * name: 'Main page',\n * handlerLabel: null,\n * match: (url) => url.match(/example\\.com\\/?(?:[?#~]|$)/i),\n * action: async (url, ctx, _, handlers) => {\n * ctx.log.info(`Redirecting to https://www.new-domain.com`);\n * await ctx.crawler.addRequests(['https://www.new-domain.com'], { forefront: true });\n * },\n * }],\n * })\n */\n routes: MaybeAsyncFn<\n RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>[],\n [ActorDefinitionWithInput<Ctx, Labels, Input>]\n >;\n /** Handlers for the labelled requests. The object keys are the labels. 
*/\n routeHandlers: MaybeAsyncFn<Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input>>>, [ActorDefinitionWithInput<Ctx, Labels, Input>]>; // prettier-ignore\n /**\n * Provides the option to modify or extend all router handlers by wrapping\n * them in these functions.\n *\n * Wrappers are applied from right to left. That means that wrappers `[A, B, C]`\n * will be applied like so `A( B( C( handler ) ) )`.\n */\n routerWrappers?: MaybeAsyncFn<CrawlerRouterWrapper<Ctx, ActorRouterContext<Ctx, Labels, Input>>[], [ActorDefinitionWithInput<Ctx, Labels, Input>]>; // prettier-ignore\n\n // Proxy setup\n proxy?: MaybeAsyncFn<ProxyConfiguration, [ActorDefinitionWithInput<Ctx, Labels, Input>]>; // prettier-ignore\n\n // Crawler setup\n createCrawler: (\n actorCtx: Omit<\n ActorContext<Ctx, Labels, Input>,\n 'crawler' | 'runCrawler' | 'metamorph' | 'pushData'\n >\n ) => MaybePromise<Ctx['crawler']>;\n}\n\n/** ActorDefinition object where the input is already resolved */\nexport type ActorDefinitionWithInput<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n> = Omit<ActorDefinition<Ctx, Labels, Input>, 'input'> & {\n input: Input | null;\n state: Record<string, unknown>;\n};\n\n/** Context available while creating an Apify/Crawlee crawler */\nexport interface ActorContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n> {\n crawler: Ctx['crawler'];\n /**\n * This function wraps `crawler.run(requests, runOtions)` with additional\n * features:\n * - Automatically metamorph into another actor after the run finishes\n */\n runCrawler: RunCrawler<Ctx>;\n /** Trigger actor metamorph, using actor's inputs as defaults. 
*/\n metamorph: Metamorph;\n pushData: typeof pushData;\n proxy?: ProxyConfiguration;\n router: RouterHandler<Ctx>;\n routes: RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>[];\n routeHandlers: Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input>>>;\n /** Original config from which this actor context was created */\n config: ActorDefinition<Ctx, Labels, Input>;\n /** Read-only inputs passed to the actor */\n input: Input | null;\n /** Mutable state that is shared across setup and teardown hooks */\n state: Record<string, unknown>;\n}\n\ntype OrigRunCrawler<T extends CrawlingContext<any, any>> = BasicCrawler<T>['run'];\n\n/** Extended type of `crawler.run()` function */\nexport type RunCrawler<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>> = (\n requests?: Parameters<OrigRunCrawler<Ctx>>[0],\n options?: Parameters<OrigRunCrawler<Ctx>>[1]\n) => ReturnType<OrigRunCrawler<Ctx>>;\n\n/** Trigger actor metamorph, using actor's inputs as defaults. 
*/\nexport type Metamorph = (overrides?: MetamorphActorInput) => Promise<void>;\n\n/** Context passed to user-defined functions passed from input */\nexport type ActorHookContext = Pick<ActorContext, 'input' | 'state'> & {\n Actor: typeof Actor;\n itemCacheKey: typeof itemCacheKey;\n};\n\n/** Context passed to route handlers */\nexport type ActorRouterContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n> = {\n actor: ActorContext<Ctx, Labels, Input>;\n};\n\n/**\n * Create opinionated Apify crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) TypeScript - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on the \"routes\" items.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n */\nexport const createApifyActor = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n>(\n config: ActorDefinition<Ctx, Labels, Input>\n): Promise<ActorContext<Ctx, Labels, Input>> => {\n // Initialize actor inputs\n const input = Object.freeze(\n config.input\n ? isFunc(config.input)\n ? 
await config.input({ ...config })\n : config.input\n : await Actor.getInput<Input>()\n );\n\n if (config.validateInput) await config.validateInput(input);\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null && process.env.APIFY_IS_AT_HOME\n ? await Actor.createProxyConfiguration(input?.proxy)\n : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? 
await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({ router, routes, routeHandlers, proxy, config, input, state });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n } satisfies ActorContext<Ctx, Labels, Input>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n return actor;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? 
{}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await Actor.metamorph(metamorphActorId, metamorphActorInput, { build: metamorphActorBuild });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state'>) => {\n const scopedPushData: typeof pushData = (entries, ctx, options) => {\n const {\n includePersonalData,\n outputTransform,\n outputFilter,\n outputDatasetIdOrName,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreIdOrName,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput;\n\n const mergedOptions = {\n showPrivate: includePersonalData,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: outputTransform ? ((item) => evalInputHook(actor, outputTransform, [item])) : undefined, // prettier-ignore\n filter: outputFilter ? ((item) => evalInputHook(actor, outputFilter, [item])) : undefined, // prettier-ignore\n datasetIdOrName: outputDatasetIdOrName,\n cacheStoreIdOrName: outputCacheStoreIdOrName,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n };\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n>(\n actor: Omit<ActorContext<Ctx, Labels, Input>, 'runCrawler' | 'metamorph' | 'pushData'>\n) => {\n const {\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreIdOrName,\n outputCacheActionOnResult,\n } = 
(actor.input ?? {}) as OutputActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreIdOrName && outputCacheActionOnResult === 'overwrite') {\n const store = await Actor.openKeyValueStore(outputCacheStoreIdOrName);\n await store.drop();\n }\n\n await evalInputHook(actor, outputTransformBefore);\n await evalInputHook(actor, outputFilterBefore);\n\n const runRes = await actor.crawler.run(requests, options);\n\n await evalInputHook(actor, outputTransformAfter);\n await evalInputHook(actor, outputFilterAfter);\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\ntype AllInputs = CrawlerConfigActorInput &\n LoggingActorInput &\n ProxyActorInput &\n PrivacyActorInput &\n OutputActorInput &\n MetamorphActorInput;\n\n/**\n * Create default configuration for an Apify actor\n * and run the actor within the `Actor.main()` context.\n */\nexport const createAndRunApifyActor = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>\n>({\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n}: {\n /** String idetifying the actor class, e.g. 
`'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createApifyActor} */\n actorConfig: Omit<ActorDefinition<Ctx, Labels, Input>, 'router' | 'createCrawler'> &\n Partial<Pick<ActorDefinition<Ctx, Labels, Input>, 'router' | 'createCrawler'>>;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input>) => MaybePromise<void>;\n}): Promise<void> => {\n setupSentry({ ...sentryOptions, serverName: actorName });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await Actor.main(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllInputs> = {\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 
'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors as a separate Apify/Actor dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createApifyActor<Ctx, Labels, Input>({\n ...actorConfig,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? (actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? 
({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n"]}
@@ -0,0 +1,20 @@
1
+ import type { DatasetPerfStat, ScraperActorSpec, ScraperDataset } from 'actor-spec';
2
+ /**
3
+ * Scraper actor spec whose `datasets` entries carry additional dataset perf stats info for formatting in tables.
4
+ *
5
+ * See {@link ScraperActorSpec}
6
+ */
7
+ export interface CrawleeOneScraperActorSpec extends ScraperActorSpec {
8
+ datasets: CrawleeOneScraperDataset[]; // each entry is a CrawleeOneScraperDataset, which adds `perfStats` and `perfTable`
9
+ }
10
+ /** Dataset with additional perf stats info for formatting in tables. Extends `ScraperDataset` with the two fields below. */
11
+ export interface CrawleeOneScraperDataset extends ScraperDataset {
12
+ perfStats: CrawleeOneDatasetPerfStat[]; // perf stats, each carrying its own `rowId` and `colId`
13
+ /** Specify which perfTable should render this data. NOTE(review): presumably an identifier of a perf table defined elsewhere in the spec - confirm. */
14
+ perfTable: string;
15
+ }
16
+ /** Dataset perf stats with additional info for formatting in tables. Extends `DatasetPerfStat` with the two string identifiers below. */
17
+ export interface CrawleeOneDatasetPerfStat extends DatasetPerfStat {
18
+ rowId: string; // NOTE(review): presumably identifies the table row this stat is rendered in - confirm against the table renderer
19
+ colId: string; // NOTE(review): presumably identifies the table column this stat is rendered in - confirm against the table renderer
20
+ }
@@ -0,0 +1,3 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ //# sourceMappingURL=actorSpec.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actorSpec.js","sourceRoot":"","sources":["../../../src/lib/actorSpec.ts"],"names":[],"mappings":"","sourcesContent":["import type { DatasetPerfStat, ScraperActorSpec, ScraperDataset } from 'actor-spec';\n\n/**\n * Scraper actor spec with additional dataset perf stats info for formatting in tables\n *\n * See {@link ScraperActorSpec}\n */\nexport interface CrawleeOneScraperActorSpec extends ScraperActorSpec {\n datasets: CrawleeOneScraperDataset[];\n}\n\n/** Dataset with additional perf stats info for formatting in tables */\nexport interface CrawleeOneScraperDataset extends ScraperDataset {\n perfStats: CrawleeOneDatasetPerfStat[];\n /** Specify which perfTable should render this data */\n perfTable: string;\n}\n\n/** Dataset perf stats with additional info for formatting in tables */\nexport interface CrawleeOneDatasetPerfStat extends DatasetPerfStat {\n rowId: string;\n colId: string;\n}\n"]}