crawlee-one 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.d.ts +2 -0
- package/dist/cjs/index.js +2 -0
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/lib/actor/actor.d.ts +1 -1
- package/dist/cjs/lib/actor/actor.js +14 -7
- package/dist/cjs/lib/actor/actor.js.map +1 -1
- package/dist/cjs/lib/actor/types.d.ts +2 -1
- package/dist/cjs/lib/actor/types.js.map +1 -1
- package/dist/cjs/lib/io/pushData.d.ts +2 -1
- package/dist/cjs/lib/io/pushData.js +8 -7
- package/dist/cjs/lib/io/pushData.js.map +1 -1
- package/dist/cjs/lib/io/pushRequests.d.ts +3 -2
- package/dist/cjs/lib/io/pushRequests.js +7 -6
- package/dist/cjs/lib/io/pushRequests.js.map +1 -1
- package/package.json +1 -1
package/dist/cjs/index.d.ts
CHANGED
package/dist/cjs/index.js
CHANGED
|
@@ -40,4 +40,6 @@ __exportStar(require("./lib/router"), exports);
|
|
|
40
40
|
__exportStar(require("./lib/log"), exports);
|
|
41
41
|
__exportStar(require("./lib/test/actor"), exports);
|
|
42
42
|
__exportStar(require("./lib/test/mockApifyClient"), exports);
|
|
43
|
+
__exportStar(require("./lib/integrations/apify"), exports);
|
|
44
|
+
__exportStar(require("./lib/integrations/types"), exports);
|
|
43
45
|
//# sourceMappingURL=index.js.map
|
package/dist/cjs/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,2CAAqF;AAA5E,+GAAA,sBAAsB,OAAA;AAAE,iHAAA,wBAAwB,OAAA;AACzD,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,sDAAoC;AACpC,qDAAmC;AACnC,+CAA6B;AAC7B,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C","sourcesContent":["export { createAndRunCrawleeOne, createHttpCrawlerOptions } from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/error/sentry';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/readme';\nexport * from './lib/readme/types';\nexport * from './lib/router';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\n"]}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;AAAA,2CAAqF;AAA5E,+GAAA,sBAAsB,OAAA;AAAE,iHAAA,wBAAwB,OAAA;AACzD,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,sDAAoC;AACpC,qDAAmC;AACnC,+CAA6B;AAC7B,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC","sourcesContent":["export { createAndRunCrawleeOne, createHttpCrawlerOptions } from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/error/sentry';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/readme';\nexport * from './lib/readme/types';\nexport * from './lib/router';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\n"]}
|
|
@@ -13,7 +13,7 @@ import type { ActorContext, ActorDefinition } from './types';
|
|
|
13
13
|
*
|
|
14
14
|
* Read more about what this actor does at {@link createCrawleeOne}.
|
|
15
15
|
*/
|
|
16
|
-
export declare const createAndRunCrawleeOne: <TCrawlerType extends CrawlerType, Ctx extends CrawlerMeta<TCrawlerType, any>["context"] = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO<object, object, object> = CrawleeOneIO<object, object, object>>(
|
|
16
|
+
export declare const createAndRunCrawleeOne: <TCrawlerType extends CrawlerType, Ctx extends CrawlerMeta<TCrawlerType, any>["context"] = CrawlingContext<BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>>, import("crawlee").Dictionary>, Labels extends string = string, Input extends Record<string, any> = Record<string, any>, TIO extends CrawleeOneIO<object, object, object> = CrawleeOneIO<object, object, object>>(args: {
|
|
17
17
|
/** String idetifying the actor class, e.g. `'cheerio'` */
|
|
18
18
|
actorType: TCrawlerType;
|
|
19
19
|
actorName: string;
|
|
@@ -60,8 +60,8 @@ const genHookFn = (actor, fnStr) => {
|
|
|
60
60
|
*
|
|
61
61
|
* Read more about what this actor does at {@link createCrawleeOne}.
|
|
62
62
|
*/
|
|
63
|
-
const createAndRunCrawleeOne = (
|
|
64
|
-
const { actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, sentryOptions, onActorReady, } =
|
|
63
|
+
const createAndRunCrawleeOne = (args) => __awaiter(void 0, void 0, void 0, function* () {
|
|
64
|
+
const { actorType, actorName, actorConfig, crawlerConfigDefaults, crawlerConfigOverrides, sentryOptions, onActorReady, } = args;
|
|
65
65
|
const { io = apify_1.apifyIO } = actorConfig;
|
|
66
66
|
yield (0, sentry_1.setupSentry)(Object.assign(Object.assign({}, sentryOptions), { serverName: actorName }), { io });
|
|
67
67
|
// See docs:
|
|
@@ -144,8 +144,10 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
|
|
|
144
144
|
const input = Object.freeze(yield resolveInput(rawInput, state, { io }));
|
|
145
145
|
if (config.validateInput)
|
|
146
146
|
yield config.validateInput(input);
|
|
147
|
+
const { logLevel } = (input !== null && input !== void 0 ? input : {});
|
|
148
|
+
const log = new crawlee_1.Log({ level: logLevel ? log_1.logLevelToCrawlee[logLevel] : undefined });
|
|
147
149
|
// This is context that is available to options that use initialization function
|
|
148
|
-
const getConfig = () => (Object.assign(Object.assign({}, config), { input, state, io }));
|
|
150
|
+
const getConfig = () => (Object.assign(Object.assign({}, config), { input, state, io, log }));
|
|
149
151
|
// Set up proxy
|
|
150
152
|
const defaultProxy = config.proxy == null ? yield io.createDefaultProxyConfiguration(input !== null && input !== void 0 ? input : undefined) : undefined;
|
|
151
153
|
const proxy = config.proxy == null
|
|
@@ -170,6 +172,7 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
|
|
|
170
172
|
config,
|
|
171
173
|
input,
|
|
172
174
|
state,
|
|
175
|
+
log,
|
|
173
176
|
});
|
|
174
177
|
const crawler = yield config.createCrawler(getActorCtx());
|
|
175
178
|
// Create actor (our custom entity)
|
|
@@ -200,6 +203,8 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
|
|
|
200
203
|
routerContext,
|
|
201
204
|
routeHandlers,
|
|
202
205
|
});
|
|
206
|
+
// Now that the actor is ready, enqueue the URLs right away
|
|
207
|
+
yield scopedPushRequest(startUrls);
|
|
203
208
|
return actor;
|
|
204
209
|
});
|
|
205
210
|
exports.createCrawleeOne = createCrawleeOne;
|
|
@@ -265,7 +270,7 @@ const createScopedPushData = (actor) => {
|
|
|
265
270
|
const scopedPushData = (entries, ctx, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
266
271
|
const transformFn = genHookFn(actor, outputTransform);
|
|
267
272
|
const filterFn = genHookFn(actor, outputFilter);
|
|
268
|
-
const mergedOptions = Object.assign({ io: actor.io, showPrivate: includePersonalData, maxCount: outputMaxEntries, pickKeys: outputPickFields, remapKeys: outputRenameFields, transform: transformFn ? (item) => transformFn(item) : undefined, filter: filterFn ? (item) => filterFn(item) : undefined, datasetId: outputDatasetId, requestQueueId, cacheStoreId: outputCacheStoreId, cachePrimaryKeys: outputCachePrimaryKeys, cacheActionOnResult: outputCacheActionOnResult }, options);
|
|
273
|
+
const mergedOptions = Object.assign({ io: actor.io, log: actor.log, showPrivate: includePersonalData, maxCount: outputMaxEntries, pickKeys: outputPickFields, remapKeys: outputRenameFields, transform: transformFn ? (item) => transformFn(item) : undefined, filter: filterFn ? (item) => filterFn(item) : undefined, datasetId: outputDatasetId, requestQueueId, cacheStoreId: outputCacheStoreId, cachePrimaryKeys: outputCachePrimaryKeys, cacheActionOnResult: outputCacheActionOnResult }, options);
|
|
269
274
|
return (0, pushData_1.pushData)(entries, ctx, mergedOptions);
|
|
270
275
|
});
|
|
271
276
|
return scopedPushData;
|
|
@@ -274,11 +279,11 @@ const createScopedPushData = (actor) => {
|
|
|
274
279
|
const createScopedPushRequests = (actor) => {
|
|
275
280
|
var _a;
|
|
276
281
|
const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = ((_a = actor.input) !== null && _a !== void 0 ? _a : {});
|
|
277
|
-
const scopedPushRequest = (entries,
|
|
282
|
+
const scopedPushRequest = (entries, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
278
283
|
const transformFn = genHookFn(actor, requestTransform);
|
|
279
284
|
const filterFn = genHookFn(actor, requestFilter);
|
|
280
|
-
const mergedOptions = Object.assign({ io: actor.io, maxCount: requestMaxEntries, transform: transformFn ? (item) => transformFn(item) : undefined, filter: filterFn ? (item) => filterFn(item) : undefined, requestQueueId }, options);
|
|
281
|
-
return (0, pushRequests_1.pushRequests)(entries,
|
|
285
|
+
const mergedOptions = Object.assign({ io: actor.io, log: actor.log, maxCount: requestMaxEntries, transform: transformFn ? (item) => transformFn(item) : undefined, filter: filterFn ? (item) => filterFn(item) : undefined, requestQueueId }, options);
|
|
286
|
+
return (0, pushRequests_1.pushRequests)(entries, mergedOptions);
|
|
282
287
|
});
|
|
283
288
|
return scopedPushRequest;
|
|
284
289
|
};
|
|
@@ -293,11 +298,13 @@ const getStartUrlsFromInput = (actor) => __awaiter(void 0, void 0, void 0, funct
|
|
|
293
298
|
const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = ((_e = actor.input) !== null && _e !== void 0 ? _e : {});
|
|
294
299
|
const urlsAgg = [...(startUrls !== null && startUrls !== void 0 ? startUrls : [])];
|
|
295
300
|
if (startUrlsFromDataset) {
|
|
301
|
+
actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);
|
|
296
302
|
const [datasetId, field] = startUrlsFromDataset.split('#');
|
|
297
303
|
const urlsFromDataset = yield (0, dataset_1.getColumnFromDataset)(datasetId, field, { io: actor.io });
|
|
298
304
|
urlsAgg.push(...urlsFromDataset);
|
|
299
305
|
}
|
|
300
306
|
if (startUrlsFromFunction) {
|
|
307
|
+
actor.log.debug(`Loading start URLs from function`);
|
|
301
308
|
const urlsFromFn = yield ((_f = genHookFn(actor, startUrlsFromFunction)) === null || _f === void 0 ? void 0 : _f());
|
|
302
309
|
urlsAgg.push(...urlsFromFn);
|
|
303
310
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAWiB;AACjB,mCAAgD;AAEhD,+CAA2C;AAI3C,wDAA2D;AAC3D,4CAA8C;AAC9C,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,sCAAgE;AAChE,sCAUmB;AACnB,gCAAgD;AAUhD,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAMhB,KAA4E,EAC5E,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACO,CAAC;IAElC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AAEF;;;;;;;GAOG;AACI,MAAM,sBAAsB,GAAG,CAMpC,KAiCD,EAAiB,EAAE;IAClB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GACb,GAAG,KAAK,CAAC;IAEV,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,WAAW,CAAC;IAEnD,MAAM,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,KAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IAEvE,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,aAAa,GAA8D;YAC/E,EAAE;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,yEAAyE;wBACzE,oBAAoB,EAAE,IAAA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxGW,QAAA,sBAAsB,0BAwGjC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACI,MAAM,gBAAgB,GAAG,CAM9B,MAAmE,EACnB,EAAE;IAClD,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,MAAM,CAAC;IAE9C,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,IAAG,CAAC;IAE1D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;KACN,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACsC,CAAC;IAElD,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAkE;QACvF,EAAE;QACF,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAA2D;QAC/E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAhGW,QAAA,gBAAgB,oBAgG3B;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAAY,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAM7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAyC,EAAE,EAAE;IAC1E,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,CAAC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAAmD,EAAE,EAAE;;IACnF,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAA6B,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC/E,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAAC,KAAmD,EAAE,EAAE;;IACvF,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAiC,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QACtF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACwB,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGtC,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAAO,KAAmD,EAAE,EAAE;;IAC1F,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7B;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport * as Sentry from '@sentry/node';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { setupSentry } from '../error/sentry';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultRoute } from '../router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n} from '../config';\nimport { logLevelHandlerWrapper } from '../log';\nimport type {\n ActorContext,\n ActorDefinition,\n ActorHookContext,\n ActorRouterContext,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input, TIO>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: gotScraping,\n } satisfies ActorHookContext<TIO>;\n\n const hookFn = eval(fnStr);\n if (!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Create default configuration for an opinionated Crawlee actor,\n * and run the actor within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport const createAndRunCrawleeOne = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(input: {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n ActorDefinition<Ctx, Labels, Input, TIO>,\n 'router' | 'createCrawler' | 'io'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>;\n}): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n } = input;\n\n const { io = apifyIO as any as TIO } = actorConfig;\n\n await setupSentry({ ...sentryOptions, serverName: actorName }, { io });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllActorInputs, TIO> = {\n io,\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createCrawleeOne<Ctx, Labels, Input, TIO>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? (actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const createCrawleeOne = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>\n): Promise<ActorContext<Ctx, Labels, Input, TIO>> => {\n const { io = apifyIO as any as TIO } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies ActorContext<Ctx, Labels, Input, TIO>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels, Input>({\n io,\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? {}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state' | 'io'>) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: ActorContext['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (actor: Pick<ActorContext, 'input' | 'state' | 'io'>) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: ActorContext['pushRequests'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, ctx, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (actor: Pick<ActorContext, 'input' | 'state' | 'io'>) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}
|
|
1
|
+
{"version":3,"file":"actor.js","sourceRoot":"","sources":["../../../../src/lib/actor/actor.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAaiB;AACjB,mCAAgD;AAEhD,+CAA2C;AAI3C,wDAA2D;AAC3D,4CAA8C;AAC9C,6CAA8E;AAC9E,2CAAqD;AACrD,qDAAuE;AAEvE,iDAAgD;AAChD,sCAAgE;AAChE,sCAWmB;AACnB,gCAAmE;AAUnE,MAAM,gBAAgB,GAAG;IACvB,KAAK,EAAE,sBAAY;IACnB,IAAI,EAAE,qBAAW;IACjB,OAAO,EAAE,wBAAc;IACvB,KAAK,EAAE,sBAAY;IACnB,UAAU,EAAE,2BAAiB;IAC7B,SAAS,EAAE,0BAAgB;CAC+C,CAAC;AAE7E,MAAM,QAAQ,GAAG,CAAC,CAAM,EAA2B,EAAE;IACnD,OAAO,CAAC,CAAC,CAAE,CAAmB,CAAC,UAAU,IAAK,CAAmB,CAAC,iBAAiB,CAAC,CAAC;AACvF,CAAC,CAAC;AACF,MAAM,MAAM,GAAG,CAAC,CAAM,EAAgC,EAAE;IACtD,OAAO,OAAO,CAAC,KAAK,UAAU,CAAC;AACjC,CAAC,CAAC;AAEF,kEAAkE;AAClE,MAAM,SAAS,GAAG,CAMhB,KAA4E,EAC5E,KAAc,EACd,EAAE;IACF,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,OAAO,GAAG;QACd,EAAE,EAAE,KAAK,CAAC,EAAE;QACZ,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,KAAK,EAAE,KAAK,CAAC,KAAK;QAClB,YAAY,EAAZ,uBAAY;QACZ,WAAW,EAAE,0BAAW;KACO,CAAC;IAElC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;IAC3B,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,OAAO,CAAO,GAAG,IAAI,EAAE,EAAE,kDAAC,OAAA,MAAM,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAA,GAAA,CAAC;AACrD,CAAC,CAAC;AAEF;;;;;;;GAOG;AACI,MAAM,sBAAsB,GAAG,CAMpC,IAiCD,EAAiB,EAAE;IAClB,MAAM,EACJ,SAAS,EACT,SAAS,EACT,WAAW,EACX,qBAAqB,EACrB,sBAAsB,EACtB,aAAa,EACb,YAAY,GACb,GAAG,IAAI,CAAC;IAET,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,WAAW,CAAC;IAEnD,MAAM,IAAA,oBAAW,kCAAM,aAAa,KAAE,UAAU,EAAE,SAAS,KAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IAEvE,YAAY;IACZ,mCAAmC;IACnC,yGAAyG;IACzG,2EAA2E;IAC3E,MAAM,EAAE,CAAC,YAAY,CACnB,GAAS,EAAE;;QACT,MAAM,aAAa,GAA8D;YAC/E,EAAE;YACF,MAAM,EAAE,gBAAM,CAAC,MAAM,EAAO;YAC5B,cAAc,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBAC7B,IAAA,4BAAsB,EAAW,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;iBAC5D,CAAA;aAAA;YACD,aAAa,EAAE,CAAC,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAC1C,MAAM,OAAO,GAAG,IAAA,gCAAwB,EAGtC;oBACA,KAAK;oBACL,QAAQ,EAAE,qBAAqB;oBAC/B,SAAS,kBACP,cAAc,EAAE,MAAM,EACtB,kBAAkB,EAAE,KAAK;wBACzB,yEAAyE;wBACzE,oBAAoB,EAAE,IAAA,iCAAkB,EAAC;4BACvC,EAAE;4BACF,kBAAkB,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,uBAAuB,mCAAI,WAAW;4BACjE,YAAY,EAAE,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,iBAAiB,mCAAI,IAAI;yBAC/C,CAAC,IACC,sBAAsB,CAC1B;iBACF,CAAC,CAAC;gBACH,MAAM,YAAY,GAAG,gBAAgB,CAAC,SAAS,CAAQ,CAAC;gBACxD,OAAO,IAAI,YAAY,CAAC,OAAO,CAAC,CAAC;YACnC,CAAC;YACD,MAAM,EAAE,EAAE;YACV,aAAa,EAAE,EAAS;SACzB,CAAC;QAEF,MAAM,KAAK,GAAG,MAAM,IAAA,wBAAgB,kCAC/B,WAAW,KACd,EAAE,EACF,MAAM,EAAE,MAAA,WAAW,CAAC,MAAM,mCAAK,aAAa,CAAC,MAAc,EAC3D,cAAc,EAAE,MAAA,WAAW,CAAC,cAAc,mCAAK,aAAa,CAAC,cAAsB,EACnF,aAAa,EAAE,MAAA,WAAW,CAAC,aAAa,mCAAK,aAAa,CAAC,aAAqB,IAChF,CAAC;QAEH,MAAM,CAAA,YAAY,aAAZ,YAAY,uBAAZ,YAAY,CAAG,KAAK,CAAC,CAAA,CAAC;IAC9B,CAAC,CAAA,EACD,EAAE,aAAa,EAAE,oBAAoB,EAAE,CACxC,CAAC;AACJ,CAAC,CAAA,CAAC;AAxGW,QAAA,sBAAsB,0BAwGjC;AAEF;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACI,MAAM,gBAAgB,GAAG,CAM9B,MAAmE,EACnB,EAAE;IAClD,MAAM,EAAE,EAAE,GAAG,eAAqB,EAAE,GAAG,MAAM,CAAC;IAE9C,qDAAqD;IACrD,MAAM,KAAK,GAAG,EAAE,CAAC;IAEjB,0BAA0B;IAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,KAAK;QAC3B,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACpB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,iCAAM,MAAM,KAAE,EAAE,IAAG;YACvC,CAAC,CAAC,MAAM,CAAC,KAAK;QAChB,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAS,CAAC;IAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,YAAY,CAAe,QAAQ,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,aAAa;QAAE,MAAM,MAAM,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;IAE5D,MAAM,EAAE,QAAQ,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAsB,CAAC;IACxD,MAAM,GAAG,GAAG,IAAI,aAAG,CAAC,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,uBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC;IAEnF,gFAAgF;IAChF,MAAM,SAAS,GAAG,GAAG,EAAE,CAAC,iCAAM,MAAM,KAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,IAAG,CAAC;IAE/D,eAAe;IACf,MAAM,YAAY,GAChB,MAAM,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,+BAA+B,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAClG,MAAM,KAAK,GACT,MAAM,CAAC,KAAK,IAAI,IAAI;QAClB,CAAC,CAAC,YAAY;QACd,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC;YACtB,CAAC,CAAC,MAAM,MAAM,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACjC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;IAEnB,+BAA+B;IAC/B,MAAM,MAAM,GAAuB,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;QACxD,CAAC,CAAC,MAAM,CAAC,MAAM;QACf,CAAC,CAAC,MAAO,MAAM,CAAC,MAAc,CAAC,SAAS,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,kBAAkB;IAC3G,MAAM,aAAa,GAAG,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,aAAa,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,kBAAkB;IACvI,MAAM,cAAc,GAAG,MAAM,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,cAAc,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,kBAAkB;IAE3I,yBAAyB;IACzB,MAAM,WAAW,GAAG,GAAG,EAAE,CAAC,CAAC;QACzB,EAAE;QACF,MAAM;QACN,MAAM;QACN,aAAa;QACb,KAAK;QACL,MAAM;QACN,KAAK;QACL,KAAK;QACL,GAAG;KACJ,CAAC,CAAC;IACH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,WAAW,EAAE,CAAC,CAAC;IAE1D,mCAAmC;IACnC,MAAM,QAAQ,mBAAK,OAAO,IAAK,WAAW,EAAE,CAAE,CAAC;IAC/C,MAAM,UAAU,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IACpD,MAAM,SAAS,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACtD,MAAM,iBAAiB,GAAG,wBAAwB,CAAC,QAAQ,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,MAAM,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAExD,MAAM,KAAK,GAAG,gCACT,QAAQ,KACX,OAAO;QACP,UAAU;QACV,SAAS,EACT,QAAQ,EAAE,cAAc,EACxB,YAAY,EAAE,iBAAiB,EAC/B,SAAS,GACsC,CAAC;IAElD,0DAA0D;IAC1D,MAAM,aAAa,GAAG,EAAE,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,CAAC;IAE1D,gBAAgB;IAChB,MAAM,IAAA,0BAAiB,EAAkE;QACvF,EAAE;QACF,MAAM;QACN,cAAc;QACd,aAAa;QACb,MAAM;QACN,aAAa;QACb,KAAK;KACN,CAAC,CAAC;IACH,MAAM,IAAA,yBAAgB,EAA2D;QAC/E,MAAM;QACN,cAAc;QACd,aAAa;QACb,aAAa;KACd,CAAC,CAAC;IAEH,2DAA2D;IAC3D,MAAM,iBAAiB,CAAC,SAA6B,CAAC,CAAC;IAEvD,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAvGW,QAAA,gBAAgB,oBAuG3B;AAEF,MAAM,YAAY,GAAG,CACnB,KAAoB,EACpB,KAA8B,EAC9B,OAA+B,EAC/B,EAAE;;IACF,MAAM,EAAE,EAAE,GAAG,eAAuB,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IACvD,MAAM,EAAE,cAAc,EAAE,uBAAuB,EAAE,GAAG,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAoB,CAAC;IAErF,MAAM,YAAY,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,0BAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,IAAI,EAAU,CAAC,CAAC,CAAC,IAAI,CAAC;IAClG,MAAM,OAAO,GAAG,SAAS,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,uBAAuB,CAAC,CAAC;IACzE,MAAM,aAAa,GAAG,MAAA,CAAC,MAAM,CAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,EAAI,CAAA,CAAC,mCAAI,IAAI,CAAC;IAClD,MAAM,aAAa,iDAAQ,YAAY,GAAK,aAAa,GAAK,KAAK,CAAE,CAAC;IAEtE,OAAO,aAAkB,CAAC;AAC5B,CAAC,CAAA,CAAC;AAEF;;;;GAIG;AACH,MAAM,sBAAsB,GAAG,CAM7B,KAGC,EACD,EAAE;;IACF,MAAM,EACJ,sBAAsB,EACtB,qBAAqB,EACrB,mBAAmB,EACnB,kBAAkB,EAClB,qBAAqB,EACrB,oBAAoB,EACpB,kBAAkB,EAClB,iBAAiB,EACjB,kBAAkB,EAClB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAyC,CAAC;IAEhE,MAAM,SAAS,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;IAE/C,MAAM,UAAU,GAAoB,CAAO,QAAQ,EAAE,OAAO,EAAE,EAAE;;QAC9D,2CAA2C;QAC3C,IAAI,kBAAkB,IAAI,yBAAyB,KAAK,WAAW,EAAE;YACnE,MAAM,KAAK,GAAG,MAAM,KAAK,CAAC,EAAE,CAAC,iBAAiB,CAAC,kBAAkB,CAAC,CAAC;YACnE,MAAM,KAAK,CAAC,IAAI,EAAE,CAAC;SACpB;QAED,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAC/C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,sBAAsB,CAAC,2CAAI,CAAA,CAAC;QACnD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,mBAAmB,CAAC,2CAAI,CAAA,CAAC;QAEhD,MAAM,MAAM,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAE1D,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,oBAAoB,CAAC,2CAAI,CAAA,CAAC;QACjD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,iBAAiB,CAAC,2CAAI,CAAA,CAAC;QAC9C,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QAClD,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,kBAAkB,CAAC,2CAAI,CAAA,CAAC;QAE/C,iDAAiD;QACjD,MAAM,SAAS,EAAE,CAAC;QAElB,OAAO,MAAM,CAAC;IAChB,CAAC,CAAA,CAAC;IAEF,OAAO,UAAU,CAAC;AACpB,CAAC,CAAC;AAEF,mFAAmF;AACnF,MAAM,qBAAqB,GAAG,CAAC,KAAyC,EAAE,EAAE;IAC1E,iDAAiD;IACjD,MAAM,SAAS,GAAc,CAAO,SAA+B,EAAE,EAAE;;QACrE,MAAM,EACJ,gBAAgB,EAChB,mBAAmB,EACnB,mBAAmB,GACpB,GAAG,IAAA,iBAAQ,EAAC,EAAE,EAAE,SAAS,EAAE,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAAC,CAAC,CAAC,kBAAkB;QAElE,IAAI,CAAC,gBAAgB;YAAE,OAAO;QAE9B,MAAM,KAAK,CAAC,EAAE,CAAC,wBAAwB,CAAC,gBAAgB,EAAE,mBAAmB,EAAE;YAC7E,KAAK,EAAE,mBAAmB;SAC3B,CAAC,CAAC;IACL,CAAC,CAAA,CAAC;IAEF,OAAO,SAAS,CAAC;AACnB,CAAC,CAAC;AAEF,uEAAuE;AACvE,MAAM,oBAAoB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC3F,MAAM,EACJ,mBAAmB,EACnB,cAAc,EACd,gBAAgB,EAChB,eAAe,EACf,YAAY,EACZ,eAAe,EACf,gBAAgB,EAChB,kBAAkB,EAClB,kBAAkB,EAClB,sBAAsB,EACtB,yBAAyB,GAC1B,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAAI,EAAE,CAA6D,CAAC;IAEpF,MAAM,cAAc,GAA6B,CAAO,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE;QAC/E,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,eAAe,CAAC,CAAC;QACtD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,YAAY,CAAC,CAAC;QAEhD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,WAAW,EAAE,mBAAmB,EAChC,QAAQ,EAAE,gBAAgB,EAC1B,QAAQ,EAAE,gBAAgB,EAC1B,SAAS,EAAE,kBAAkB,EAC7B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,SAAS,EAAE,eAAe,EAC1B,cAAc,EACd,YAAY,EAAE,kBAAkB,EAChC,gBAAgB,EAAE,sBAAsB,EACxC,mBAAmB,EAAE,yBAAyB,IAC3C,OAAO,CACuB,CAAC;QAEpC,OAAO,IAAA,mBAAQ,EAAC,OAAO,EAAE,GAAG,EAAE,aAAa,CAAC,CAAC;IAC/C,CAAC,CAAA,CAAC;IAEF,OAAO,cAAc,CAAC;AACxB,CAAC,CAAC;AAEF,2EAA2E;AAC3E,MAAM,wBAAwB,GAAG,CAAC,KAA2D,EAAE,EAAE;;IAC/F,MAAM,EAAE,cAAc,EAAE,iBAAiB,EAAE,gBAAgB,EAAE,aAAa,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCACzF,EAAE,CAAsB,CAAC;IAE3B,MAAM,iBAAiB,GAAiC,CAAO,OAAO,EAAE,OAAO,EAAE,EAAE;QACjF,MAAM,WAAW,GAAG,SAAS,CAAC,KAAK,EAAE,gBAAgB,CAAC,CAAC;QACvD,MAAM,QAAQ,GAAG,SAAS,CAAC,KAAK,EAAE,aAAa,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,gBACpB,EAAE,EAAE,KAAK,CAAC,EAAE,EACZ,GAAG,EAAE,KAAK,CAAC,GAAG,EACd,QAAQ,EAAE,iBAAiB,EAC3B,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,EACvD,cAAc,IACX,OAAO,CACwB,CAAC;QAErC,OAAO,IAAA,2BAAY,EAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IAC9C,CAAC,CAAA,CAAC;IAEF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAC;AAEF,4DAA4D;AACrD,MAAM,wBAAwB,GAAG,CAGtC,EACA,KAAK,EACL,QAAQ,EACR,SAAS,GAcV,EAAE,EAAE;IACH,MAAM,sBAAsB,GAAG,CAAoC,MAAS,EAAE,EAAE,CAC9E,IAAA,aAAI,EAAC,MAAM,EAAE,MAAM,CAAC,IAAI,CAAC,qBAAY,CAAC,CAAC,CAAC;IAE1C,OAAO,8CAEF,IAAA,eAAM,EAAC,QAAQ,aAAR,QAAQ,cAAR,QAAQ,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAEjE,IAAA,eAAM,EAAC,sBAAsB,CAAC,KAAK,aAAL,KAAK,cAAL,KAAK,GAAI,EAAE,CAAC,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,GAE3E,IAAA,eAAM,EAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAK,EAAY,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,KAAK,SAAS,CAAC,CAC7C,CAAC;AAC7B,CAAC,CAAC;AAhCW,QAAA,wBAAwB,4BAgCnC;AAEF,MAAM,qBAAqB,GAAG,CAC5B,KAA2D,EAC3D,EAAE;;IACF,MAAM,EAAE,SAAS,EAAE,oBAAoB,EAAE,qBAAqB,EAAE,GAAG,CAAC,MAAA,KAAK,CAAC,KAAK,mCAC7E,EAAE,CAAwB,CAAC;IAE7B,MAAM,OAAO,GAAG,CAAC,GAAG,CAAC,SAAS,aAAT,SAAS,cAAT,SAAS,GAAI,EAAE,CAAC,CAAC,CAAC;IAEvC,IAAI,oBAAoB,EAAE;QACxB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,mCAAmC,oBAAoB,EAAE,CAAC,CAAC;QAC3E,MAAM,CAAC,SAAS,EAAE,KAAK,CAAC,GAAG,oBAAoB,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC3D,MAAM,eAAe,GAAG,MAAM,IAAA,8BAAoB,EAAM,SAAS,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;QAC5F,OAAO,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAC;KAClC;IAED,IAAI,qBAAqB,EAAE;QACzB,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACpD,MAAM,UAAU,GAAG,MAAM,CAAA,MAAA,SAAS,CAAC,KAAK,EAAE,qBAAqB,CAAC,2CAAI,CAAA,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;KAC7B;IAED,OAAO,OAAO,CAAC;AACjB,CAAC,CAAA,CAAC","sourcesContent":["import {\n BasicCrawler,\n CrawlingContext,\n RouterHandler,\n BasicCrawlerOptions,\n CheerioCrawler,\n Router,\n HttpCrawler,\n JSDOMCrawler,\n PlaywrightCrawler,\n PuppeteerCrawler,\n Log,\n Request as CrawleeRequest,\n} from 'crawlee';\nimport { omitBy, pick, defaults } from 'lodash';\nimport * as Sentry from '@sentry/node';\nimport { gotScraping } from 'got-scraping';\n\nimport type { CrawlerMeta, CrawlerType } from '../../types';\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport { createErrorHandler } from '../error/errorHandler';\nimport { setupSentry } from '../error/sentry';\nimport { type PushDataOptions, itemCacheKey, pushData } from '../io/pushData';\nimport { getColumnFromDataset } from '../io/dataset';\nimport { PushRequestsOptions, pushRequests } from '../io/pushRequests';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\nimport { registerHandlers, setupDefaultRoute } from '../router';\nimport {\n CrawlerConfigActorInput,\n OutputActorInput,\n MetamorphActorInput,\n PrivacyActorInput,\n crawlerInput,\n StartUrlsActorInput,\n InputActorInput,\n RequestActorInput,\n AllActorInputs,\n LoggingActorInput,\n} from '../config';\nimport { logLevelHandlerWrapper, logLevelToCrawlee } from '../log';\nimport type {\n ActorContext,\n ActorDefinition,\n ActorHookContext,\n ActorRouterContext,\n Metamorph,\n RunCrawler,\n} from './types';\n\nconst actorClassByType = {\n basic: BasicCrawler,\n http: HttpCrawler,\n cheerio: CheerioCrawler,\n jsdom: JSDOMCrawler,\n playwright: PlaywrightCrawler,\n puppeteer: PuppeteerCrawler,\n} satisfies Record<CrawlerType, { new (options: Record<string, any>): any }>;\n\nconst isRouter = (r: any): r is RouterHandler<any> => {\n return !!((r as RouterHandler).addHandler && (r as RouterHandler).addDefaultHandler);\n};\nconst isFunc = (f: any): f is (...args: any[]) => any => {\n return typeof f === 'function';\n};\n\n/** Run a function that was defined as a string via Actor input */\nconst genHookFn = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Pick<ActorContext<Ctx, Labels, Input, TIO>, 'input' | 'state' | 'io'>,\n fnStr?: string\n) => {\n if (!fnStr) return null;\n\n const hookCtx = {\n io: actor.io,\n input: actor.input,\n state: actor.state,\n itemCacheKey,\n sendRequest: gotScraping,\n } satisfies ActorHookContext<TIO>;\n\n const hookFn = eval(fnStr);\n if (!hookFn) return null;\n\n return async (...args) => hookFn(...args, hookCtx);\n};\n\n/**\n * Create default configuration for an opinionated Crawlee actor,\n * and run the actor within Apify's `Actor.main()` context.\n *\n * Apify context can be replaced with custom implementation using the `actorConfig.io` option.\n *\n * Read more about what this actor does at {@link createCrawleeOne}.\n */\nexport const createAndRunCrawleeOne = async <\n TCrawlerType extends CrawlerType,\n Ctx extends CrawlerMeta<TCrawlerType, any>['context'] = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(args: {\n /** String idetifying the actor class, e.g. `'cheerio'` */\n actorType: TCrawlerType;\n actorName: string;\n /** Config passed to the {@link createCrawleeOne} */\n actorConfig: PickPartial<\n ActorDefinition<Ctx, Labels, Input, TIO>,\n 'router' | 'createCrawler' | 'io'\n >;\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that may be overriden by user input.\n */\n crawlerConfigDefaults?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * If using default `createCrawler` implementation, these are crawler options\n * that will override user input.\n *\n * This is useful for testing env.\n */\n crawlerConfigOverrides?: CrawlerMeta<TCrawlerType, any>['options'];\n /**\n * Sentry configuration. If using default `createCrawler` implementation,\n * failed requests are optionally reported to Sentry.\n *\n * To disable Sentry, set `\"enabled\": false`.\n */\n sentryOptions?: Sentry.NodeOptions;\n /**\n * Callback with the created actor. The callback is called within\n * the `Actor.main()` context.\n */\n onActorReady?: (actor: ActorContext<Ctx, Labels, Input, TIO>) => MaybePromise<void>;\n}): Promise<void> => {\n const {\n actorType,\n actorName,\n actorConfig,\n crawlerConfigDefaults,\n crawlerConfigOverrides,\n sentryOptions,\n onActorReady,\n } = args;\n\n const { io = apifyIO as any as TIO } = actorConfig;\n\n await setupSentry({ ...sentryOptions, serverName: actorName }, { io });\n\n // See docs:\n // - https://docs.apify.com/sdk/js/\n // - https://docs.apify.com/academy/deploying-your-code/inputs-outputs#accepting-input-with-the-apify-sdk\n // - https://docs.apify.com/sdk/js/docs/upgrading/upgrading-to-v3#apify-sdk\n await io.runInContext(\n async () => {\n const actorDefaults: ActorDefinition<Ctx, Labels, Input & AllActorInputs, TIO> = {\n io,\n router: Router.create<Ctx>(),\n routerWrappers: ({ input }) => [\n logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n ],\n createCrawler: ({ router, proxy, input }) => {\n const options = createHttpCrawlerOptions<\n CrawlerMeta<TCrawlerType, any>['options'],\n Input\n >({\n input,\n defaults: crawlerConfigDefaults,\n overrides: {\n requestHandler: router,\n proxyConfiguration: proxy,\n // Capture errors in a separate (Apify) Dataset and pass errors to Sentry\n failedRequestHandler: createErrorHandler({\n io,\n reportingDatasetId: input?.errorReportingDatasetId ?? 'REPORTING',\n sendToSentry: input?.errorSendToSentry ?? true,\n }),\n ...crawlerConfigOverrides,\n },\n });\n const CrawlerClass = actorClassByType[actorType] as any;\n return new CrawlerClass(options);\n },\n routes: [],\n routeHandlers: {} as any,\n };\n\n const actor = await createCrawleeOne<Ctx, Labels, Input, TIO>({\n ...actorConfig,\n io,\n router: actorConfig.router ?? (actorDefaults.router as any),\n routerWrappers: actorConfig.routerWrappers ?? (actorDefaults.routerWrappers as any),\n createCrawler: actorConfig.createCrawler ?? (actorDefaults.createCrawler as any),\n });\n\n await onActorReady?.(actor);\n },\n { statusMessage: 'Crawling finished!' }\n );\n};\n\n/**\n * Create opinionated Crawlee crawler that uses router for handling requests.\n *\n * This is a quality-of-life function that does the following for you:\n *\n * 1) Full TypeScript coverage - Ensure all components use the same Crawler / CrawlerContext.\n *\n * 2) Get Actor input from `Actor.getInput` if not given.\n *\n * 3) (Optional) Validate Actor input\n *\n * 4) Set up router such that requests that reach default route are\n * redirected to labelled routes based on which item from \"routes\" they match.\n *\n * 5) Register all route handlers for you.\n *\n * 6) (Optional) Wrap all route handlers in a wrapper. Use this e.g.\n * if you want to add a field to the context object, or handle errors\n * from a single place.\n *\n * 7) (Optional) Support transformation and filtering of (scraped) entries,\n * configured via Actor input.\n *\n * 8) (Optional) Support Actor metamorphing, configured via Actor input.\n *\n * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom\n * implementation using the `io` option.\n */\nexport const createCrawleeOne = async <\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>\n): Promise<ActorContext<Ctx, Labels, Input, TIO>> => {\n const { io = apifyIO as any as TIO } = config;\n\n // Mutable state that is available to the actor hooks\n const state = {};\n\n // Initialize actor inputs\n const rawInput = config.input\n ? isFunc(config.input)\n ? await config.input({ ...config, io })\n : config.input\n : await io.getInput<Input>();\n const input = Object.freeze(await resolveInput<Input | null>(rawInput, state, { io }));\n\n if (config.validateInput) await config.validateInput(input);\n\n const { logLevel } = (input ?? {}) as LoggingActorInput;\n const log = new Log({ level: logLevel ? logLevelToCrawlee[logLevel] : undefined });\n\n // This is context that is available to options that use initialization function\n const getConfig = () => ({ ...config, input, state, io, log });\n\n // Set up proxy\n const defaultProxy =\n config.proxy == null ? await io.createDefaultProxyConfiguration(input ?? undefined) : undefined;\n const proxy =\n config.proxy == null\n ? defaultProxy\n : isFunc(config.proxy)\n ? await config.proxy(getConfig())\n : config.proxy;\n\n // Run initialization functions\n const router: RouterHandler<Ctx> = isRouter(config.router)\n ? config.router\n : await (config.router as any)(getConfig());\n const routes = isFunc(config.routes) ? await config.routes(getConfig()) : config.routes; // prettier-ignore\n const routeHandlers = isFunc(config.routeHandlers) ? await config.routeHandlers(getConfig()) : config.routeHandlers; // prettier-ignore\n const routerWrappers = isFunc(config.routerWrappers) ? await config.routerWrappers(getConfig()) : config.routerWrappers; // prettier-ignore\n\n // Create Crawlee crawler\n const getActorCtx = () => ({\n io,\n router,\n routes,\n routeHandlers,\n proxy,\n config,\n input,\n state,\n log,\n });\n const crawler = await config.createCrawler(getActorCtx());\n\n // Create actor (our custom entity)\n const preActor = { crawler, ...getActorCtx() };\n const runCrawler = createScopedCrawlerRun(preActor);\n const metamorph = createScopedMetamorph(preActor);\n const scopedPushData = createScopedPushData(preActor);\n const scopedPushRequest = createScopedPushRequests(preActor);\n const startUrls = await getStartUrlsFromInput(preActor);\n\n const actor = {\n ...preActor,\n crawler,\n runCrawler,\n metamorph,\n pushData: scopedPushData,\n pushRequests: scopedPushRequest,\n startUrls,\n } satisfies ActorContext<Ctx, Labels, Input, TIO>;\n\n // Extra data that we make available to the route handlers\n const routerContext = { actor, pushData: scopedPushData };\n\n // Set up router\n await setupDefaultRoute<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels, Input>({\n io,\n router,\n routerWrappers,\n routerContext,\n routes,\n routeHandlers,\n input,\n });\n await registerHandlers<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>({\n router,\n routerWrappers,\n routerContext,\n routeHandlers,\n });\n\n // Now that the actor is ready, enqueue the URLs right away\n await scopedPushRequest(startUrls as CrawleeRequest[]);\n\n return actor;\n};\n\nconst resolveInput = async <T extends Record<string, any> | null>(\n input: object | null,\n state: Record<string, unknown>,\n options?: { io?: CrawleeOneIO }\n) => {\n const { io = apifyIO as CrawleeOneIO } = options ?? {};\n const { inputExtendUrl, inputExtendFromFunction } = (input ?? {}) as InputActorInput;\n\n const inputFromUrl = inputExtendUrl ? await gotScraping.get(inputExtendUrl).json<object>() : null;\n const inputFn = genHookFn({ state, input, io }, inputExtendFromFunction);\n const inputFromFunc = (await inputFn?.()) ?? null;\n const extendedInput = { ...inputFromUrl, ...inputFromFunc, ...input };\n\n return extendedInput as T;\n};\n\n/**\n * Create a function that wraps `crawler.run(requests, runOtions)` with additional\n * features like:\n * - Automatically metamorph into another actor after the run finishes\n */\nconst createScopedCrawlerRun = <\n Ctx extends CrawlingContext<any> = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n>(\n actor: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n) => {\n const {\n requestTransformBefore,\n requestTransformAfter,\n requestFilterBefore,\n requestFilterAfter,\n outputTransformBefore,\n outputTransformAfter,\n outputFilterBefore,\n outputFilterAfter,\n outputCacheStoreId,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & RequestActorInput;\n\n const metamorph = createScopedMetamorph(actor);\n\n const runCrawler: RunCrawler<Ctx> = async (requests, options) => {\n // Clear cache if it was set from the input\n if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {\n const store = await actor.io.openKeyValueStore(outputCacheStoreId);\n await store.drop();\n }\n\n await genHookFn(actor, outputTransformBefore)?.();\n await genHookFn(actor, outputFilterBefore)?.();\n await genHookFn(actor, requestTransformBefore)?.();\n await genHookFn(actor, requestFilterBefore)?.();\n\n const runRes = await actor.crawler.run(requests, options);\n\n await genHookFn(actor, outputTransformAfter)?.();\n await genHookFn(actor, outputFilterAfter)?.();\n await genHookFn(actor, requestTransformAfter)?.();\n await genHookFn(actor, requestFilterAfter)?.();\n\n // Trigger metamorph if it was set from the input\n await metamorph();\n\n return runRes;\n };\n\n return runCrawler;\n};\n\n/** Create a function that triggers metamorph, using Actor's inputs as defaults. */\nconst createScopedMetamorph = (actor: Pick<ActorContext, 'input' | 'io'>) => {\n // Trigger metamorph if it was set from the input\n const metamorph: Metamorph = async (overrides?: MetamorphActorInput) => {\n const {\n metamorphActorId,\n metamorphActorBuild,\n metamorphActorInput,\n } = defaults({}, overrides, actor.input ?? {}); // prettier-ignore\n\n if (!metamorphActorId) return;\n\n await actor.io.triggerDownstreamCrawler(metamorphActorId, metamorphActorInput, {\n build: metamorphActorBuild,\n });\n };\n\n return metamorph;\n};\n\n/** pushData wrapper that pre-populates options based on actor input */\nconst createScopedPushData = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const {\n includePersonalData,\n requestQueueId,\n outputMaxEntries,\n outputTransform,\n outputFilter,\n outputDatasetId,\n outputPickFields,\n outputRenameFields,\n outputCacheStoreId,\n outputCachePrimaryKeys,\n outputCacheActionOnResult,\n } = (actor.input ?? {}) as OutputActorInput & PrivacyActorInput & RequestActorInput;\n\n const scopedPushData: ActorContext['pushData'] = async (entries, ctx, options) => {\n const transformFn = genHookFn(actor, outputTransform);\n const filterFn = genHookFn(actor, outputFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n showPrivate: includePersonalData,\n maxCount: outputMaxEntries,\n pickKeys: outputPickFields,\n remapKeys: outputRenameFields,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n datasetId: outputDatasetId,\n requestQueueId,\n cacheStoreId: outputCacheStoreId,\n cachePrimaryKeys: outputCachePrimaryKeys,\n cacheActionOnResult: outputCacheActionOnResult,\n ...options,\n } satisfies PushDataOptions<object>;\n\n return pushData(entries, ctx, mergedOptions);\n };\n\n return scopedPushData;\n};\n\n/** pushRequests wrapper that pre-populates options based on actor input */\nconst createScopedPushRequests = (actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>) => {\n const { requestQueueId, requestMaxEntries, requestTransform, requestFilter } = (actor.input ??\n {}) as RequestActorInput;\n\n const scopedPushRequest: ActorContext['pushRequests'] = async (entries, options) => {\n const transformFn = genHookFn(actor, requestTransform);\n const filterFn = genHookFn(actor, requestFilter);\n\n const mergedOptions = {\n io: actor.io,\n log: actor.log,\n maxCount: requestMaxEntries,\n transform: transformFn ? (item) => transformFn(item) : undefined,\n filter: filterFn ? (item) => filterFn(item) : undefined,\n requestQueueId,\n ...options,\n } satisfies PushRequestsOptions<any>;\n\n return pushRequests(entries, mergedOptions);\n };\n\n return scopedPushRequest;\n};\n\n/** Given the actor input, create common crawler options. */\nexport const createHttpCrawlerOptions = <\n TOpts extends BasicCrawlerOptions<any> = BasicCrawlerOptions,\n Input extends Record<string, any> = Record<string, any>\n>({\n input,\n defaults,\n overrides,\n}: {\n /** Actor input */\n input: Input | null;\n /**\n * Default config options set by us. These may be overriden\n * by values from actor input (set by user).\n */\n defaults?: TOpts;\n /**\n * These config options will overwrite both the default and user\n * options. This is useful for hard-setting values e.g. in tests.\n */\n overrides?: TOpts;\n}) => {\n const pickCrawlerInputFields = <T extends CrawlerConfigActorInput>(config: T) =>\n pick(config, Object.keys(crawlerInput));\n\n return {\n // ----- 1. DEFAULTS -----\n ...omitBy(defaults ?? ({} as TOpts), (field) => field === undefined),\n // ----- 2. CONFIG FROM INPUT -----\n ...omitBy(pickCrawlerInputFields(input ?? {}), (field) => field === undefined),\n // ----- 3. OVERRIDES - E.G. TEST CONFIG -----\n ...omitBy(overrides ?? ({} as TOpts), (field) => field === undefined),\n } satisfies Partial<TOpts>;\n};\n\nconst getStartUrlsFromInput = async (\n actor: Pick<ActorContext, 'input' | 'state' | 'io' | 'log'>\n) => {\n const { startUrls, startUrlsFromDataset, startUrlsFromFunction } = (actor.input ??\n {}) as StartUrlsActorInput;\n\n const urlsAgg = [...(startUrls ?? [])];\n\n if (startUrlsFromDataset) {\n actor.log.debug(`Loading start URLs from Dataset ${startUrlsFromDataset}`);\n const [datasetId, field] = startUrlsFromDataset.split('#');\n const urlsFromDataset = await getColumnFromDataset<any>(datasetId, field, { io: actor.io });\n urlsAgg.push(...urlsFromDataset);\n }\n\n if (startUrlsFromFunction) {\n actor.log.debug(`Loading start URLs from function`);\n const urlsFromFn = await genHookFn(actor, startUrlsFromFunction)?.();\n urlsAgg.push(...urlsFromFn);\n }\n\n return urlsAgg;\n};\n"]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { BasicCrawler, CrawlingContext, ProxyConfiguration, RouterHandler } from 'crawlee';
|
|
1
|
+
import type { BasicCrawler, CrawlingContext, Log, ProxyConfiguration, RouterHandler } from 'crawlee';
|
|
2
2
|
import type { gotScraping } from 'got-scraping';
|
|
3
3
|
import type { MaybePromise, PickPartial } from '../../utils/types';
|
|
4
4
|
import type { CrawlerUrl } from '../../types';
|
|
@@ -158,5 +158,6 @@ export interface ActorContext<Ctx extends CrawlingContext = CrawlingContext<Basi
|
|
|
158
158
|
* This is modelled and similar to Apify's `Actor` static class.
|
|
159
159
|
*/
|
|
160
160
|
io: TIO;
|
|
161
|
+
log: Log;
|
|
161
162
|
}
|
|
162
163
|
export {};
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/lib/actor/types.ts"],"names":[],"mappings":"","sourcesContent":["import type {
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../../../../src/lib/actor/types.ts"],"names":[],"mappings":"","sourcesContent":["import type {\n BasicCrawler,\n CrawlingContext,\n Log,\n ProxyConfiguration,\n RouterHandler,\n} from 'crawlee';\nimport type { gotScraping } from 'got-scraping';\n\nimport type { MaybePromise, PickPartial } from '../../utils/types';\nimport type { CrawlerUrl } from '../../types';\nimport type { itemCacheKey, pushData } from '../io/pushData';\nimport type { pushRequests } from '../io/pushRequests';\nimport type { RouteHandler, RouteMatcher, CrawlerRouterWrapper } from '../router';\nimport type { MetamorphActorInput } from '../config';\nimport type { CrawleeOneIO } from '../integrations/types';\n\ntype MaybeAsyncFn<R, Args extends any[]> = R | ((...args: Args) => MaybePromise<R>);\n\ntype OrigRunCrawler<T extends CrawlingContext<any, any>> = BasicCrawler<T>['run'];\n\n/** Extended type of `crawler.run()` function */\nexport type RunCrawler<Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>> = (\n requests?: CrawlerUrl[],\n options?: Parameters<OrigRunCrawler<Ctx>>[1]\n) => ReturnType<OrigRunCrawler<Ctx>>;\n\n/** Trigger actor metamorph, using actor's inputs as defaults. */\nexport type Metamorph = (overrides?: MetamorphActorInput) => Promise<void>;\n\n/** Context passed to route handlers */\nexport type ActorRouterContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = {\n actor: ActorContext<Ctx, Labels, Input, TIO>;\n};\n\n/** Context passed to user-defined functions passed from input */\nexport type ActorHookContext<TIO extends CrawleeOneIO> = Pick<ActorContext, 'input' | 'state'> & {\n io: TIO;\n itemCacheKey: typeof itemCacheKey;\n sendRequest: typeof gotScraping;\n};\n\nexport interface ActorDefinition<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n /** Client for communicating with cloud/local storage. */\n io: TIO;\n\n // Actor input\n /**\n * Actor input which you can get e.g. via `Actor.getInput()`\n *\n * Input is automatically retrieved if undefined.\n */\n input?: MaybeAsyncFn<Input, [ActorDefinition<Ctx, Labels, Input, TIO>]>;\n /** Validation for the actor input. Should throw error if validation fails. */\n validateInput?: (input: Input | null) => MaybePromise<void>;\n\n // Router setup\n /**\n * Router instance that redirects the request to handlers.\n * @example\n * import { createCheerioRouter } from 'crawlee';\n *\n * ({\n * ...\n * router: createCheerioRouter(),\n * })\n */\n router: MaybeAsyncFn<RouterHandler<Ctx>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>;\n /**\n * Criteria that un-labelled requests are matched against.\n *\n * E.g. If `match` function returns truthy value,\n * the request is passed to the `action` function for processing.\n *\n * @example\n * ({\n * ...\n * routes: [{\n * // If match returns true, the request is forwarded to handler\n * // with label JOB_DETAIL.\n * name: 'Job detail',\n * handlerLabel: routeLabels.JOB_DETAIL,\n * match: (url) => isUrlOfJobOffer(url),\n * }, {\n * // Define custom action function:\n * // If match returns true, we replace this request with new one\n * // pointing to new domain.\n * name: 'Main page',\n * handlerLabel: null,\n * match: (url) => url.match(/example\\.com\\/?(?:[?#~]|$)/i),\n * action: async (url, ctx, _, handlers) => {\n * ctx.log.info(`Redirecting to https://www.new-domain.com`);\n * await ctx.crawler.addRequests(['https://www.new-domain.com'], { forefront: true });\n * },\n * }],\n * })\n */\n routes: MaybeAsyncFn<\n RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[],\n [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]\n >;\n /** Handlers for the labelled requests. The object keys are the labels. */\n routeHandlers: MaybeAsyncFn<Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n /**\n * Provides the option to modify or extend all router handlers by wrapping\n * them in these functions.\n *\n * Wrappers are applied from right to left. That means that wrappers `[A, B, C]`\n * will be applied like so `A( B( C( handler ) ) )`.\n *\n * Default `routerWrappers`:\n * ```js\n * {\n * ...\n * routerWrappers: ({ input }) => [\n * logLevelHandlerWrapper<Ctx, any>(input?.logLevel ?? 'info'),\n * ],\n * }\n * ```\n */\n routerWrappers?: MaybeAsyncFn<CrawlerRouterWrapper<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>[], [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Proxy setup\n proxy?: MaybeAsyncFn<ProxyConfiguration, [ActorDefinitionWithInput<Ctx, Labels, Input, TIO>]>; // prettier-ignore\n\n // Crawler setup\n createCrawler: (\n actorCtx: Omit<\n ActorContext<Ctx, Labels, Input, TIO>,\n 'crawler' | 'runCrawler' | 'metamorph' | 'pushData' | 'pushRequests' | 'startUrls'\n >\n ) => MaybePromise<Ctx['crawler']>;\n}\n\n/** ActorDefinition object where the input is already resolved */\nexport type ActorDefinitionWithInput<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> = Omit<ActorDefinition<Ctx, Labels, Input, TIO>, 'input'> & {\n input: Input | null;\n state: Record<string, unknown>;\n};\n\n/** Context available while creating a Crawlee crawler/actor */\nexport interface ActorContext<\n Ctx extends CrawlingContext = CrawlingContext<BasicCrawler>,\n Labels extends string = string,\n Input extends Record<string, any> = Record<string, any>,\n TIO extends CrawleeOneIO = CrawleeOneIO\n> {\n crawler: Ctx['crawler'];\n /**\n * This function wraps `crawler.run(requests, runOtions)` with additional\n * features:\n * - Automatically metamorph into another actor after the run finishes\n */\n runCrawler: RunCrawler<Ctx>;\n /** Trigger actor metamorph, using actor's inputs as defaults. */\n metamorph: Metamorph;\n /**\n * `Actor.pushData` with extra optional features:\n *\n * - Limit the number of entries pushed to the Dataset based on the Actor input\n * - Transform and filter entries via Actor input.\n * - Add metadata to entries before they are pushed to Dataset.\n * - Set which (nested) properties are personal data optionally redact them for privacy compliance.\n */\n pushData: typeof pushData;\n /**\n * Similar to `Actor.openRequestQueue().addRequests`, but with extra features:\n *\n * - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.\n * - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.\n */\n pushRequests: typeof pushRequests;\n /**\n * A list of resolved Requests to be scraped.\n *\n * This list is a combination of 3 Actor inputs:\n * - `startUrls` - Static list of URLs to scrape.\n * - `startUrlsFromDataset` - From a specific field from a Dataset (e.g. \"dataset123#fieldName\" - Dataset: \"dataset123\", field: \"fieldName\").\n * - `startUrlsFromFunction` - A function that is evaulated to generate the Requests.\n */\n startUrls: CrawlerUrl[];\n proxy?: ProxyConfiguration;\n router: RouterHandler<Ctx>;\n routes: RouteMatcher<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>, Labels>[];\n routeHandlers: Record<Labels, RouteHandler<Ctx, ActorRouterContext<Ctx, Labels, Input, TIO>>>;\n /** Original config from which this actor context was created */\n config: PickPartial<ActorDefinition<Ctx, Labels, Input, TIO>, 'io'>;\n /** Read-only inputs passed to the actor */\n input: Input | null;\n /** Mutable state that is shared across setup and teardown hooks */\n state: Record<string, unknown>;\n /**\n * Instance managing communication with databases - storage & retrieval\n * (Dataset, RequestQueue, KeyValueStore).\n *\n * This is modelled and similar to Apify's `Actor` static class.\n */\n io: TIO;\n log: Log;\n}\n"]}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { CrawlingContext, Log } from 'crawlee';
|
|
2
2
|
import type { CrawleeOneIO } from '../integrations/types';
|
|
3
3
|
/** Functions that generates a "redacted" version of a value */
|
|
4
4
|
export type PrivateValueGen<V, K, O> = (val: V, key: K, obj: O) => any;
|
|
@@ -28,6 +28,7 @@ export type PrivacyMask<T extends object> = {
|
|
|
28
28
|
};
|
|
29
29
|
export interface PushDataOptions<T extends object> {
|
|
30
30
|
io?: CrawleeOneIO<any, any>;
|
|
31
|
+
log?: Log;
|
|
31
32
|
/**
|
|
32
33
|
* If set, only at most this many entries will be scraped.
|
|
33
34
|
*
|
|
@@ -10,6 +10,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.pushData = exports.itemCacheKey = void 0;
|
|
13
|
+
const crawlee_1 = require("crawlee");
|
|
13
14
|
const lodash_1 = require("lodash");
|
|
14
15
|
const async_1 = require("../../utils/async");
|
|
15
16
|
const apify_1 = require("../integrations/apify");
|
|
@@ -144,12 +145,12 @@ const shortenToSize = (entries, maxCount, options) => __awaiter(void 0, void 0,
|
|
|
144
145
|
* - Add/remove entries to/from KeyValueStore. Entries are saved to the store by hash generated from entry fields set by `cachePrimaryKeys`.
|
|
145
146
|
*/
|
|
146
147
|
const pushData = (oneOrManyItems, ctx, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
147
|
-
const { io = apify_1.apifyIO, maxCount, includeMetadata, showPrivate, privacyMask, remapKeys, pickKeys, transform, filter, datasetId, requestQueueId, cacheStoreId, cachePrimaryKeys, cacheActionOnResult, } = options;
|
|
148
|
+
const { io = apify_1.apifyIO, log = new crawlee_1.Log(), maxCount, includeMetadata, showPrivate, privacyMask, remapKeys, pickKeys, transform, filter, datasetId, requestQueueId, cacheStoreId, cachePrimaryKeys, cacheActionOnResult, } = options;
|
|
148
149
|
const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];
|
|
149
150
|
const items = maxCount != null
|
|
150
|
-
? yield shortenToSize(manyItems, maxCount, { io, datasetId, requestQueueId, log
|
|
151
|
+
? yield shortenToSize(manyItems, maxCount, { io, datasetId, requestQueueId, log })
|
|
151
152
|
: manyItems;
|
|
152
|
-
|
|
153
|
+
log.debug(`Preparing to push ${items.length} entries to dataset`); // prettier-ignore
|
|
153
154
|
const addMetadataToData = yield createMetadataMapper(ctx, { io });
|
|
154
155
|
const adjustedItems = yield items.reduce((aggPromise, item) => __awaiter(void 0, void 0, void 0, function* () {
|
|
155
156
|
const agg = yield aggPromise;
|
|
@@ -168,13 +169,13 @@ const pushData = (oneOrManyItems, ctx, options) => __awaiter(void 0, void 0, voi
|
|
|
168
169
|
return agg;
|
|
169
170
|
}), Promise.resolve([]));
|
|
170
171
|
// Push entries to primary dataset
|
|
171
|
-
|
|
172
|
+
log.info(`Pushing ${adjustedItems.length} entries to dataset`);
|
|
172
173
|
const dataset = yield io.openDataset(datasetId);
|
|
173
174
|
yield dataset.pushData(adjustedItems);
|
|
174
|
-
|
|
175
|
+
log.info(`Done pushing ${adjustedItems.length} entries to dataset`);
|
|
175
176
|
// Update entries in cache
|
|
176
177
|
if (cacheStoreId && cacheActionOnResult) {
|
|
177
|
-
|
|
178
|
+
log.info(`Update ${adjustedItems.length} entries in cache`);
|
|
178
179
|
const store = yield io.openKeyValueStore(cacheStoreId);
|
|
179
180
|
yield (0, async_1.serialAsyncMap)(adjustedItems, (item) => __awaiter(void 0, void 0, void 0, function* () {
|
|
180
181
|
const cacheId = (0, exports.itemCacheKey)(item, cachePrimaryKeys);
|
|
@@ -185,7 +186,7 @@ const pushData = (oneOrManyItems, ctx, options) => __awaiter(void 0, void 0, voi
|
|
|
185
186
|
yield store.setValue(cacheId, null);
|
|
186
187
|
}
|
|
187
188
|
}));
|
|
188
|
-
|
|
189
|
+
log.info(`Done updating ${adjustedItems.length} entries in cache`);
|
|
189
190
|
}
|
|
190
191
|
return adjustedItems;
|
|
191
192
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pushData.js","sourceRoot":"","sources":["../../../../src/lib/io/pushData.ts"],"names":[],"mappings":";;;;;;;;;;;;AACA,mCAAuF;AAEvF,6CAAmD;AAEnD,iDAAmE;AACnE,uCAA+C;AAqH/C,MAAM,oBAAoB,GAAG,CAI3B,GAAQ,EACR,OAAoB,EACpB,EAAE;IACF,MAAM,EAAE,EAAE,GAAG,eAAO,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAEvC,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,qBAAqB,CAAC,GAAG,CAAC,CAAC;IACrD,MAAM,iBAAiB,GAAG,CAAmB,IAAO,EAAE,EAAE,CAAC,iCAAM,IAAI,KAAE,QAAQ,IAAG,CAAC;IACjF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAA,CAAC;AAEF,MAAM,gBAAgB,GAAG,CACvB,IAAO,EACP,OAIC,EACD,EAAE;IACF,MAAM,EACJ,WAAW,EACX,WAAW,EACX,eAAe,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,uBAAuB,GAAG,IAAI,GAC7D,GAAG,OAAO,CAAC;IAEZ,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAE,GAAQ,EAAE,EAAE;QACpD,kDAAkD;QAClD,2DAA2D;QAC3D,IAAI,kBAAkB,CAAC;QACvB,IAAI,2BAA2B,GAAG,KAAK,CAAC;QACxC,MAAM,qBAAqB,GAAG,CAAC,GAAQ,EAAE,EAAE;YACzC,kBAAkB,GAAG,GAAG,CAAC;YACzB,2BAA2B,GAAG,IAAI,CAAC;QACrC,CAAC,CAAC;QAEF,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAA6C,CAAC;QACnF,MAAM,SAAS,GAAG,aAAa;YAC7B,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,qBAAqB,EAAE,eAAe,EAAE,CAAC;YAC3E,CAAC,CAAC,KAAK,CAAC;QAEV,kBAAkB;QAClB,MAAM,YAAY,GAAG;QACnB,4DAA4D;QAC5D,WAAW,CAAC,CAAC,CAAC,GAAG;YACjB,iDAAiD;YACjD,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,kBAAkB;gBAClD,+CAA+C;gBAC/C,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CACpD,CAAC;QACF,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC;IAEF,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE;;QAClE,MAAM,WAAW,GACf,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,IAAI,IAAI,IAAI,CAAC,CAAC,GAAG,YAAY,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAE1F,IAAI,WAAW,EAAE;YACf,qCAAqC;YACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,EAAE;gBACnC,WAAW;gBACX,WAAW,EAAE,CAAC,MAAA,WAAW,CAAC,GAAG,CAAC,mCAAI,EAAE,CAAQ;gBAC5C,eAAe;aAChB,CAAC,CAAC;YACH,GAAG,CAAC,GAAc,CAAC,GAAG,MAAa,CAAC;SACrC;aAAM;YACL,GAAG,CAAC,GAAc,CAAC,GAAG,mBAAmB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;SACrD;QACD,OAAO,GAAG,CAAC;IACb,CAAC,EAAE,EAAO,CAAC,CAAC;IAEZ,OAAO,WAAW,CAAC;AACrB,CAAC,CAAC;AAEF,wCAAwC;AACxC,MAAM,UAAU,GAAG,CAAmB,IAAO,EAAE,UAAkC,EAAE,EAAE;IACnF,MAAM,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE;QAC9D,IAAI,OAAO,KAAK,OAAO;YAAE,OAAO;QAChC,MAAM,GAAG,GAAG,IAAA,YAAG,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/B,IAAA,YAAG,EAAC,IAAI,EAAE,OAAiB,EAAE,GAAG,CAAC,CAAC;QAClC,IAAA,cAAK,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC,CAAC;AAEF,MAAM,cAAc,GAAG,CAAmB,GAAM,EAAE,EAAE,CAClD,IAAA,kBAAS,EAAC,IAAA,eAAM,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AAEpE;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAAC,IAAS,EAAE,WAAsB,EAAE,EAAE;IAChE,MAAM,cAAc,GAAG,WAAW;QAChC,CAAC,CAAC,IAAA,eAAM,EAAC,IAAA,aAAI,EAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAD,CAAC,uBAAD,CAAC,CAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,CAAC,CAAC,IAAI,CAAC;IAET,MAAM,cAAc,GAAG,cAAc;QACnC,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QAChD,CAAC,CAAC,IAAI,IAAI,IAAA,sBAAa,EAAC,IAAI,CAAC;YAC7B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,qCAAqC;YAC5E,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,OAAO,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC;IACvC,OAAO,OAAO,CAAC,QAAQ,EAAE,CAAC;AAC5B,CAAC,CAAC;AAbW,QAAA,YAAY,gBAavB;AAEF;;;;GAIG;AACH,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,CAAC,EAAE,EAAE;IAC/B,IAAI,EAAE,GAAG,UAAU,GAAG,IAAI,EACxB,EAAE,GAAG,UAAU,GAAG,IAAI,CAAC;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACvC,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACvB,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QACpC,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;KACrC;IACD,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC7C,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC9C,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC7C,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAE9C,OAAO,UAAU,GAAG,CAAC,OAAO,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AAClD,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CACpB,OAAY,EACZ,QAAgB,EAChB,OAAsF,EACtF,EAAE;IACF,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAC7D,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;IAE7D,MAAM,WAAW,GAAG,IAAA,4BAAkB,EAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC,CAAC;IAEpF,yDAAyD;IACzD,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;IACjD,IAAI,aAAa,EAAE;QACjB,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,YAAY,WAAW,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAC7H,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,4EAA4E;IAC5E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/D,IAAI,aAAa,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE;QAC3C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,YAAY,WAAW,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAC7H,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAEF;;;;;;;;;;GAUG;AACI,MAAM,QAAQ,GAAG,CAItB,cAAuB,EACvB,GAAQ,EACR,OAA2B,EAC3B,EAAE;IACF,MAAM,EACJ,EAAE,GAAG,eAAuB,EAC5B,QAAQ,EACR,eAAe,EACf,WAAW,EACX,WAAW,EACX,SAAS,EACT,QAAQ,EACR,SAAS,EACT,MAAM,EACN,SAAS,EACT,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,mBAAmB,GACpB,GAAG,OAAO,CAAC;IAEZ,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACpF,MAAM,KAAK,GACT,QAAQ,IAAI,IAAI;QACd,CAAC,CAAC,MAAM,aAAa,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC;QAC3F,CAAC,CAAC,SAAS,CAAC;IAEhB,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,qBAAqB,KAAK,CAAC,MAAM,qBAAqB,CAAC,CAAC,CAAC,kBAAkB;IACzF,MAAM,iBAAiB,GAAG,MAAM,oBAAoB,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;IAElE,MAAM,aAAa,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAO,UAAU,EAAE,IAAI,EAAE,EAAE;QAClE,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC;QAE7B,MAAM,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC1E,MAAM,UAAU,GAAG,gBAAgB,CAAC,gBAAgB,EAAE;YACpD,WAAW;YACX,WAAW;YACX,eAAe,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAC5B,uBAAuB,GAAG,qFAAqF;SAClH,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;QAC/E,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAA,aAAI,EAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QACxE,MAAM,eAAe,GAAG,SAAS,CAAC,CAAC,CAAC,MAAM,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;QAC7E,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnE,IAAI,YAAY;YAAE,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE5C,OAAO,GAAG,CAAC;IACb,CAAC,CAAA,EAAE,OAAO,CAAC,OAAO,CAAC,EAAe,CAAC,CAAC,CAAC;IAErC,kCAAkC;IAClC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,aAAa,CAAC,MAAM,qBAAqB,CAAC,CAAC;IACnE,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;IAChD,MAAM,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;IACtC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,gBAAgB,aAAa,CAAC,MAAM,qBAAqB,CAAC,CAAC;IAExE,0BAA0B;IAC1B,IAAI,YAAY,IAAI,mBAAmB,EAAE;QACvC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,aAAa,CAAC,MAAM,mBAAmB,CAAC,CAAC;QAChE,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC;QACvD,MAAM,IAAA,sBAAc,EAAC,aAAa,EAAE,CAAO,IAAS,EAAE,EAAE;YACtD,MAAM,OAAO,GAAG,IAAA,oBAAY,EAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;YAErD,IAAI,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE;gBACtD,MAAM,KAAK,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;aACrC;iBAAM,IAAI,mBAAmB,KAAK,QAAQ,EAAE;gBAC3C,MAAM,KAAK,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;aACrC;QACH,CAAC,CAAA,CAAC,CAAC;QACH,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,iBAAiB,aAAa,CAAC,MAAM,mBAAmB,CAAC,CAAC;KACxE;IAED,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AA9EW,QAAA,QAAQ,YA8EnB","sourcesContent":["import type { CrawlingContext, Log } from 'crawlee';\nimport { get, pick, set, unset, uniq, sortBy, isPlainObject, fromPairs } from 'lodash';\n\nimport { serialAsyncMap } from '../../utils/async';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { ApifyCrawleeOneIO, apifyIO } from '../integrations/apify';\nimport { datasetSizeMonitor } from './dataset';\n\n/** Functions that generates a \"redacted\" version of a value */\nexport type PrivateValueGen<V, K, O> = (val: V, key: K, obj: O) => any;\n\n/**\n * Given a property value (and its position) this function\n * determines if the property is considered private (and\n * hence should be hidden for privacy reasons).\n *\n * Property is private if the function returns truthy value.\n */\nexport type PrivacyFilter<V, K, O> = (\n val: V,\n key: K,\n obj: O,\n options?: {\n setCustomPrivateValue: (val: V) => any;\n privateValueGen: PrivateValueGen<V, K, O>;\n }\n) => any;\n\n/**\n * PrivacyMask determines which (potentally nested) properties\n * of an object are considered private.\n *\n * PrivacyMask copies the structure of another object, but each\n * non-object property on PrivacyMask is a PrivacyFilter - function\n * that determines if the property is considered private.\n *\n * Property is private if the function returns truthy value.\n */\nexport type PrivacyMask<T extends object> = {\n [Key in keyof T]?: T[Key] extends Date | any[] // Consider Data and Array as non-objects\n ? PrivacyFilter<T[Key], Key, T>\n : T[Key] extends object\n ? PrivacyMask<T[Key]>\n : PrivacyFilter<T[Key], Key, T>;\n};\n\nexport interface PushDataOptions<T extends object> {\n io?: CrawleeOneIO<any, any>;\n /**\n * If set, only at most this many entries will be scraped.\n *\n * The count is determined from the Dataset that's used for the crawler run.\n *\n * This means that if `maxCount` is set to 50, but the\n * associated Dataset already has 40 items in it, then only 10 new entries\n * will be saved.\n */\n maxCount?: number;\n /**\n * Whether items should be enriched with request and run metadata.\n *\n * If truthy, the metadata is set under the `metadata` property.\n */\n includeMetadata?: boolean;\n /**\n * Whether properties that are considered personal data should be shown as is.\n *\n * If falsy or not set, these properties are redacted to hide the actual information.\n *\n * Which properties are personal data is determined by `privacyMask`.\n */\n showPrivate?: boolean;\n /**\n * Determine which properties are considered personal data.\n *\n * See {@link PrivacyMask}.\n **/\n privacyMask: PrivacyMask<T>;\n /**\n * Option to select which keys (fields) of an entry to keep (discarding the rest)\n * before pushing the entries to the dataset.\n *\n * This serves mainly to allow users to select the keys from actor input UI.\n *\n * This is done before `remapKeys`.\n *\n * Keys can be nested, e.g. `\"someProp.value[0]\"`. Nested path is\n * resolved using Lodash.get().\n */\n pickKeys?: string[];\n /**\n * Option to remap the keys before pushing the entries to the dataset.\n *\n * This serves mainly to allow users to remap the keys from actor input UI.\n *\n * Keys can be nested, e.g. `\"someProp.value[0]\"`. Nested path is\n * resolved using Lodash.get().\n */\n remapKeys?: Record<string, string>;\n /**\n * Option to freely transform an entry before pushing it to the dataset.\n *\n * This serves mainly to allow users to transform the entries from actor input UI.\n */\n transform?: (item: any) => any;\n /**\n * Option to filter an entry before pushing it to the dataset.\n *\n * This serves mainly to allow users to filter the entries from actor input UI.\n */\n filter?: (item: any) => any;\n /** ID or name of the dataset to which the data should be pushed */\n datasetId?: string;\n /** ID of the RequestQueue that stores remaining requests */\n requestQueueId?: string;\n /** ID or name of the key-value store used as cache */\n cacheStoreId?: string;\n /** Define fields that uniquely identify entries for caching */\n cachePrimaryKeys?: string[];\n /** Define whether we want to add, remove, or overwrite cached entries with results from the actor run */\n cacheActionOnResult?: 'add' | 'remove' | 'overwrite' | null;\n}\n\nconst createMetadataMapper = async <\n Ctx extends CrawlingContext,\n TIO extends CrawleeOneIO<any, any> = ApifyCrawleeOneIO\n>(\n ctx: Ctx,\n options: { io: TIO }\n) => {\n const { io = apifyIO } = options ?? {};\n\n const metadata = await io.generateEntryMetadata(ctx);\n const addMetadataToData = <T extends object>(item: T) => ({ ...item, metadata });\n return addMetadataToData;\n};\n\nconst applyPrivacyMask = <T extends Record<any, any> = Record<any, any>>(\n item: T,\n options: {\n showPrivate?: boolean;\n privacyMask: PrivacyMask<T>;\n privateValueGen?: (val: any, key: string, item: T) => any;\n }\n) => {\n const {\n showPrivate,\n privacyMask,\n privateValueGen = (_, key) => `<Redacted property \"${key}\">`,\n } = options;\n\n const resolvePrivateValue = (key: string, val: any) => {\n // Allow to set custom \"redacted\" value by calling\n // `setCustomPrivateValue` from inside the filter function.\n let customPrivateValue;\n let setCustomPrivateValueCalled = false;\n const setCustomPrivateValue = (val: any) => {\n customPrivateValue = val;\n setCustomPrivateValueCalled = true;\n };\n\n const privacyFilter = privacyMask[key] as PrivacyFilter<any, any, any> | undefined;\n const isPrivate = privacyFilter\n ? privacyFilter(val, key, item, { setCustomPrivateValue, privateValueGen })\n : false;\n\n // prettier-ignore\n const privateValue = (\n // Don't redact anything if we're asked to show private data\n showPrivate ? val\n // Otherwise, if custom value was given, use that\n : setCustomPrivateValueCalled ? customPrivateValue\n // Otherwise, decide based on filter truthiness\n : isPrivate ? privateValueGen(val, key, item) : val\n );\n return privateValue;\n };\n\n const redactedObj = Object.entries(item).reduce((agg, [key, val]) => {\n const isNestedObj =\n typeof val === 'object' && val != null && !(val instanceof Date) && !Array.isArray(val);\n\n if (isNestedObj) {\n // Recursively process nested objects\n const subObj = applyPrivacyMask(val, {\n showPrivate,\n privacyMask: (privacyMask[key] ?? {}) as any,\n privateValueGen,\n });\n agg[key as keyof T] = subObj as any;\n } else {\n agg[key as keyof T] = resolvePrivateValue(key, val);\n }\n return agg;\n }, {} as T);\n\n return redactedObj;\n};\n\n/** Rename object properties in place */\nconst renameKeys = <T extends object>(item: T, keyNameMap: Record<string, string>) => {\n Object.entries(keyNameMap || {}).forEach(([oldPath, newPath]) => {\n if (oldPath === newPath) return;\n const val = get(item, oldPath);\n set(item, newPath as string, val);\n unset(item, oldPath);\n });\n return item;\n};\n\nconst sortObjectKeys = <T extends object>(obj: T) =>\n fromPairs(sortBy(Object.keys(obj)).map((key) => [key, obj[key]]));\n\n/**\n * Serialize dataset item to fixed-length hash.\n *\n * NOTE: Apify (around which this lib is designed) allows the key-value store key\n * to be max 256 char long.\n * https://docs.apify.com/sdk/js/reference/class/KeyValueStore#setValue\n */\nexport const itemCacheKey = (item: any, primaryKeys?: string[]) => {\n const thePrimaryKeys = primaryKeys\n ? sortBy(uniq(primaryKeys.map((s) => s?.trim()).filter(Boolean)))\n : null;\n\n const serializedItem = thePrimaryKeys\n ? thePrimaryKeys.map((k) => item?.[k]).join(':')\n : item && isPlainObject(item)\n ? JSON.stringify(sortObjectKeys(item)) // If possible sort the object's keys\n : JSON.stringify(item);\n\n const cacheId = cyrb53(serializedItem);\n return cacheId.toString();\n};\n\n/**\n * Hashing function used when calculating cache ID hash from entries.\n *\n * See https://stackoverflow.com/a/52171480/9788634.\n */\nconst cyrb53 = (str, seed = 0) => {\n let h1 = 0xdeadbeef ^ seed,\n h2 = 0x41c6ce57 ^ seed;\n for (let i = 0, ch; i < str.length; i++) {\n ch = str.charCodeAt(i);\n h1 = Math.imul(h1 ^ ch, 2654435761);\n h2 = Math.imul(h2 ^ ch, 1597334677);\n }\n h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507);\n h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909);\n h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507);\n h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909);\n\n return 4294967296 * (2097151 & h2) + (h1 >>> 0);\n};\n\nconst shortenToSize = async <T>(\n entries: T[],\n maxCount: number,\n options?: { io?: CrawleeOneIO; datasetId?: string; requestQueueId?: string; log: Log }\n) => {\n const { io, datasetId, requestQueueId, log } = options ?? {};\n const datasetName = datasetId ? `\"${datasetId}\"` : 'DEFAULT';\n\n const sizeMonitor = datasetSizeMonitor(maxCount, { datasetId, requestQueueId, io });\n\n // Ignore incoming entries if the dataset is already full\n const isDatasetFull = await sizeMonitor.isFull();\n if (isDatasetFull) {\n log?.warning(`Dataset (${datasetName}) is already full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n // Show warning when only part of the incoming data made it into the dataset\n const slicedEntries = await sizeMonitor.shortenToSize(entries);\n if (slicedEntries.length !== entries.length) {\n log?.warning(`Dataset (${datasetName}) has become full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n return slicedEntries;\n};\n\n/**\n * Apify's `Actor.pushData` with extra features:\n *\n * - Data can be sent elsewhere, not just to Apify. This is set by the `io` options. By default data is sent using Apify (cloud/local).\n * - Limit the max size of the Dataset. No entries are added when Dataset is at or above the limit.\n * - Redact \"private\" fields\n * - Add metadata to entries before they are pushed to dataset.\n * - Select and rename (nested) properties\n * - Transform and filter entries. Entries that did not pass the filter are not added to the dataset.\n * - Add/remove entries to/from KeyValueStore. Entries are saved to the store by hash generated from entry fields set by `cachePrimaryKeys`.\n */\nexport const pushData = async <\n Ctx extends CrawlingContext,\n T extends Record<any, any> = Record<any, any>\n>(\n oneOrManyItems: T | T[],\n ctx: Ctx,\n options: PushDataOptions<T>\n) => {\n const {\n io = apifyIO as CrawleeOneIO,\n maxCount,\n includeMetadata,\n showPrivate,\n privacyMask,\n remapKeys,\n pickKeys,\n transform,\n filter,\n datasetId,\n requestQueueId,\n cacheStoreId,\n cachePrimaryKeys,\n cacheActionOnResult,\n } = options;\n\n const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];\n const items =\n maxCount != null\n ? await shortenToSize(manyItems, maxCount, { io, datasetId, requestQueueId, log: ctx.log })\n : manyItems;\n\n ctx.log.debug(`Preparing to push ${items.length} entries to dataset`); // prettier-ignore\n const addMetadataToData = await createMetadataMapper(ctx, { io });\n\n const adjustedItems = await items.reduce(async (aggPromise, item) => {\n const agg = await aggPromise;\n\n const itemWithMetadata = includeMetadata ? addMetadataToData(item) : item;\n const maskedItem = applyPrivacyMask(itemWithMetadata, {\n showPrivate,\n privacyMask,\n privateValueGen: (val, key) =>\n `<Redacted property \"${key}\". To include the actual value, toggle ON the input option \"Include personal data\">`,\n });\n\n const renamedItem = remapKeys ? renameKeys(maskedItem, remapKeys) : maskedItem;\n const pickedItem = pickKeys ? pick(renamedItem, pickKeys) : renamedItem;\n const transformedItem = transform ? await transform(pickedItem) : pickedItem;\n const passedFilter = filter ? await filter(transformedItem) : true;\n\n if (passedFilter) agg.push(transformedItem);\n\n return agg;\n }, Promise.resolve([] as unknown[]));\n\n // Push entries to primary dataset\n ctx.log.info(`Pushing ${adjustedItems.length} entries to dataset`);\n const dataset = await io.openDataset(datasetId);\n await dataset.pushData(adjustedItems);\n ctx.log.info(`Done pushing ${adjustedItems.length} entries to dataset`);\n\n // Update entries in cache\n if (cacheStoreId && cacheActionOnResult) {\n ctx.log.info(`Update ${adjustedItems.length} entries in cache`);\n const store = await io.openKeyValueStore(cacheStoreId);\n await serialAsyncMap(adjustedItems, async (item: any) => {\n const cacheId = itemCacheKey(item, cachePrimaryKeys);\n\n if (['add', 'overwrite'].includes(cacheActionOnResult)) {\n await store.setValue(cacheId, item);\n } else if (cacheActionOnResult === 'remove') {\n await store.setValue(cacheId, null);\n }\n });\n ctx.log.info(`Done updating ${adjustedItems.length} entries in cache`);\n }\n\n return adjustedItems;\n};\n"]}
|
|
1
|
+
{"version":3,"file":"pushData.js","sourceRoot":"","sources":["../../../../src/lib/io/pushData.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAA+C;AAC/C,mCAAuF;AAEvF,6CAAmD;AAEnD,iDAAmE;AACnE,uCAA+C;AAsH/C,MAAM,oBAAoB,GAAG,CAI3B,GAAQ,EACR,OAAoB,EACpB,EAAE;IACF,MAAM,EAAE,EAAE,GAAG,eAAO,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAEvC,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,qBAAqB,CAAC,GAAG,CAAC,CAAC;IACrD,MAAM,iBAAiB,GAAG,CAAmB,IAAO,EAAE,EAAE,CAAC,iCAAM,IAAI,KAAE,QAAQ,IAAG,CAAC;IACjF,OAAO,iBAAiB,CAAC;AAC3B,CAAC,CAAA,CAAC;AAEF,MAAM,gBAAgB,GAAG,CACvB,IAAO,EACP,OAIC,EACD,EAAE;IACF,MAAM,EACJ,WAAW,EACX,WAAW,EACX,eAAe,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,uBAAuB,GAAG,IAAI,GAC7D,GAAG,OAAO,CAAC;IAEZ,MAAM,mBAAmB,GAAG,CAAC,GAAW,EAAE,GAAQ,EAAE,EAAE;QACpD,kDAAkD;QAClD,2DAA2D;QAC3D,IAAI,kBAAkB,CAAC;QACvB,IAAI,2BAA2B,GAAG,KAAK,CAAC;QACxC,MAAM,qBAAqB,GAAG,CAAC,GAAQ,EAAE,EAAE;YACzC,kBAAkB,GAAG,GAAG,CAAC;YACzB,2BAA2B,GAAG,IAAI,CAAC;QACrC,CAAC,CAAC;QAEF,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,CAA6C,CAAC;QACnF,MAAM,SAAS,GAAG,aAAa;YAC7B,CAAC,CAAC,aAAa,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,EAAE,EAAE,qBAAqB,EAAE,eAAe,EAAE,CAAC;YAC3E,CAAC,CAAC,KAAK,CAAC;QAEV,kBAAkB;QAClB,MAAM,YAAY,GAAG;QACnB,4DAA4D;QAC5D,WAAW,CAAC,CAAC,CAAC,GAAG;YACjB,iDAAiD;YACjD,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,kBAAkB;gBAClD,+CAA+C;gBAC/C,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,GAAG,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CACpD,CAAC;QACF,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC;IAEF,MAAM,WAAW,GAAG,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE;;QAClE,MAAM,WAAW,GACf,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,IAAI,IAAI,IAAI,CAAC,CAAC,GAAG,YAAY,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAE1F,IAAI,WAAW,EAAE;YACf,qCAAqC;YACrC,MAAM,MAAM,GAAG,gBAAgB,CAAC,GAAG,EAAE;gBACnC,WAAW;gBACX,WAAW,EAAE,CAAC,MAAA,WAAW,CAAC,GAAG,CAAC,mCAAI,EAAE,CAAQ;gBAC5C,eAAe;aAChB,CAAC,CAAC;YACH,GAAG,CAAC,GAAc,CAAC,GAAG,MAAa,CAAC;SACrC;aAAM;YACL,GAAG,CAAC,GAAc,CAAC,GAAG,mBAAmB,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;SACrD;QACD,OAAO,GAAG,CAAC;IACb,CAAC,EAAE,EAAO,CAAC,CAAC;IAEZ,OAAO,WAAW,CAAC;AACrB,CAAC,CAAC;AAEF,wCAAwC;AACxC,MAAM,UAAU,GAAG,CAAmB,IAAO,EAAE,UAAkC,EAAE,EAAE;IACnF,MAAM,CAAC,OAAO,CAAC,UAAU,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,EAAE,OAAO,CAAC,EAAE,EAAE;QAC9D,IAAI,OAAO,KAAK,OAAO;YAAE,OAAO;QAChC,MAAM,GAAG,GAAG,IAAA,YAAG,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/B,IAAA,YAAG,EAAC,IAAI,EAAE,OAAiB,EAAE,GAAG,CAAC,CAAC;QAClC,IAAA,cAAK,EAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IACvB,CAAC,CAAC,CAAC;IACH,OAAO,IAAI,CAAC;AACd,CAAC,CAAC;AAEF,MAAM,cAAc,GAAG,CAAmB,GAAM,EAAE,EAAE,CAClD,IAAA,kBAAS,EAAC,IAAA,eAAM,EAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;AAEpE;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAAC,IAAS,EAAE,WAAsB,EAAE,EAAE;IAChE,MAAM,cAAc,GAAG,WAAW;QAChC,CAAC,CAAC,IAAA,eAAM,EAAC,IAAA,aAAI,EAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,aAAD,CAAC,uBAAD,CAAC,CAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC;QACjE,CAAC,CAAC,IAAI,CAAC;IAET,MAAM,cAAc,GAAG,cAAc;QACnC,CAAC,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,aAAJ,IAAI,uBAAJ,IAAI,CAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC;QAChD,CAAC,CAAC,IAAI,IAAI,IAAA,sBAAa,EAAC,IAAI,CAAC;YAC7B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,qCAAqC;YAC5E,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IAEzB,MAAM,OAAO,GAAG,MAAM,CAAC,cAAc,CAAC,CAAC;IACvC,OAAO,OAAO,CAAC,QAAQ,EAAE,CAAC;AAC5B,CAAC,CAAC;AAbW,QAAA,YAAY,gBAavB;AAEF;;;;GAIG;AACH,MAAM,MAAM,GAAG,CAAC,GAAG,EAAE,IAAI,GAAG,CAAC,EAAE,EAAE;IAC/B,IAAI,EAAE,GAAG,UAAU,GAAG,IAAI,EACxB,EAAE,GAAG,UAAU,GAAG,IAAI,CAAC;IACzB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;QACvC,EAAE,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QACvB,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;QACpC,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,EAAE,UAAU,CAAC,CAAC;KACrC;IACD,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC7C,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC9C,EAAE,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAC7C,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,UAAU,CAAC,CAAC;IAE9C,OAAO,UAAU,GAAG,CAAC,OAAO,GAAG,EAAE,CAAC,GAAG,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC;AAClD,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CACpB,OAAY,EACZ,QAAgB,EAChB,OAAsF,EACtF,EAAE;IACF,MAAM,EAAE,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAC7D,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;IAE7D,MAAM,WAAW,GAAG,IAAA,4BAAkB,EAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,EAAE,EAAE,CAAC,CAAC;IAEpF,yDAAyD;IACzD,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;IACjD,IAAI,aAAa,EAAE;QACjB,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,YAAY,WAAW,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAC7H,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,4EAA4E;IAC5E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/D,IAAI,aAAa,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE;QAC3C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,YAAY,WAAW,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAC7H,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAEF;;;;;;;;;;GAUG;AACI,MAAM,QAAQ,GAAG,CAItB,cAAuB,EACvB,GAAQ,EACR,OAA2B,EAC3B,EAAE;IACF,MAAM,EACJ,EAAE,GAAG,eAAuB,EAC5B,GAAG,GAAG,IAAI,aAAG,EAAE,EACf,QAAQ,EACR,eAAe,EACf,WAAW,EACX,WAAW,EACX,SAAS,EACT,QAAQ,EACR,SAAS,EACT,MAAM,EACN,SAAS,EACT,cAAc,EACd,YAAY,EACZ,gBAAgB,EAChB,mBAAmB,GACpB,GAAG,OAAO,CAAC;IAEZ,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACpF,MAAM,KAAK,GACT,QAAQ,IAAI,IAAI;QACd,CAAC,CAAC,MAAM,aAAa,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;QAClF,CAAC,CAAC,SAAS,CAAC;IAEhB,GAAG,CAAC,KAAK,CAAC,qBAAqB,KAAK,CAAC,MAAM,qBAAqB,CAAC,CAAC,CAAC,kBAAkB;IACrF,MAAM,iBAAiB,GAAG,MAAM,oBAAoB,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;IAElE,MAAM,aAAa,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAO,UAAU,EAAE,IAAI,EAAE,EAAE;QAClE,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC;QAE7B,MAAM,gBAAgB,GAAG,eAAe,CAAC,CAAC,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAC1E,MAAM,UAAU,GAAG,gBAAgB,CAAC,gBAAgB,EAAE;YACpD,WAAW;YACX,WAAW;YACX,eAAe,EAAE,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAC5B,uBAAuB,GAAG,qFAAqF;SAClH,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;QAC/E,MAAM,UAAU,GAAG,QAAQ,CAAC,CAAC,CAAC,IAAA,aAAI,EAAC,WAAW,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;QACxE,MAAM,eAAe,GAAG,SAAS,CAAC,CAAC,CAAC,MAAM,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;QAC7E,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnE,IAAI,YAAY;YAAE,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE5C,OAAO,GAAG,CAAC;IACb,CAAC,CAAA,EAAE,OAAO,CAAC,OAAO,CAAC,EAAe,CAAC,CAAC,CAAC;IAErC,kCAAkC;IAClC,GAAG,CAAC,IAAI,CAAC,WAAW,aAAa,CAAC,MAAM,qBAAqB,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;IAChD,MAAM,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;IACtC,GAAG,CAAC,IAAI,CAAC,gBAAgB,aAAa,CAAC,MAAM,qBAAqB,CAAC,CAAC;IAEpE,0BAA0B;IAC1B,IAAI,YAAY,IAAI,mBAAmB,EAAE;QACvC,GAAG,CAAC,IAAI,CAAC,UAAU,aAAa,CAAC,MAAM,mBAAmB,CAAC,CAAC;QAC5D,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC;QACvD,MAAM,IAAA,sBAAc,EAAC,aAAa,EAAE,CAAO,IAAS,EAAE,EAAE;YACtD,MAAM,OAAO,GAAG,IAAA,oBAAY,EAAC,IAAI,EAAE,gBAAgB,CAAC,CAAC;YAErD,IAAI,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,mBAAmB,CAAC,EAAE;gBACtD,MAAM,KAAK,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;aACrC;iBAAM,IAAI,mBAAmB,KAAK,QAAQ,EAAE;gBAC3C,MAAM,KAAK,CAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;aACrC;QACH,CAAC,CAAA,CAAC,CAAC;QACH,GAAG,CAAC,IAAI,CAAC,iBAAiB,aAAa,CAAC,MAAM,mBAAmB,CAAC,CAAC;KACpE;IAED,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AA/EW,QAAA,QAAQ,YA+EnB","sourcesContent":["import { CrawlingContext, Log } from 'crawlee';\nimport { get, pick, set, unset, uniq, sortBy, isPlainObject, fromPairs } from 'lodash';\n\nimport { serialAsyncMap } from '../../utils/async';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { ApifyCrawleeOneIO, apifyIO } from '../integrations/apify';\nimport { datasetSizeMonitor } from './dataset';\n\n/** Functions that generates a \"redacted\" version of a value */\nexport type PrivateValueGen<V, K, O> = (val: V, key: K, obj: O) => any;\n\n/**\n * Given a property value (and its position) this function\n * determines if the property is considered private (and\n * hence should be hidden for privacy reasons).\n *\n * Property is private if the function returns truthy value.\n */\nexport type PrivacyFilter<V, K, O> = (\n val: V,\n key: K,\n obj: O,\n options?: {\n setCustomPrivateValue: (val: V) => any;\n privateValueGen: PrivateValueGen<V, K, O>;\n }\n) => any;\n\n/**\n * PrivacyMask determines which (potentally nested) properties\n * of an object are considered private.\n *\n * PrivacyMask copies the structure of another object, but each\n * non-object property on PrivacyMask is a PrivacyFilter - function\n * that determines if the property is considered private.\n *\n * Property is private if the function returns truthy value.\n */\nexport type PrivacyMask<T extends object> = {\n [Key in keyof T]?: T[Key] extends Date | any[] // Consider Data and Array as non-objects\n ? PrivacyFilter<T[Key], Key, T>\n : T[Key] extends object\n ? PrivacyMask<T[Key]>\n : PrivacyFilter<T[Key], Key, T>;\n};\n\nexport interface PushDataOptions<T extends object> {\n io?: CrawleeOneIO<any, any>;\n log?: Log;\n /**\n * If set, only at most this many entries will be scraped.\n *\n * The count is determined from the Dataset that's used for the crawler run.\n *\n * This means that if `maxCount` is set to 50, but the\n * associated Dataset already has 40 items in it, then only 10 new entries\n * will be saved.\n */\n maxCount?: number;\n /**\n * Whether items should be enriched with request and run metadata.\n *\n * If truthy, the metadata is set under the `metadata` property.\n */\n includeMetadata?: boolean;\n /**\n * Whether properties that are considered personal data should be shown as is.\n *\n * If falsy or not set, these properties are redacted to hide the actual information.\n *\n * Which properties are personal data is determined by `privacyMask`.\n */\n showPrivate?: boolean;\n /**\n * Determine which properties are considered personal data.\n *\n * See {@link PrivacyMask}.\n **/\n privacyMask: PrivacyMask<T>;\n /**\n * Option to select which keys (fields) of an entry to keep (discarding the rest)\n * before pushing the entries to the dataset.\n *\n * This serves mainly to allow users to select the keys from actor input UI.\n *\n * This is done before `remapKeys`.\n *\n * Keys can be nested, e.g. `\"someProp.value[0]\"`. Nested path is\n * resolved using Lodash.get().\n */\n pickKeys?: string[];\n /**\n * Option to remap the keys before pushing the entries to the dataset.\n *\n * This serves mainly to allow users to remap the keys from actor input UI.\n *\n * Keys can be nested, e.g. `\"someProp.value[0]\"`. Nested path is\n * resolved using Lodash.get().\n */\n remapKeys?: Record<string, string>;\n /**\n * Option to freely transform an entry before pushing it to the dataset.\n *\n * This serves mainly to allow users to transform the entries from actor input UI.\n */\n transform?: (item: any) => any;\n /**\n * Option to filter an entry before pushing it to the dataset.\n *\n * This serves mainly to allow users to filter the entries from actor input UI.\n */\n filter?: (item: any) => any;\n /** ID or name of the dataset to which the data should be pushed */\n datasetId?: string;\n /** ID of the RequestQueue that stores remaining requests */\n requestQueueId?: string;\n /** ID or name of the key-value store used as cache */\n cacheStoreId?: string;\n /** Define fields that uniquely identify entries for caching */\n cachePrimaryKeys?: string[];\n /** Define whether we want to add, remove, or overwrite cached entries with results from the actor run */\n cacheActionOnResult?: 'add' | 'remove' | 'overwrite' | null;\n}\n\nconst createMetadataMapper = async <\n Ctx extends CrawlingContext,\n TIO extends CrawleeOneIO<any, any> = ApifyCrawleeOneIO\n>(\n ctx: Ctx,\n options: { io: TIO }\n) => {\n const { io = apifyIO } = options ?? {};\n\n const metadata = await io.generateEntryMetadata(ctx);\n const addMetadataToData = <T extends object>(item: T) => ({ ...item, metadata });\n return addMetadataToData;\n};\n\nconst applyPrivacyMask = <T extends Record<any, any> = Record<any, any>>(\n item: T,\n options: {\n showPrivate?: boolean;\n privacyMask: PrivacyMask<T>;\n privateValueGen?: (val: any, key: string, item: T) => any;\n }\n) => {\n const {\n showPrivate,\n privacyMask,\n privateValueGen = (_, key) => `<Redacted property \"${key}\">`,\n } = options;\n\n const resolvePrivateValue = (key: string, val: any) => {\n // Allow to set custom \"redacted\" value by calling\n // `setCustomPrivateValue` from inside the filter function.\n let customPrivateValue;\n let setCustomPrivateValueCalled = false;\n const setCustomPrivateValue = (val: any) => {\n customPrivateValue = val;\n setCustomPrivateValueCalled = true;\n };\n\n const privacyFilter = privacyMask[key] as PrivacyFilter<any, any, any> | undefined;\n const isPrivate = privacyFilter\n ? privacyFilter(val, key, item, { setCustomPrivateValue, privateValueGen })\n : false;\n\n // prettier-ignore\n const privateValue = (\n // Don't redact anything if we're asked to show private data\n showPrivate ? val\n // Otherwise, if custom value was given, use that\n : setCustomPrivateValueCalled ? customPrivateValue\n // Otherwise, decide based on filter truthiness\n : isPrivate ? privateValueGen(val, key, item) : val\n );\n return privateValue;\n };\n\n const redactedObj = Object.entries(item).reduce((agg, [key, val]) => {\n const isNestedObj =\n typeof val === 'object' && val != null && !(val instanceof Date) && !Array.isArray(val);\n\n if (isNestedObj) {\n // Recursively process nested objects\n const subObj = applyPrivacyMask(val, {\n showPrivate,\n privacyMask: (privacyMask[key] ?? {}) as any,\n privateValueGen,\n });\n agg[key as keyof T] = subObj as any;\n } else {\n agg[key as keyof T] = resolvePrivateValue(key, val);\n }\n return agg;\n }, {} as T);\n\n return redactedObj;\n};\n\n/** Rename object properties in place */\nconst renameKeys = <T extends object>(item: T, keyNameMap: Record<string, string>) => {\n Object.entries(keyNameMap || {}).forEach(([oldPath, newPath]) => {\n if (oldPath === newPath) return;\n const val = get(item, oldPath);\n set(item, newPath as string, val);\n unset(item, oldPath);\n });\n return item;\n};\n\nconst sortObjectKeys = <T extends object>(obj: T) =>\n fromPairs(sortBy(Object.keys(obj)).map((key) => [key, obj[key]]));\n\n/**\n * Serialize dataset item to fixed-length hash.\n *\n * NOTE: Apify (around which this lib is designed) allows the key-value store key\n * to be max 256 char long.\n * https://docs.apify.com/sdk/js/reference/class/KeyValueStore#setValue\n */\nexport const itemCacheKey = (item: any, primaryKeys?: string[]) => {\n const thePrimaryKeys = primaryKeys\n ? sortBy(uniq(primaryKeys.map((s) => s?.trim()).filter(Boolean)))\n : null;\n\n const serializedItem = thePrimaryKeys\n ? thePrimaryKeys.map((k) => item?.[k]).join(':')\n : item && isPlainObject(item)\n ? JSON.stringify(sortObjectKeys(item)) // If possible sort the object's keys\n : JSON.stringify(item);\n\n const cacheId = cyrb53(serializedItem);\n return cacheId.toString();\n};\n\n/**\n * Hashing function used when calculating cache ID hash from entries.\n *\n * See https://stackoverflow.com/a/52171480/9788634.\n */\nconst cyrb53 = (str, seed = 0) => {\n let h1 = 0xdeadbeef ^ seed,\n h2 = 0x41c6ce57 ^ seed;\n for (let i = 0, ch; i < str.length; i++) {\n ch = str.charCodeAt(i);\n h1 = Math.imul(h1 ^ ch, 2654435761);\n h2 = Math.imul(h2 ^ ch, 1597334677);\n }\n h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507);\n h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909);\n h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507);\n h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909);\n\n return 4294967296 * (2097151 & h2) + (h1 >>> 0);\n};\n\nconst shortenToSize = async <T>(\n entries: T[],\n maxCount: number,\n options?: { io?: CrawleeOneIO; datasetId?: string; requestQueueId?: string; log: Log }\n) => {\n const { io, datasetId, requestQueueId, log } = options ?? {};\n const datasetName = datasetId ? `\"${datasetId}\"` : 'DEFAULT';\n\n const sizeMonitor = datasetSizeMonitor(maxCount, { datasetId, requestQueueId, io });\n\n // Ignore incoming entries if the dataset is already full\n const isDatasetFull = await sizeMonitor.isFull();\n if (isDatasetFull) {\n log?.warning(`Dataset (${datasetName}) is already full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n // Show warning when only part of the incoming data made it into the dataset\n const slicedEntries = await sizeMonitor.shortenToSize(entries);\n if (slicedEntries.length !== entries.length) {\n log?.warning(`Dataset (${datasetName}) has become full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n return slicedEntries;\n};\n\n/**\n * Apify's `Actor.pushData` with extra features:\n *\n * - Data can be sent elsewhere, not just to Apify. This is set by the `io` options. By default data is sent using Apify (cloud/local).\n * - Limit the max size of the Dataset. No entries are added when Dataset is at or above the limit.\n * - Redact \"private\" fields\n * - Add metadata to entries before they are pushed to dataset.\n * - Select and rename (nested) properties\n * - Transform and filter entries. Entries that did not pass the filter are not added to the dataset.\n * - Add/remove entries to/from KeyValueStore. Entries are saved to the store by hash generated from entry fields set by `cachePrimaryKeys`.\n */\nexport const pushData = async <\n Ctx extends CrawlingContext,\n T extends Record<any, any> = Record<any, any>\n>(\n oneOrManyItems: T | T[],\n ctx: Ctx,\n options: PushDataOptions<T>\n) => {\n const {\n io = apifyIO as CrawleeOneIO,\n log = new Log(),\n maxCount,\n includeMetadata,\n showPrivate,\n privacyMask,\n remapKeys,\n pickKeys,\n transform,\n filter,\n datasetId,\n requestQueueId,\n cacheStoreId,\n cachePrimaryKeys,\n cacheActionOnResult,\n } = options;\n\n const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];\n const items =\n maxCount != null\n ? await shortenToSize(manyItems, maxCount, { io, datasetId, requestQueueId, log })\n : manyItems;\n\n log.debug(`Preparing to push ${items.length} entries to dataset`); // prettier-ignore\n const addMetadataToData = await createMetadataMapper(ctx, { io });\n\n const adjustedItems = await items.reduce(async (aggPromise, item) => {\n const agg = await aggPromise;\n\n const itemWithMetadata = includeMetadata ? addMetadataToData(item) : item;\n const maskedItem = applyPrivacyMask(itemWithMetadata, {\n showPrivate,\n privacyMask,\n privateValueGen: (val, key) =>\n `<Redacted property \"${key}\". To include the actual value, toggle ON the input option \"Include personal data\">`,\n });\n\n const renamedItem = remapKeys ? renameKeys(maskedItem, remapKeys) : maskedItem;\n const pickedItem = pickKeys ? pick(renamedItem, pickKeys) : renamedItem;\n const transformedItem = transform ? await transform(pickedItem) : pickedItem;\n const passedFilter = filter ? await filter(transformedItem) : true;\n\n if (passedFilter) agg.push(transformedItem);\n\n return agg;\n }, Promise.resolve([] as unknown[]));\n\n // Push entries to primary dataset\n log.info(`Pushing ${adjustedItems.length} entries to dataset`);\n const dataset = await io.openDataset(datasetId);\n await dataset.pushData(adjustedItems);\n log.info(`Done pushing ${adjustedItems.length} entries to dataset`);\n\n // Update entries in cache\n if (cacheStoreId && cacheActionOnResult) {\n log.info(`Update ${adjustedItems.length} entries in cache`);\n const store = await io.openKeyValueStore(cacheStoreId);\n await serialAsyncMap(adjustedItems, async (item: any) => {\n const cacheId = itemCacheKey(item, cachePrimaryKeys);\n\n if (['add', 'overwrite'].includes(cacheActionOnResult)) {\n await store.setValue(cacheId, item);\n } else if (cacheActionOnResult === 'remove') {\n await store.setValue(cacheId, null);\n }\n });\n log.info(`Done updating ${adjustedItems.length} entries in cache`);\n }\n\n return adjustedItems;\n};\n"]}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { Log, Request as CrawleeRequest, RequestQueueOperationOptions } from 'crawlee';
|
|
2
2
|
import type { CrawleeOneIO } from '../integrations/types';
|
|
3
3
|
export interface PushRequestsOptions<T extends CrawleeRequest = CrawleeRequest> {
|
|
4
4
|
io?: CrawleeOneIO<any, any>;
|
|
5
|
+
log?: Log;
|
|
5
6
|
/**
|
|
6
7
|
* If set, only at most this many requests will be added to the RequestQueue.
|
|
7
8
|
*
|
|
@@ -35,4 +36,4 @@ export interface PushRequestsOptions<T extends CrawleeRequest = CrawleeRequest>
|
|
|
35
36
|
* - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.
|
|
36
37
|
* - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.
|
|
37
38
|
*/
|
|
38
|
-
export declare const pushRequests: <
|
|
39
|
+
export declare const pushRequests: <T extends CrawleeRequest<import("crawlee").Dictionary> = CrawleeRequest<import("crawlee").Dictionary>>(oneOrManyItems: T | T[], options?: PushRequestsOptions<T> | undefined) => Promise<unknown[]>;
|
|
@@ -10,6 +10,7 @@ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, ge
|
|
|
10
10
|
};
|
|
11
11
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
12
12
|
exports.pushRequests = void 0;
|
|
13
|
+
const crawlee_1 = require("crawlee");
|
|
13
14
|
const requestQueue_1 = require("./requestQueue");
|
|
14
15
|
const apify_1 = require("../integrations/apify");
|
|
15
16
|
const shortenToSize = (entries, maxCount, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
@@ -37,13 +38,13 @@ const shortenToSize = (entries, maxCount, options) => __awaiter(void 0, void 0,
|
|
|
37
38
|
* - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.
|
|
38
39
|
* - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.
|
|
39
40
|
*/
|
|
40
|
-
const pushRequests = (oneOrManyItems,
|
|
41
|
-
const { io = apify_1.apifyIO, maxCount, transform, filter, requestQueueId, queueOptions, } = options;
|
|
41
|
+
const pushRequests = (oneOrManyItems, options) => __awaiter(void 0, void 0, void 0, function* () {
|
|
42
|
+
const { io = apify_1.apifyIO, log = new crawlee_1.Log(), maxCount, transform, filter, requestQueueId, queueOptions, } = options !== null && options !== void 0 ? options : {};
|
|
42
43
|
const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];
|
|
43
44
|
const items = maxCount != null
|
|
44
|
-
? yield shortenToSize(manyItems, maxCount, { io, requestQueueId, log
|
|
45
|
+
? yield shortenToSize(manyItems, maxCount, { io, requestQueueId, log })
|
|
45
46
|
: manyItems;
|
|
46
|
-
|
|
47
|
+
log.debug(`Preparing to push ${items.length} requests to queue`); // prettier-ignore
|
|
47
48
|
const adjustedItems = yield items.reduce((aggPromise, item) => __awaiter(void 0, void 0, void 0, function* () {
|
|
48
49
|
const agg = yield aggPromise;
|
|
49
50
|
const transformedItem = transform ? yield transform(item) : item;
|
|
@@ -53,10 +54,10 @@ const pushRequests = (oneOrManyItems, ctx, options) => __awaiter(void 0, void 0,
|
|
|
53
54
|
return agg;
|
|
54
55
|
}), Promise.resolve([]));
|
|
55
56
|
// Push requests to primary RequestQueue
|
|
56
|
-
|
|
57
|
+
log.info(`Pushing ${adjustedItems.length} requests to queue`);
|
|
57
58
|
const reqQueue = yield io.openRequestQueue(requestQueueId);
|
|
58
59
|
yield reqQueue.addRequests(adjustedItems, queueOptions);
|
|
59
|
-
|
|
60
|
+
log.info(`Done pushing ${adjustedItems.length} requests to queue`);
|
|
60
61
|
return adjustedItems;
|
|
61
62
|
});
|
|
62
63
|
exports.pushRequests = pushRequests;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pushRequests.js","sourceRoot":"","sources":["../../../../src/lib/io/pushRequests.ts"],"names":[],"mappings":";;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"pushRequests.js","sourceRoot":"","sources":["../../../../src/lib/io/pushRequests.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,qCAAuF;AAEvF,iDAAyD;AAEzD,iDAAgD;AAkChD,MAAM,aAAa,GAAG,CACpB,OAAY,EACZ,QAAgB,EAChB,OAAmE,EACnE,EAAE;IACF,MAAM,EAAE,cAAc,EAAE,GAAG,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAE9C,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,cAAc,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;IAErE,MAAM,WAAW,GAAG,IAAA,sCAAuB,EAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE/D,uDAAuD;IACvD,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,MAAM,EAAE,CAAC;IAC1C,IAAI,MAAM,EAAE;QACV,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,iBAAiB,SAAS,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAChI,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,8EAA8E;IAC9E,MAAM,aAAa,GAAG,MAAM,WAAW,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAC/D,IAAI,aAAa,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,EAAE;QAC3C,GAAG,aAAH,GAAG,uBAAH,GAAG,CAAE,OAAO,CAAC,iBAAiB,SAAS,sBAAsB,QAAQ,cAAc,OAAO,CAAC,MAAM,6BAA6B,CAAC,CAAC;QAChI,OAAO,EAAE,CAAC;KACX,CAAC,kBAAkB;IAEpB,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAEF;;;;;;GAMG;AACI,MAAM,YAAY,GAAG,CAC1B,cAAuB,EACvB,OAAgC,EAChC,EAAE;IACF,MAAM,EACJ,EAAE,GAAG,eAAuB,EAC5B,GAAG,GAAG,IAAI,aAAG,EAAE,EACf,QAAQ,EACR,SAAS,EACT,MAAM,EACN,cAAc,EACd,YAAY,GACb,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;IAElB,MAAM,SAAS,GAAG,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC;IACpF,MAAM,KAAK,GACT,QAAQ,IAAI,IAAI;QACd,CAAC,CAAC,MAAM,aAAa,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE,EAAE,EAAE,cAAc,EAAE,GAAG,EAAE,CAAC;QACvE,CAAC,CAAC,SAAS,CAAC;IAEhB,GAAG,CAAC,KAAK,CAAC,qBAAqB,KAAK,CAAC,MAAM,oBAAoB,CAAC,CAAC,CAAC,kBAAkB;IAEpF,MAAM,aAAa,GAAG,MAAM,KAAK,CAAC,MAAM,CAAC,CAAO,UAAU,EAAE,IAAI,EAAE,EAAE;QAClE,MAAM,GAAG,GAAG,MAAM,UAAU,CAAC;QAE7B,MAAM,eAAe,GAAG,SAAS,CAAC,CAAC,CAAC,MAAM,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QACjE,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAEnE,IAAI,YAAY;YAAE,GAAG,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC;QAE5C,OAAO,GAAG,CAAC;IACb,CAAC,CAAA,EAAE,OAAO,CAAC,OAAO,CAAC,EAAe,CAAC,CAAC,CAAC;IAErC,wCAAwC;IACxC,GAAG,CAAC,IAAI,CAAC,WAAW,aAAa,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG,MAAM,EAAE,CAAC,gBAAgB,CAAC,cAAc,CAAC,CAAC;IAC3D,MAAM,QAAQ,CAAC,WAAW,CAAC,aAAsB,EAAE,YAAY,CAAC,CAAC;IACjE,GAAG,CAAC,IAAI,CAAC,gBAAgB,aAAa,CAAC,MAAM,oBAAoB,CAAC,CAAC;IAEnE,OAAO,aAAa,CAAC;AACvB,CAAC,CAAA,CAAC;AAxCW,QAAA,YAAY,gBAwCvB","sourcesContent":["import { Log, Request as CrawleeRequest, RequestQueueOperationOptions } from 'crawlee';\n\nimport { requestQueueSizeMonitor } from './requestQueue';\nimport type { CrawleeOneIO } from '../integrations/types';\nimport { apifyIO } from '../integrations/apify';\n\nexport interface PushRequestsOptions<T extends CrawleeRequest = CrawleeRequest> {\n io?: CrawleeOneIO<any, any>;\n log?: Log;\n /**\n * If set, only at most this many requests will be added to the RequestQueue.\n *\n * The count is determined from the RequestQueue that's used for the crawler run.\n *\n * This means that if `maxCount` is set to 50, but the\n * associated RequestQueue already handled 40 requests, then only 10 new requests\n * will be processed.\n */\n maxCount?: number;\n /**\n * Option to freely transform a request before pushing it to the RequestQueue.\n *\n * This serves mainly to allow users to transform the requests from actor input UI.\n */\n transform?: (req: T) => any;\n /**\n * Option to filter a request before pushing it to the RequestQueue.\n *\n * This serves mainly to allow users to filter the requests from actor input UI.\n */\n filter?: (req: T) => any;\n /** ID of the RequestQueue to which the data should be pushed */\n requestQueueId?: string;\n\n // Pass-through options\n queueOptions?: RequestQueueOperationOptions;\n}\n\nconst shortenToSize = async <T>(\n entries: T[],\n maxCount: number,\n options?: { io?: CrawleeOneIO; requestQueueId?: string; log?: Log }\n) => {\n const { requestQueueId, log } = options ?? {};\n\n const queueName = requestQueueId ? `\"${requestQueueId}\"` : 'DEFAULT';\n\n const sizeMonitor = requestQueueSizeMonitor(maxCount, options);\n\n // Ignore incoming entries if the queue is already full\n const isFull = await sizeMonitor.isFull();\n if (isFull) {\n log?.warning(`RequestQueue (${queueName}) is already full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n // Show warning when only part of the incoming requests made it into the queue\n const slicedEntries = await sizeMonitor.shortenToSize(entries);\n if (slicedEntries.length !== entries.length) {\n log?.warning(`RequestQueue (${queueName}) has become full (${maxCount} entries), ${entries.length} entries will be discarded.`);\n return [];\n } // prettier-ignore\n\n return slicedEntries;\n};\n\n/**\n * Similar to `Actor.openRequestQueue().addRequests`, but with extra features:\n *\n * - Data can be sent elsewhere, not just to Apify. This is set by the `io` options. By default data is sent using Apify (cloud/local).\n * - Limit the max size of the RequestQueue. No requests are added when RequestQueue is at or above the limit.\n * - Transform and filter requests. Requests that did not pass the filter are not added to the RequestQueue.\n */\nexport const pushRequests = async <T extends CrawleeRequest = CrawleeRequest>(\n oneOrManyItems: T | T[],\n options?: PushRequestsOptions<T>\n) => {\n const {\n io = apifyIO as CrawleeOneIO,\n log = new Log(),\n maxCount,\n transform,\n filter,\n requestQueueId,\n queueOptions,\n } = options ?? {};\n\n const manyItems = Array.isArray(oneOrManyItems) ? oneOrManyItems : [oneOrManyItems];\n const items =\n maxCount != null\n ? await shortenToSize(manyItems, maxCount, { io, requestQueueId, log })\n : manyItems;\n\n log.debug(`Preparing to push ${items.length} requests to queue`); // prettier-ignore\n\n const adjustedItems = await items.reduce(async (aggPromise, item) => {\n const agg = await aggPromise;\n\n const transformedItem = transform ? await transform(item) : item;\n const passedFilter = filter ? await filter(transformedItem) : true;\n\n if (passedFilter) agg.push(transformedItem);\n\n return agg;\n }, Promise.resolve([] as unknown[]));\n\n // Push requests to primary RequestQueue\n log.info(`Pushing ${adjustedItems.length} requests to queue`);\n const reqQueue = await io.openRequestQueue(requestQueueId);\n await reqQueue.addRequests(adjustedItems as any[], queueOptions);\n log.info(`Done pushing ${adjustedItems.length} requests to queue`);\n\n return adjustedItems;\n};\n"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlee-one",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.2",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Crawlee One is a framework built on top of Crawlee and Apify for writing robust and highly configurable web scrapers",
|
|
6
6
|
"author": "Juro Oravec <juraj.oravec.josefson@gmail.com>",
|