crawlee-one 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/README.md +30 -31
  2. package/dist/cjs/api.d.ts +2 -2
  3. package/dist/cjs/api.js.map +1 -1
  4. package/dist/cjs/cli/cli.js +4 -4
  5. package/dist/cjs/cli/cli.js.map +1 -1
  6. package/dist/cjs/cli/commands/codegen.js.map +1 -1
  7. package/dist/cjs/lib/actions/scrapeListing.js.map +1 -1
  8. package/dist/cjs/lib/actor/actor.d.ts +5 -5
  9. package/dist/cjs/lib/actor/actor.js +2 -2
  10. package/dist/cjs/lib/actor/actor.js.map +1 -1
  11. package/dist/cjs/lib/error/errorHandler.d.ts +6 -8
  12. package/dist/cjs/lib/error/errorHandler.js +3 -3
  13. package/dist/cjs/lib/error/errorHandler.js.map +1 -1
  14. package/dist/cjs/lib/input.d.ts +2 -2
  15. package/dist/cjs/lib/input.js +28 -28
  16. package/dist/cjs/lib/input.js.map +1 -1
  17. package/dist/cjs/lib/integrations/apify.js.map +1 -1
  18. package/dist/cjs/lib/io/dataset.js.map +1 -1
  19. package/dist/cjs/lib/io/pushData.js +3 -3
  20. package/dist/cjs/lib/io/pushData.js.map +1 -1
  21. package/dist/cjs/lib/io/pushRequests.d.ts +1 -1
  22. package/dist/cjs/lib/io/pushRequests.js.map +1 -1
  23. package/dist/cjs/lib/log.d.ts +1 -1
  24. package/dist/cjs/lib/migrate/localMigrator.js.map +1 -1
  25. package/dist/cjs/lib/router/router.d.ts +14 -18
  26. package/dist/cjs/lib/router/router.js +2 -2
  27. package/dist/cjs/lib/router/router.js.map +1 -1
  28. package/dist/cjs/lib/telemetry/sentry.d.ts +1 -1
  29. package/dist/cjs/lib/test/actor.d.ts +9 -9
  30. package/dist/cjs/lib/test/actor.js +2 -2
  31. package/dist/cjs/lib/test/actor.js.map +1 -1
  32. package/dist/cjs/lib/test/mockApifyClient.d.ts +6 -6
  33. package/dist/cjs/lib/test/mockApifyClient.js.map +1 -1
  34. package/dist/cjs/utils/async.js +1 -1
  35. package/dist/cjs/utils/async.js.map +1 -1
  36. package/dist/cjs/utils/error.d.ts +1 -1
  37. package/dist/cjs/utils/package.js.map +1 -1
  38. package/dist/cjs/utils/url.js.map +1 -1
  39. package/dist/cjs/utils/valueMonitor.js.map +1 -1
  40. package/package.json +21 -20
package/README.md CHANGED
@@ -183,7 +183,7 @@ CrawleeOne also includes helpers and types for:
183
183
  - Privacy compliance
184
184
  - Metamorphing
185
185
 
186
- CrawleeOne supports many common and advanced web scraping use cases. See the [Use cases](#use-cases) for the overview of the use cases.
186
+ CrawleeOne supports many common and advanced web scraping use cases. See the [Use cases](#playbook--use-cases) for the overview of the use cases.
187
187
 
188
188
  See the section [Usage (for end users)](#usage-for-end-users) for how CrawleeOne looks from the user's perspective.
189
189
 
@@ -260,6 +260,7 @@ await crawleeOne({
260
260
  // - Downstream crawler with Apify's "metamorph".
261
261
  //
262
262
  // See the Actor input reference for all input fields.
263
+ // https://github.com/JuroOravec/crawlee-one/blob/main/docs/reference-input.md
263
264
  //
264
265
  // Specify input if you plan to use the crawler yourself,
265
266
  // otherwise use `inputDefaults` or set `mergeInput`.
@@ -287,9 +288,9 @@ await crawleeOne({
287
288
  // E.g. if `type: 'playwright'`, then this config is used as:
288
289
  // `new PlaywrightCrawler(crawlerConfig);`
289
290
  //
290
- // Set `crawlerConfig` for config that cannot be configured via `input`,
291
+ // Use `crawlerConfig` for config that cannot be configured via `input`,
291
292
  // or when you need the crawler to use specific settings and you don't
292
- // want users to override that.
293
+ // want users to override.
293
294
  crawlerConfig: {
294
295
  maxRequestsPerMinute: 120,
295
296
  requestHandlerTimeoutSecs: 180,
@@ -314,9 +315,7 @@ await crawleeOne({
314
315
  handler: async (ctx) => {
315
316
  const { $, request, pushData, pushRequests } = ctx;
316
317
  // Scrape data from the page
317
- const data = [
318
- /* ... */
319
- ];
318
+ const data = [ ... ];
320
319
 
321
320
  // Save the scraped data. When you save data with `ctx.pushData`,
322
321
  // then you can filter, transform, limit, redact, and more.
@@ -338,8 +337,8 @@ await crawleeOne({
338
337
  },
339
338
 
340
339
  hooks: {
341
- // By default, CrawleeOne calls `Crawler.run()` once ready.
342
- // If you override it, you have to call it yourself.
340
+ // By default, once ready, CrawleeOne calls `actor.runCrawler` (which calls `Crawler.run()`).
341
+ // If you supply your own `onReady` callback, you have to call `actor.runCrawler` yourself.
343
342
  onReady: async (inst) => {
344
343
  // E.g. in this example, user can select to scrape all entries
345
344
  // or a certain kind by setting a custom `datasetType` input field.
@@ -357,7 +356,7 @@ await crawleeOne({
357
356
  onAfterHandler: (ctx) => { /* ... */ },
358
357
 
359
358
  // If you run the crawler on Apify, or otherwise provide the crawler to others,
360
- // tehn it's a good practice to validate their input.
359
+ // then it's a good practice to validate their input.
361
360
  validateInput: (input) => {
362
361
  const schema = Joi.object({ ... });
363
362
  Joi.assert(input, schema);
@@ -367,7 +366,7 @@ await crawleeOne({
367
366
  // Configure the Crawlee proxy. See Crawlee's `ProxyConfiguration`
368
367
  // By default, no proxy is used.
369
368
  //
370
- // NOTE: DO NOT set proxy if you are deploying the crawler in Apify,
369
+ // NOTE: DO NOT set proxy here if you are deploying the crawler to Apify
371
370
  // and you want the user to specify the proxy!
372
371
  proxy: Actor.createProxyConfiguration({ ... }),
373
372
 
@@ -393,8 +392,9 @@ await crawleeOne({
393
392
  //
394
393
  // You don't need to override this in most of the cases.
395
394
  //
396
- // By default, the data is saved and kept locally in `./storage` directory.
397
- // And if the cralwer runs in Apify's platform then it uses
395
+ // By default, Apify saves the data locally in the `./storage` directory, for
396
+ // as long as the crawler is not running from within Apify's platform.
397
+ // And if the crawler runs on Apify's platform, then it uses
398
398
  // Apify's cloud storage.
399
399
  //
400
400
  // See the docs for `CrawleeOneIO`.
@@ -406,16 +406,14 @@ await crawleeOne({
406
406
  });
407
407
  ```
408
408
 
409
- > You can find the full type definition of `crawleeOne` and its arguments here:
410
- >
411
- > - [crawleeOne](./docs/typedoc/modules.md#crawleeone)
412
- > - [CrawleeOneArgs](./docs/typedoc/interfaces/CrawleeOneArgs.md)
413
- >
414
- > To learn more about `pushData` and `pushRequests`, see:
415
- >
416
- > - [pushData](./docs/typedoc/modules.md#pushdata)
417
- > - NOTE: When you use `pushData` from within a handler, you omit the first argument (`ctx`).
418
- > - [pushRequests](./docs/typedoc/modules.md#pushrequests)
409
+ You can find the full type definition of `crawleeOne` and its arguments here:
410
+ - [crawleeOne](./docs/typedoc/modules.md#crawleeone)
411
+ - [CrawleeOneArgs](./docs/typedoc/interfaces/CrawleeOneArgs.md)
412
+
413
+ To learn more about `pushData` and `pushRequests`, see:
414
+ - [pushData](./docs/typedoc/modules.md#pushdata)
415
+ - NOTE: When you use `pushData` from within a handler, you omit the first argument (`ctx`).
416
+ - [pushRequests](./docs/typedoc/modules.md#pushrequests)
419
417
 
420
418
  ### Route handler context
421
419
 
@@ -442,13 +440,13 @@ await crawleeOne({
442
440
  ctx.response
443
441
  const $ = ctx.parseWithCheerio();
444
442
  // And more...
445
-
443
+
446
444
  // Extra props
447
-
445
+
448
446
  // 1. CrawleeOne instance (type: CrawleeOneActorInst):
449
447
  // - Save scraped items
450
448
  await ctx.actor.pushData(scrapedItems);
451
-
449
+
452
450
  // - Enqueue more URLs to scrape
453
451
  const id = Math.floor(Math.random() * 100);
454
452
  const url = `https://example.com/resource/${id}`;
@@ -493,7 +491,6 @@ See either of the two projects as examples:
493
491
  - [SKCRIS Scraper](https://github.com/JuroOravec/apify-actor-skcris)
494
492
  - [Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk)
495
493
 
496
-
497
494
  #### 1. Write the crawler with CrawleeOne
498
495
 
499
496
  Either use the example projects above or use your own boilerplate project, but remember that Apify requires you to Dockerize the
@@ -518,9 +515,9 @@ For that, you will need to:
518
515
  ```
519
516
 
520
517
  [`apify-actor-config`](https://github.com/JuroOravec/apify-actor-config) is a sister package focused solely on working with and generating
521
- Apify's `actor.json` config files.
518
+ Apify's `actor.json` config files.
522
519
 
523
- 2. Write a JS/TS file where you will only define your config and export it as the *default* export.
520
+ 2. Write a JS/TS file where you will only define your config and export it as the _default_ export.
524
521
 
525
522
  [See here the example config file from Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk/blob/main/src/config.ts).
526
523
 
@@ -545,7 +542,7 @@ For that, you will need to:
545
542
  // ...
546
543
  input: inputSchema,
547
544
  });
548
-
545
+
549
546
  export default config;
550
547
  ```
551
548
 
@@ -559,7 +556,7 @@ For that, you will need to:
559
556
  3. Build / transpile the config to vanilla JS if necessary.
560
557
 
561
558
  In Profesia.sk Scraper, the config is defined as a TypeScript file, but `apify-actor-config` currently supports only JS files.
562
-
559
+
563
560
  So if you are also using anything other than plain JavaScript, then you will need to build / transpile your project. Do so only once you're happy with the input fields and their defaults.
564
561
 
565
562
  4. Generate `actor.json` file
@@ -792,6 +789,7 @@ interface CrawleeOneTelemetry {
792
789
  ```
793
790
 
794
791
  See existing integrations for inspiration:
792
+
795
793
  - [Sentry](./src/lib/telemetry/sentry.ts)
796
794
 
797
795
  Based on the above, here's an example of a custom telemetry implementation
@@ -871,10 +869,11 @@ interface CrawleeOneIO {
871
869
  ```
872
870
 
873
871
  See existing integrations for inspiration:
872
+
874
873
  - [Apify](./src/lib/integrations/apify.ts)
875
874
 
876
875
  Based on the above, here's an example of a custom CrawleeOneIO implementation
877
- that overrides the datasets to send them to a custom HTTP endpoint.
876
+ that overrides the datasets to send them to a custom HTTP endpoint.
878
877
 
879
878
  ```ts
880
879
  import type { CrawleeOneIO, apifyIO } from 'crawlee-one';
package/dist/cjs/api.d.ts CHANGED
@@ -3,7 +3,7 @@ import type { AllActorInputs } from './lib/input';
3
3
  import type { CrawleeOneRouteHandler, CrawleeOneRoute } from './lib/router/types';
4
4
  import type { CrawlerMeta, CrawlerType } from './types';
5
5
  import type { MaybePromise } from './utils/types';
6
- /** Args obbject passed to `crawleeOne` */
6
+ /** Args object passed to `crawleeOne` */
7
7
  export interface CrawleeOneArgs<TType extends CrawlerType, T extends CrawleeOneCtx<CrawlerMeta<TType>['context']>> {
8
8
  /** Type specifying the Crawlee crawler class, input options, and more. */
9
9
  type: CrawlerType;
@@ -104,4 +104,4 @@ export interface CrawleeOneArgs<TType extends CrawlerType, T extends CrawleeOneC
104
104
  };
105
105
  routes: Record<T['labels'], CrawleeOneRoute<T, CrawleeOneActorRouterCtx<T>>>;
106
106
  }
107
- export declare const crawleeOne: <TType extends "basic" | "http" | "cheerio" | "jsdom" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>> = CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>>>(args: CrawleeOneArgs<TType, T>) => Promise<void>;
107
+ export declare const crawleeOne: <TType extends "basic" | "http" | "jsdom" | "cheerio" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>> = CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>>>(args: CrawleeOneArgs<TType, T>) => Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"api.js","sourceRoot":"","sources":["../../src/api.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,6CAAkD;AAQlD,mCAAmD;AAmH5C,MAAM,UAAU,GAAG,CAMxB,IAA8B,EAC9B,EAAE;;IACF,MAAM,kBAAkB,GAAG,CAAC,OAA+D,EAAE,EAAE;QAC7F,MAAM,YAAY,GAAG,CAAO,GAAG,EAAE,EAAE;;YACjC,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,eAAe,mDAAG,GAAU,CAAC,CAAA,CAAC;YAChD,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC;YACnB,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,cAAc,mDAAG,GAAU,CAAC,CAAA,CAAC;QACjD,CAAC,CAAA,CAAC;QACF,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC;IAEF,OAAO,IAAA,qBAAa,EAAW;QAC7B,SAAS,EAAE,IAAI,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,CAAC,IAAa;QAC7B,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;QACjD,sBAAsB,EAAE,IAAI,CAAC,aAAa;QAC1C,WAAW,EAAE;YACX,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,EAAE,EAAE,IAAI,CAAC,EAAE;YAEX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,aAAa,EAAE,MAAA,IAAI,CAAC,KAAK,0CAAE,aAAa;YAExC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,oBAAoB,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBACnC,IAAA,4BAAsB,EAAC,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;oBACjD,kBAAyB;iBAC1B,CAAA;aAAA;SACF;QACD,OAAO,EAAE,CAAO,KAAK,EAAE,EAAE;;YACvB,MAAM,OAAO,GAAG,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,OAAO,mCAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;YACvE,MAAM,OAAO,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC,CAAA;KACF,CAAC,CAAC;AACL,CAAC,CAAC;AA5CW,QAAA,UAAU,cA4CrB","sourcesContent":["import { runCrawleeOne } from './lib/actor/actor';\nimport type {\n CrawleeOneActorInst,\n CrawleeOneActorDef,\n CrawleeOneActorRouterCtx,\n CrawleeOneCtx,\n} from './lib/actor/types';\nimport type { AllActorInputs } from './lib/input';\nimport { logLevelHandlerWrapper } from './lib/log';\nimport type { CrawleeOneRouteHandler, CrawleeOneRoute } from './lib/router/types';\nimport type { CrawlerMeta, CrawlerType } from './types';\nimport type { MaybePromise } from './utils/types';\n\n/** Args obbject passed to `crawleeOne` */\nexport interface 
CrawleeOneArgs<\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']>\n> {\n /** Type specifying the Crawlee crawler class, input options, and more. */\n type: CrawlerType;\n /** Unique name of the crawler instance. The name may be used in codegen and logging. */\n name?: string;\n\n /** Crawlee crawler configuration that CANNOT be overriden via `input` and `crawlerConfigDefaults` */\n crawlerConfig?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n /** Crawlee crawler configuration that CAN be overriden via `input` and `crawlerConfig` */\n crawlerConfigDefaults?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n\n /**\n * If `mergeInput` is truthy, will merge input settings from `inputDefaults`, `input`,\n * and `io.getInput()`.\n * \n * ```js\n * { ...inputDefaults, ...io.getInput(), ...input }\n * ```\n * \n * If `mergeInput` is falsy, `io.getInput()` is ignored if `input` is provided. So the input is either:\n * \n * ```js\n * { ...inputDefaults, ...io.getInput() } // If `input` is not defined\n * ```\n * \n * OR\n * \n * ```js\n * { ...inputDefaults, ...input } // If `input` is defined\n * ```\n * \n * Alternatively, you can supply your own function that merges the sources:\n * \n * ```js\n * {\n * // `mergeInput` can be also async\n * mergeInput: ({ defaults, overrides, env }) => {\n * // This is same as `mergeInput: true`\n * return { ...defaults, ...env, ...overrides };\n * },\n * }\n * ```\n */\n mergeInput?: boolean | ((sources: {\n defaults: Partial<AllActorInputs>;\n overrides: Partial<AllActorInputs>;\n env: Partial<AllActorInputs>;\n }) => MaybePromise<Partial<AllActorInputs>>);\n /** Input configuration that CANNOT be overriden via `inputDefaults` and `io.getInput()` */\n input?: Partial<AllActorInputs>;\n /** Input configuration that CAN be overriden via `input` and `io.getInput()` */\n inputDefaults?: Partial<AllActorInputs>;\n\n // /////// Override services /////////\n /**\n * Configure the Crawlee 
proxy.\n *\n * See {@link ProxyConfiguration}\n */\n proxy?: CrawleeOneActorDef<T>['proxy'];\n /**\n * Provide a telemetry instance that is used for tracking errors.\n *\n * See {@link CrawleeOneTelemetry}\n */\n telemetry?: CrawleeOneActorDef<T>['telemetry'];\n /**\n * Provide an instance that is responsible for state management:\n * - Adding scraped data to datasets\n * - Adding and removing requests to/from queues\n * - Cache storage\n * \n * This is an API based on Apify's `Actor` utility class, which is also\n * the default.\n * \n * You don't need to override this in most of the cases.\n * \n * By default, the data is saved and kept locally in\n * `./storage` directory. And if the cralwer runs in Apify's platform\n * then it will use Apify's cloud for storage.\n *\n * See {@link CrawleeOneIO}\n */\n io?: CrawleeOneActorDef<T>['io'];\n /**\n * Provide a custom router instance.\n * \n * By default, router is created as:\n * ```ts\n * import { Router } from 'crawlee';\n * Router.create(),\n * ```\n *\n * See {@link Router}\n */\n router?: CrawleeOneActorDef<T>['router'];\n\n hooks?: {\n onReady?: (actor: CrawleeOneActorInst<T>) => MaybePromise<void>;\n validateInput?: (input: AllActorInputs | null) => MaybePromise<void>;\n onBeforeHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n onAfterHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n };\n routes: Record<T['labels'], CrawleeOneRoute<T, CrawleeOneActorRouterCtx<T>>>;\n} // prettier-ignore\n\nexport const crawleeOne = <\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']> = CrawleeOneCtx<\n CrawlerMeta<TType>['context']\n >\n>(\n args: CrawleeOneArgs<TType, T>\n) => {\n const hookHandlerWrapper = (handler: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>) => {\n const innerHandler = async (ctx) => {\n await args.hooks?.onBeforeHandler?.(ctx as any);\n await handler(ctx);\n await args.hooks?.onAfterHandler?.(ctx as any);\n };\n return 
innerHandler;\n };\n\n return runCrawleeOne<TType, T>({\n actorName: args.name,\n actorType: args.type as TType,\n crawlerConfigDefaults: args.crawlerConfigDefaults,\n crawlerConfigOverrides: args.crawlerConfig,\n actorConfig: {\n telemetry: args.telemetry,\n router: args.router,\n proxy: args.proxy,\n io: args.io,\n\n input: args.input,\n inputDefaults: args.inputDefaults,\n mergeInput: args.mergeInput,\n validateInput: args.hooks?.validateInput,\n\n routes: args.routes,\n routeHandlerWrappers: ({ input }) => [\n logLevelHandlerWrapper(input?.logLevel ?? 'info'),\n hookHandlerWrapper as any,\n ],\n },\n onReady: async (actor) => {\n const onReady = args.hooks?.onReady ?? ((actor) => actor.runCrawler());\n await onReady(actor);\n },\n });\n};\n"]}
1
+ {"version":3,"file":"api.js","sourceRoot":"","sources":["../../src/api.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,6CAAkD;AAQlD,mCAAmD;AAmH5C,MAAM,UAAU,GAAG,CAMxB,IAA8B,EAC9B,EAAE;;IACF,MAAM,kBAAkB,GAAG,CAAC,OAA+D,EAAE,EAAE;QAC7F,MAAM,YAAY,GAAG,CAAO,GAAQ,EAAE,EAAE;;YACtC,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,eAAe,mDAAG,GAAU,CAAC,CAAA,CAAC;YAChD,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC;YACnB,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,cAAc,mDAAG,GAAU,CAAC,CAAA,CAAC;QACjD,CAAC,CAAA,CAAC;QACF,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC;IAEF,OAAO,IAAA,qBAAa,EAAW;QAC7B,SAAS,EAAE,IAAI,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,CAAC,IAAa;QAC7B,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;QACjD,sBAAsB,EAAE,IAAI,CAAC,aAAa;QAC1C,WAAW,EAAE;YACX,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,EAAE,EAAE,IAAI,CAAC,EAAE;YAEX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,aAAa,EAAE,MAAA,IAAI,CAAC,KAAK,0CAAE,aAAa;YAExC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,oBAAoB,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBACnC,IAAA,4BAAsB,EAAC,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;oBACjD,kBAAyB;iBAC1B,CAAA;aAAA;SACF;QACD,OAAO,EAAE,CAAO,KAAK,EAAE,EAAE;;YACvB,MAAM,OAAO,GAAG,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,OAAO,mCAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;YACvE,MAAM,OAAO,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC,CAAA;KACF,CAAC,CAAC;AACL,CAAC,CAAC;AA5CW,QAAA,UAAU,cA4CrB","sourcesContent":["import { runCrawleeOne } from './lib/actor/actor';\nimport type {\n CrawleeOneActorInst,\n CrawleeOneActorDef,\n CrawleeOneActorRouterCtx,\n CrawleeOneCtx,\n} from './lib/actor/types';\nimport type { AllActorInputs } from './lib/input';\nimport { logLevelHandlerWrapper } from './lib/log';\nimport type { CrawleeOneRouteHandler, CrawleeOneRoute } from './lib/router/types';\nimport type { CrawlerMeta, CrawlerType } from './types';\nimport type { MaybePromise } from './utils/types';\n\n/** Args object passed to `crawleeOne` */\nexport interface 
CrawleeOneArgs<\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']>\n> {\n /** Type specifying the Crawlee crawler class, input options, and more. */\n type: CrawlerType;\n /** Unique name of the crawler instance. The name may be used in codegen and logging. */\n name?: string;\n\n /** Crawlee crawler configuration that CANNOT be overriden via `input` and `crawlerConfigDefaults` */\n crawlerConfig?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n /** Crawlee crawler configuration that CAN be overriden via `input` and `crawlerConfig` */\n crawlerConfigDefaults?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n\n /**\n * If `mergeInput` is truthy, will merge input settings from `inputDefaults`, `input`,\n * and `io.getInput()`.\n * \n * ```js\n * { ...inputDefaults, ...io.getInput(), ...input }\n * ```\n * \n * If `mergeInput` is falsy, `io.getInput()` is ignored if `input` is provided. So the input is either:\n * \n * ```js\n * { ...inputDefaults, ...io.getInput() } // If `input` is not defined\n * ```\n * \n * OR\n * \n * ```js\n * { ...inputDefaults, ...input } // If `input` is defined\n * ```\n * \n * Alternatively, you can supply your own function that merges the sources:\n * \n * ```js\n * {\n * // `mergeInput` can be also async\n * mergeInput: ({ defaults, overrides, env }) => {\n * // This is same as `mergeInput: true`\n * return { ...defaults, ...env, ...overrides };\n * },\n * }\n * ```\n */\n mergeInput?: boolean | ((sources: {\n defaults: Partial<AllActorInputs>;\n overrides: Partial<AllActorInputs>;\n env: Partial<AllActorInputs>;\n }) => MaybePromise<Partial<AllActorInputs>>);\n /** Input configuration that CANNOT be overriden via `inputDefaults` and `io.getInput()` */\n input?: Partial<AllActorInputs>;\n /** Input configuration that CAN be overriden via `input` and `io.getInput()` */\n inputDefaults?: Partial<AllActorInputs>;\n\n // /////// Override services /////////\n /**\n * Configure the Crawlee 
proxy.\n *\n * See {@link ProxyConfiguration}\n */\n proxy?: CrawleeOneActorDef<T>['proxy'];\n /**\n * Provide a telemetry instance that is used for tracking errors.\n *\n * See {@link CrawleeOneTelemetry}\n */\n telemetry?: CrawleeOneActorDef<T>['telemetry'];\n /**\n * Provide an instance that is responsible for state management:\n * - Adding scraped data to datasets\n * - Adding and removing requests to/from queues\n * - Cache storage\n * \n * This is an API based on Apify's `Actor` utility class, which is also\n * the default.\n * \n * You don't need to override this in most of the cases.\n * \n * By default, the data is saved and kept locally in\n * `./storage` directory. And if the cralwer runs in Apify's platform\n * then it will use Apify's cloud for storage.\n *\n * See {@link CrawleeOneIO}\n */\n io?: CrawleeOneActorDef<T>['io'];\n /**\n * Provide a custom router instance.\n * \n * By default, router is created as:\n * ```ts\n * import { Router } from 'crawlee';\n * Router.create(),\n * ```\n *\n * See {@link Router}\n */\n router?: CrawleeOneActorDef<T>['router'];\n\n hooks?: {\n onReady?: (actor: CrawleeOneActorInst<T>) => MaybePromise<void>;\n validateInput?: (input: AllActorInputs | null) => MaybePromise<void>;\n onBeforeHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n onAfterHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n };\n routes: Record<T['labels'], CrawleeOneRoute<T, CrawleeOneActorRouterCtx<T>>>;\n} // prettier-ignore\n\nexport const crawleeOne = <\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']> = CrawleeOneCtx<\n CrawlerMeta<TType>['context']\n >\n>(\n args: CrawleeOneArgs<TType, T>\n) => {\n const hookHandlerWrapper = (handler: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>) => {\n const innerHandler = async (ctx: any) => {\n await args.hooks?.onBeforeHandler?.(ctx as any);\n await handler(ctx);\n await args.hooks?.onAfterHandler?.(ctx as any);\n };\n return 
innerHandler;\n };\n\n return runCrawleeOne<TType, T>({\n actorName: args.name,\n actorType: args.type as TType,\n crawlerConfigDefaults: args.crawlerConfigDefaults,\n crawlerConfigOverrides: args.crawlerConfig,\n actorConfig: {\n telemetry: args.telemetry,\n router: args.router,\n proxy: args.proxy,\n io: args.io,\n\n input: args.input,\n inputDefaults: args.inputDefaults,\n mergeInput: args.mergeInput,\n validateInput: args.hooks?.validateInput,\n\n routes: args.routes,\n routeHandlerWrappers: ({ input }) => [\n logLevelHandlerWrapper(input?.logLevel ?? 'info'),\n hookHandlerWrapper as any,\n ],\n },\n onReady: async (actor) => {\n const onReady = args.hooks?.onReady ?? ((actor) => actor.runCrawler());\n await onReady(actor);\n },\n });\n};\n"]}
@@ -33,7 +33,7 @@ commander_1.program
33
33
 
34
34
  Example call:
35
35
  $ crawlee-one generate -c ./path/to/config-file -o ./path/to/output.ts`)
36
- .action(({ config: configFile, out: outFile }) => __awaiter(void 0, void 0, void 0, function* () {
36
+ .action((_a) => __awaiter(void 0, [_a], void 0, function* ({ config: configFile, out: outFile }) {
37
37
  yield (0, codegen_1.generateTypes)(outFile, configFile);
38
38
  }));
39
39
  commander_1.program
@@ -44,7 +44,7 @@ commander_1.program
44
44
 
45
45
  Example call:
46
46
  $ crawlee-one validate -c ./path/to/config`)
47
- .action(({ config: configPath }) => __awaiter(void 0, void 0, void 0, function* () {
47
+ .action((_b) => __awaiter(void 0, [_b], void 0, function* ({ config: configPath }) {
48
48
  const config = yield (0, config_1.loadConfig)(configPath);
49
49
  (0, config_1.validateConfig)(config);
50
50
  console.log('CrawleeOne config is OK!');
@@ -60,7 +60,7 @@ commander_1.program
60
60
 
61
61
  Example call:
62
62
  $ crawlee-one migrate -d ./path/to/migrations-dir -t v1`)
63
- .action(({ dir, target, extension, delimeter }) => __awaiter(void 0, void 0, void 0, function* () {
63
+ .action((_c) => __awaiter(void 0, [_c], void 0, function* ({ dir, target, extension, delimeter }) {
64
64
  const migrationsDir = path_1.default.resolve(process.cwd(), dir);
65
65
  const { migrate } = (0, localMigrator_1.createLocalMigrator)({ migrationsDir, extension, delimeter });
66
66
  yield migrate(target);
@@ -76,7 +76,7 @@ commander_1.program
76
76
 
77
77
  Example call:
78
78
  $ crawlee-one unmigrate -d ./path/to/migrations-dir -t v1`)
79
- .action(({ dir, target, extension, delimeter }) => __awaiter(void 0, void 0, void 0, function* () {
79
+ .action((_d) => __awaiter(void 0, [_d], void 0, function* ({ dir, target, extension, delimeter }) {
80
80
  const migrationsDir = path_1.default.resolve(process.cwd(), dir);
81
81
  const { unmigrate } = (0, localMigrator_1.createLocalMigrator)({ migrationsDir, extension, delimeter });
82
82
  yield unmigrate(target);
@@ -1 +1 @@
1
- {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../../src/cli/cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,yCAAoC;AACpC,gDAAwB;AAExB,8CAAsD;AACtD,gEAAmE;AACnE,8CAA+D;AAC/D,gDAAmD;AAEnD,MAAM,OAAO,GAAG,IAAA,4BAAkB,EAAC,MAAM,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;AAEhE,mBAAO,CAAC,EAAE;KACP,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;KAClB,WAAW,CAAC,8BAA8B,CAAC;KAC3C,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAE5B,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,2CAA2C,CAAC;KACxD,MAAM,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAC1D,cAAc,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KAC/D,WAAW,CACV,OAAO,EACP;;;yEAGqE,CACtE;KACA,MAAM,CAAC,CAAO,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,OAAO,EAAE,EAAE,EAAE;IACrD,MAAM,IAAA,uBAAa,EAAC,OAAO,EAAE,UAAU,CAAC,CAAC;AAC3C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,4BAA4B,CAAC;KACzC,cAAc,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAClE,WAAW,CACV,OAAO,EACP;;;6CAGyC,CAC1C;KACA,MAAM,CAAC,CAAO,EAAE,MAAM,EAAE,UAAU,EAAE,EAAE,EAAE;IACvC,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAU,EAAC,UAAU,CAAC,CAAC;IAC5C,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IACvB,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;AAC1C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,SAAS,CAAC;KAClB,WAAW,CAAC,wDAAwD,CAAC;KACrE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;0DAGsD,CACvD;KACA,MAAM,CAAC,CAAO,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE;IACtD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,OAAO,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACjF,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;AACxB,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,WAAW,CAAC;KACpB,WAAW,CAAC,4DAA4D,CAAC;KACzE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;4DAGwD,CACzD;KACA,MAAM,CAAC,CAAO,EAAE,GAAG,EAAE,MAAM,EAAE
,SAAS,EAAE,SAAS,EAAE,EAAE,EAAE;IACtD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,SAAS,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACnF,MAAM,SAAS,CAAC,MAAM,CAAC,CAAC;AAC1B,CAAC,CAAA,CAAC,CAAC;AAEE,MAAM,GAAG,GAAG,GAAG,EAAE;IACtB,mBAAO,CAAC,KAAK,EAAE,CAAC;AAClB,CAAC,CAAC;AAFW,QAAA,GAAG,OAEd","sourcesContent":["import { program } from 'commander';\nimport path from 'path';\n\nimport { getPackageJsonInfo } from '../utils/package';\nimport { createLocalMigrator } from '../lib/migrate/localMigrator';\nimport { loadConfig, validateConfig } from './commands/config';\nimport { generateTypes } from './commands/codegen';\n\nconst pkgJson = getPackageJsonInfo(module, ['name', 'version']);\n\nprogram //\n .name(pkgJson.name)\n .description('CLI to run crawlee-one tools')\n .version(pkgJson.version);\n\nprogram\n .command('generate')\n .description('Generate CrawleeOne types based on config')\n .option('-c --config [config-file]', 'path to config file')\n .requiredOption('-o --out <output-file>', 'path to output file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one generate -c ./path/to/config-file -o ./path/to/output.ts`\n )\n .action(async ({ config: configFile, out: outFile }) => {\n await generateTypes(outFile, configFile);\n });\n\nprogram\n .command('validate')\n .description('Validate CrawleeOne config')\n .requiredOption('-c --config <config-file>', 'path to config file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one validate -c ./path/to/config`\n )\n .action(async ({ config: configPath }) => {\n const config = await loadConfig(configPath);\n validateConfig(config);\n console.log('CrawleeOne config is OK!');\n });\n\nprogram\n .command('migrate')\n .description('Run a migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 'path to the 
migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one migrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { migrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await migrate(target);\n });\n\nprogram\n .command('unmigrate')\n .description('Run an un-migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 'path to the migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one unmigrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { unmigrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await unmigrate(target);\n });\n\nexport const cli = () => {\n program.parse();\n};\n"]}
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../../src/cli/cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,yCAAoC;AACpC,gDAAwB;AAExB,8CAAsD;AACtD,gEAAmE;AACnE,8CAA+D;AAC/D,gDAAmD;AAEnD,MAAM,OAAO,GAAG,IAAA,4BAAkB,EAAC,MAAM,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;AAEhE,mBAAO,CAAC,EAAE;KACP,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;KAClB,WAAW,CAAC,8BAA8B,CAAC;KAC3C,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAE5B,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,2CAA2C,CAAC;KACxD,MAAM,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAC1D,cAAc,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KAC/D,WAAW,CACV,OAAO,EACP;;;yEAGqE,CACtE;KACA,MAAM,CAAC,KAA6C,EAAE,4CAAxC,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,OAAO,EAAE;IACjD,MAAM,IAAA,uBAAa,EAAC,OAAO,EAAE,UAAU,CAAC,CAAC;AAC3C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,4BAA4B,CAAC;KACzC,cAAc,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAClE,WAAW,CACV,OAAO,EACP;;;6CAGyC,CAC1C;KACA,MAAM,CAAC,KAA+B,EAAE,4CAA1B,EAAE,MAAM,EAAE,UAAU,EAAE;IACnC,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAU,EAAC,UAAU,CAAC,CAAC;IAC5C,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IACvB,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;AAC1C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,SAAS,CAAC;KAClB,WAAW,CAAC,wDAAwD,CAAC;KACrE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;0DAGsD,CACvD;KACA,MAAM,CAAC,KAA8C,EAAE,4CAAzC,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE;IAClD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,OAAO,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACjF,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;AACxB,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,WAAW,CAAC;KACpB,WAAW,CAAC,4DAA4D,CAAC;KACzE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;4DAGwD,CACzD;KACA,MAAM,CAAC,KAA8C,EAAE,4CAAzC,EA
AE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE;IAClD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,SAAS,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACnF,MAAM,SAAS,CAAC,MAAM,CAAC,CAAC;AAC1B,CAAC,CAAA,CAAC,CAAC;AAEE,MAAM,GAAG,GAAG,GAAG,EAAE;IACtB,mBAAO,CAAC,KAAK,EAAE,CAAC;AAClB,CAAC,CAAC;AAFW,QAAA,GAAG,OAEd","sourcesContent":["import { program } from 'commander';\nimport path from 'path';\n\nimport { getPackageJsonInfo } from '../utils/package';\nimport { createLocalMigrator } from '../lib/migrate/localMigrator';\nimport { loadConfig, validateConfig } from './commands/config';\nimport { generateTypes } from './commands/codegen';\n\nconst pkgJson = getPackageJsonInfo(module, ['name', 'version']);\n\nprogram //\n .name(pkgJson.name)\n .description('CLI to run crawlee-one tools')\n .version(pkgJson.version);\n\nprogram\n .command('generate')\n .description('Generate CrawleeOne types based on config')\n .option('-c --config [config-file]', 'path to config file')\n .requiredOption('-o --out <output-file>', 'path to output file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one generate -c ./path/to/config-file -o ./path/to/output.ts`\n )\n .action(async ({ config: configFile, out: outFile }) => {\n await generateTypes(outFile, configFile);\n });\n\nprogram\n .command('validate')\n .description('Validate CrawleeOne config')\n .requiredOption('-c --config <config-file>', 'path to config file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one validate -c ./path/to/config`\n )\n .action(async ({ config: configPath }) => {\n const config = await loadConfig(configPath);\n validateConfig(config);\n console.log('CrawleeOne config is OK!');\n });\n\nprogram\n .command('migrate')\n .description('Run a migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 
'path to the migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one migrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { migrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await migrate(target);\n });\n\nprogram\n .command('unmigrate')\n .description('Run an un-migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 'path to the migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one unmigrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { unmigrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await unmigrate(target);\n });\n\nexport const cli = () => {\n program.parse();\n};\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"codegen.js","sourceRoot":"","sources":["../../../../src/cli/commands/codegen.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,sDAAsD;AACtD,2DAA8B;AAC9B,gDAAwB;AAGxB,+CAA4D;AAC5D,qCAAsD;AActD,MAAM,SAAS,GAAG,CAAC,KAAe,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC9E,MAAM,QAAQ,GAAG,CAAC,KAAe,EAAE,EAAE,CACnC,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;AAEpE,MAAM,UAAU,GAAG;IACjB,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,GAAG,WAAW,MAAM,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,QAAQ,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QAC7D,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,gBAAgB,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IAC1D,CAAC;IACD,iGAAiG;IACjG,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,OAAO,eAAe,IAAI,IAAI,KAAK,EAAE,CAAC;IACxC,CAAC;CACiF,CAAC;AAErF,MAAM,oBAAoB,GAAG,CAAC,MAA8B,EAAE,EAAE;IAC9D,mDAAmD;IACnD,MAAM,WAAW,GAA2B,EAAE,CAAC;IAE/C,8DAA8D;IAC9D,MAAM,OAAO,GAA8D,EAAE,CAAC;IAE9E,MAAM,UAAU,GAAG,CACjB,GAAW,EACX,UAAe,EACf,OAAgC,EAChC,EAAE;QACF,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC;QAC3D,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC;QAC9D,qEAAqE;QACrE,0CAA0C;QAC1C,OAAO,UAAU,CAAC,MAAM,CAAsB,CAAC,G
AAG,EAAE,GAAG,EAAE,EAAE;YACzD,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACf,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAS,CAAC,CAAC;IAChB,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CACb,GAAW,EACX,KAA8B,EAC9B,OAAiE,EACjE,EAAE;;QACF,MAAM,IAAI,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,IAAI,mCAAI,MAAM,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,EAAE,CAAC;QACzC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE;YACrB,MAAM,WAAW,GAAG,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;YAClE,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;YAC3D,WAAW,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC;SACjC;QACD,qDAAqD;QACrD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IAEF,oBAAoB;IACpB,MAAM,EACJ,cAAc,EAAE,UAAU,EAC1B,wBAAwB,EAAE,cAAc,EACxC,mBAAmB,EAAE,QAAQ,EAC7B,eAAe,EAAE,SAAS,EAC1B,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,wBAAwB,EAAE,cAAc,EACxC,YAAY,EAAE,MAAM,EACpB,mBAAmB,EAAE,SAAS,EAC9B,aAAa,EAAE,OAAO,EACtB,cAAc,EAAE,QAAQ,EACxB,UAAU,EAAE,YAAY,GACzB,GAAG,UAAU,CAAC,aAAa,EAAE;QAC5B,gBAAgB;QAChB,0BAA0B;QAC1B,qBAAqB;QACrB,iBAAiB;QACjB,wBAAwB;QACxB,wBAAwB;QACxB,wBAAwB;QACxB,0BAA0B;QAC1B,cAAc;QACd,qBAAqB;QACrB,eAAe;QACf,gBAAgB;QAChB,YAAY;KACb,CAAC,CAAC;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,qCAAyB,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpF,kBAAkB;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,EAAE,gBAAgB,EAAE,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAE7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,EAAE,EAAE;QACjE,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;QAEjC,wEAAwE;QACxE,MAAM,uBAAuB,GAAG,qCAAyB,CAAC,WAAW,CAAC,CAAC;QAEvE,oCAAoC;QACpC,+DAA+D;QAC/D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,WAAW,OAAO,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhF,iGAAiG;QACjG,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,WAAW,WAAW,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrF,IAAI,EAAE,MAAM;SACb,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG;YAClB,wCAAwC,UAAU,EAAE;YACpD,eAAe,MAAM,MAAM,MAAM,EAAE;YACnC,iBAAiB,SAAS,gBAAgB,
SAAS,YAAY;SAChE,CAAC;QAEF,+BAA+B;QAC/B,4HAA4H;QAC5H,MAAM,MAAM,GAAG,MAAM,CACnB,GAAG,WAAW,KAAK,EACnB,GAAG,OAAO,IAAI,uBAAuB,KAAK,QAAQ,uBAAuB,EACzE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,gCAAgC;QAChC,0FAA0F;QAC1F,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,eAAe,QAAQ,KAAK,WAAW,MAAM,MAAM,sCAAsC,YAAY,KAAK,WAAW,MAAM,MAAM,2CAA2C,WAAW,MAAM,EAC7L,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CACxC,CAAC;QAEF,2DAA2D;QAC3D,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,YAAY,GAAG,MAAM,CACzB,GAAG,WAAW,eAAe,EAC7B,GAAG,cAAc,IAAI,MAAM,uBAAuB,EAClD,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,+CAA+C;QAC/C,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,WAAW,GAAG,MAAM,CACxB,GAAG,WAAW,UAAU,EACxB,GAAG,QAAQ,IAAI,MAAM,uBAAuB,EAC5C,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,wBAAwB;QACxB,oEAAoE;QACpE,MAAM,QAAQ,GAAG,MAAM,CACrB,GAAG,WAAW,OAAO,EACrB,GAAG,SAAS,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EAClF;YACE,QAAQ,EAAE,WAAW;SACtB,CACF,CAAC;QAEF,+GAA+G;QAC/G,MAAM,iBAAiB,GAAG,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,CAAC;QAChH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,WAAW,cAAc,EAAE,iBAAiB,EAAE;YAC9E,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,+GAA+G;QAC/G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,iBAAiB,GAAG,MAAM,CAC9B,GAAG,WAAW,gBAAgB,EAC9B,GAAG,cAAc,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACvF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0BAA0B;QAE1B,kEAAkE;QAClE,mHAAmH;QACnH,kHAAkH;QAClH,MAAM,kBAAkB,GAAG,MAAM,CAAC,GAAG,WAAW,iBAAiB,EAAE,iBAAiB,EAAE;YACpF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,CAAC,GAAG,WAAW,gBAAgB,EAAE,iBAAiB,EAAE;YAClF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,qKAAqK;QACrK,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,WAAW,WAAW,4BAA4B,MAAM,SAAS,EACjE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC5C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,CACjD,CAA
C,GAAG,EAAE,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,EAAE;QACzB,MAAM,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QACzC,MAAM,gBAAgB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,GAAG,CAAC,GAAG,CAAC,GAAG,UAAU,OAAO,KAAK,gBAAgB,YAAY,GAAG,GAAG,CAAC;QACpE,OAAO,GAAG,CAAC;IACb,CAAC,EACD,EAAE,CACH,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AAChD,CAAC,CAAC;AAEF;;;;;GAKG;AACI,MAAM,aAAa,GAAG,CAAO,OAAe,EAAE,YAAwC,EAAE,EAAE;IAC/F,MAAM,MAAM,GACV,CAAC,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ;QAC/C,CAAC,CAAC,MAAM,IAAA,mBAAU,EAAC,YAAY,CAAC;QAChC,CAAC,CAAC,YAAY,CAAC;IACnB,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IAEvB,uEAAuE;IACvE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,MAAM,oBAAoB,CAAC,MAAO,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,WAAW,GACf,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEzF,MAAM,MAAM,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,kBAAG,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,kBAAG,CAAC,SAAS,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAEnD,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,EAAE,CAAC,CAAC;AACrD,CAAC,CAAA,CAAC;AAjBW,QAAA,aAAa,iBAiBxB","sourcesContent":["/* eslint-disable @typescript-eslint/no-unused-vars */\nimport fsp from 'fs/promises';\nimport path from 'path';\n\nimport type { CrawleeOneConfig, CrawleeOneConfigSchema } from '../../types/config';\nimport { crawlingContextNameByType } from '../../constants';\nimport { loadConfig, validateConfig } from './config';\n// NOTE: We intentionally import these to know when their names change\nimport type { AllActorInputs } from '../../lib/input';\nimport type { CrawleeOneActorInst, CrawleeOneActorRouterCtx } from '../../lib/actor/types';\nimport type {\n CrawleeOneRoute,\n CrawleeOneRouteHandler,\n CrawleeOneRouteMatcher,\n CrawleeOneRouteMatcherFn,\n 
CrawleeOneRouteWrapper,\n} from '../../lib/router/types';\nimport type { MaybePromise } from '../../utils/types';\nimport type { CrawleeOneArgs, crawleeOne } from '../../api';\n\nconst makeUnion = (items: string[]) => items.map((s) => `\"${s}\"`).join(` | `);\nconst makeEnum = (items: string[]) =>\n '{\\n' + items.map((s) => ` '${s}' = '${s}'`).join(`,\\n`) + '\\n}';\n\nconst formatters = {\n type: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name}${typeArgsStr} = ${value};`;\n },\n typeFunc: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name} = ${typeArgsStr}${value};`;\n },\n func: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export const ${name} = ${typeArgsStr}${value};`;\n },\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n enum: (name: string, value: string, typeArgs?: string[]) => {\n return `export enum ${name} ${value}`;\n },\n} satisfies Record<string, (name: string, value: string, args?: string[]) => string>;\n\nconst parseTypesFromSchema = (schema: CrawleeOneConfigSchema) => {\n /** Remember which types we've already generated */\n const definitions: Record<string, string> = {};\n\n /** Remember what values need to be imported and from where */\n const imports: Record<string, Set<{ name: string; typeOnly?: boolean }>> = {};\n\n const addImports = <T extends string>(\n pkg: string,\n newEntries: T[],\n options?: { typeOnly?: boolean }\n ) => {\n const { typeOnly } = options ?? 
{};\n const entries = (imports[pkg] = imports[pkg] || new Set());\n newEntries.forEach((name) => entries.add({ name, typeOnly }));\n // Return the entries as variables, so we can define them in a single\n // place but still reference them in code.\n return newEntries.reduce<{ [Key in T]: Key }>((agg, key) => {\n agg[key] = key;\n return agg;\n }, {} as any);\n };\n\n const define = (\n key: string,\n value: string | (() => string),\n options?: { typeArgs?: string[]; kind?: keyof typeof formatters }\n ) => {\n const kind = options?.kind ?? 'type';\n const typeArgs = options?.typeArgs ?? [];\n if (!definitions[key]) {\n const resolvedVal = typeof value === 'function' ? value() : value;\n const formatter = formatters[kind];\n const valFormatted = formatter(key, resolvedVal, typeArgs);\n definitions[key] = valFormatted;\n }\n // Return the key as variable, so we can reference it\n return key;\n };\n\n // 1. Define imports\n const {\n AllActorInputs: actorInput,\n CrawleeOneActorRouterCtx: actorRouterCtx,\n CrawleeOneActorInst: actorCtx,\n CrawleeOneRoute: routeType,\n CrawleeOneRouteHandler: routeHandler,\n CrawleeOneRouteWrapper: routeWrapper,\n CrawleeOneRouteMatcher: routeMatcher,\n CrawleeOneRouteMatcherFn: routeMatcherFn,\n CrawleeOneIO: ioType,\n CrawleeOneTelemetry: telemType,\n CrawleeOneCtx: ctxType,\n CrawleeOneArgs: argsType,\n crawleeOne: crawleeOneFn,\n } = addImports('crawlee-one', [\n 'AllActorInputs',\n 'CrawleeOneActorRouterCtx',\n 'CrawleeOneActorInst',\n 'CrawleeOneRoute',\n 'CrawleeOneRouteHandler',\n 'CrawleeOneRouteWrapper',\n 'CrawleeOneRouteMatcher',\n 'CrawleeOneRouteMatcherFn',\n 'CrawleeOneIO',\n 'CrawleeOneTelemetry',\n 'CrawleeOneCtx',\n 'CrawleeOneArgs',\n 'crawleeOne',\n ]);\n addImports('crawlee', Object.values(crawlingContextNameByType), { typeOnly: true });\n\n // 2. 
Define utils\n const maybeP = define('MaybePromise', 'T | Promise<T>', { typeArgs: ['T'] });\n\n Object.entries(schema.crawlers).forEach(([crawlerName, crawler]) => {\n const crawlerType = crawler.type;\n\n // 2. Get `CrawlingContext`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n const crawlingContextTypeName = crawlingContextNameByType[crawlerType];\n\n // 3. Generate type for route labels\n // type `CrawlerName`Label = \"detailPage\" | \"otherLabel\" | ...;\n const labelKey = define(`${crawlerName}Label`, () => makeUnion(crawler.routes));\n\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n const labelEnumKey = define(`${crawlerName}LabelEnum`, () => makeEnum(crawler.routes), {\n kind: 'enum',\n });\n\n const ctxTypeArgs = [\n `TInput extends Record<string, any> = ${actorInput}`,\n `TIO extends ${ioType} = ${ioType}`,\n `Telem extends ${telemType}<any, any> = ${telemType}<any, any>`,\n ];\n\n // 4. Create CrawleeOne context\n // type `CrawlerName`Ctx = <TIO, Telem>CrawleeOneCtx<CheerioCrawlingContext, `CrawlerName`Label, AllActorInputs, TIO, Telem>\n const ctxKey = define(\n `${crawlerName}Ctx`,\n `${ctxType}<${crawlingContextTypeName}, ${labelKey}, TInput, TIO, Telem>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 5. Create CrawleeOne instance\n // const customCrawler = <TIO, Telem>(args: CrawleeOneArgs<TType, T>) => crawleeOne(args);\n const crawlerKey = define(\n `${crawlerName}Crawler`,\n `(args: Omit<${argsType}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>, 'type'>) => ${crawleeOneFn}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>({ ...args, type: \"${crawlerType}\"});`,\n { kind: 'func', typeArgs: ctxTypeArgs }\n );\n\n // 6. 
Get actor router context (`CrawleeOneActorRouterCtx`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const routerCtxKey = define(\n `${crawlerName}RouterContext`,\n `${actorRouterCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 7. Get actor context (`CrawleeOneActorInst`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const actorCtxKey = define(\n `${crawlerName}ActorCtx`,\n `${actorCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 8. Create Route types\n // E.g. `type `crawlerName`Route = CrawleeOneRout<`CrawlerName`Ctx>`\n const routeKey = define(\n `${crawlerName}Route`,\n `${routeType}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n {\n typeArgs: ctxTypeArgs,\n }\n );\n\n // E.g. `type `crawlerName`RouteHandler = CrawleeOneRouteHandler<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeHandlerValue = `${routeHandler}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`;\n const routeHandlerKey = define(`${crawlerName}RouteHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // E.g. `type `crawlerName`RouteWrapper = CrawleeOneRouteWrapper<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeWrapperKey = define(\n `${crawlerName}RouteWrapper`,\n `${routeWrapper}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherKey = define(\n `${crawlerName}RouteMatcher`,\n `${routeMatcher}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. 
`type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherFnKey = define(\n `${crawlerName}RouteMatcherFn`,\n `${routeMatcherFn}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 9. Create Crawler hooks\n\n // NOTE: Type for before/after handler is the same as for handlers\n // E.g. `type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n // E.g. `type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n const onBeforeHandlerKey = define(`${crawlerName}OnBeforeHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n const onAfterHandlerKey = define(`${crawlerName}OnAfterHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // type `CrawlerName`OnReady = <TIO, Telem>(actor: CrawleeOneActorInst<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;\n const onReadyKey = define(\n `${crawlerName}OnReady`,\n `(actor: ${actorCtxKey}<TInput, TIO, Telem>) => ${maybeP}<void>;`,\n { kind: 'typeFunc', typeArgs: ctxTypeArgs }\n );\n });\n\n const finalImports = Object.entries(imports).reduce<Record<string, string>>(\n (agg, [pkg, entriesSet]) => {\n const entries = [...entriesSet.values()];\n const formattedEntries = entries.map((e) => e.name).join(', ');\n const typeStr = entries.every((e) => e.typeOnly) ? 
'type ' : '';\n agg[pkg] = `import ${typeStr}{ ${formattedEntries} } from \"${pkg}\"`;\n return agg;\n },\n {}\n );\n\n return { imports: finalImports, definitions };\n};\n\n/**\n * Generate types for CrawleeOne given a config.\n *\n * Config can be passed directly, or as the path to the config file.\n * If the config is omitted, it is automatically searched for using CosmicConfig.\n */\nexport const generateTypes = async (outfile: string, configOrPath?: CrawleeOneConfig | string) => {\n const config =\n !configOrPath || typeof configOrPath === 'string'\n ? await loadConfig(configOrPath)\n : configOrPath;\n validateConfig(config);\n\n /* eslint-disable-next-line @typescript-eslint/no-non-null-assertion */\n const { imports, definitions } = await parseTypesFromSchema(config!.schema);\n const fileContent =\n Object.values(imports).join('\\n') + '\\n\\n\\n' + Object.values(definitions).join('\\n\\n');\n\n const outdir = path.dirname(outfile);\n await fsp.mkdir(outdir, { recursive: true });\n await fsp.writeFile(outfile, fileContent, 'utf-8');\n\n console.log(`Done generating types to ${outfile}`);\n};\n"]}
1
+ {"version":3,"file":"codegen.js","sourceRoot":"","sources":["../../../../src/cli/commands/codegen.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,sDAAsD;AACtD,2DAA8B;AAC9B,gDAAwB;AAGxB,+CAA4D;AAC5D,qCAAsD;AActD,MAAM,SAAS,GAAG,CAAC,KAAe,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC9E,MAAM,QAAQ,GAAG,CAAC,KAAe,EAAE,EAAE,CACnC,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;AAEpE,MAAM,UAAU,GAAG;IACjB,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,GAAG,WAAW,MAAM,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,QAAQ,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QAC7D,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,gBAAgB,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IAC1D,CAAC;IACD,iGAAiG;IACjG,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,OAAO,eAAe,IAAI,IAAI,KAAK,EAAE,CAAC;IACxC,CAAC;CACiF,CAAC;AAErF,MAAM,oBAAoB,GAAG,CAAC,MAA8B,EAAE,EAAE;IAC9D,mDAAmD;IACnD,MAAM,WAAW,GAA2B,EAAE,CAAC;IAE/C,8DAA8D;IAC9D,MAAM,OAAO,GAA8D,EAAE,CAAC;IAE9E,MAAM,UAAU,GAAG,CACjB,GAAW,EACX,UAAe,EACf,OAAgC,EAChC,EAAE;QACF,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC;QAC3D,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC;QAC9D,qEAAqE;QACrE,0CAA0C;QAC1C,OAAO,UAAU,CAAC,MAAM,CAAsB,CAAC,G
AAG,EAAE,GAAG,EAAE,EAAE;YACzD,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACf,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAS,CAAC,CAAC;IAChB,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CACb,GAAW,EACX,KAA8B,EAC9B,OAAiE,EACjE,EAAE;;QACF,MAAM,IAAI,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,IAAI,mCAAI,MAAM,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,EAAE,CAAC;QACzC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,WAAW,GAAG,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;YAClE,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;YAC3D,WAAW,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC;QAClC,CAAC;QACD,qDAAqD;QACrD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IAEF,oBAAoB;IACpB,MAAM,EACJ,cAAc,EAAE,UAAU,EAC1B,wBAAwB,EAAE,cAAc,EACxC,mBAAmB,EAAE,QAAQ,EAC7B,eAAe,EAAE,SAAS,EAC1B,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,wBAAwB,EAAE,cAAc,EACxC,YAAY,EAAE,MAAM,EACpB,mBAAmB,EAAE,SAAS,EAC9B,aAAa,EAAE,OAAO,EACtB,cAAc,EAAE,QAAQ,EACxB,UAAU,EAAE,YAAY,GACzB,GAAG,UAAU,CAAC,aAAa,EAAE;QAC5B,gBAAgB;QAChB,0BAA0B;QAC1B,qBAAqB;QACrB,iBAAiB;QACjB,wBAAwB;QACxB,wBAAwB;QACxB,wBAAwB;QACxB,0BAA0B;QAC1B,cAAc;QACd,qBAAqB;QACrB,eAAe;QACf,gBAAgB;QAChB,YAAY;KACb,CAAC,CAAC;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,qCAAyB,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpF,kBAAkB;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,EAAE,gBAAgB,EAAE,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAE7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,EAAE,EAAE;QACjE,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;QAEjC,wEAAwE;QACxE,MAAM,uBAAuB,GAAG,qCAAyB,CAAC,WAAW,CAAC,CAAC;QAEvE,oCAAoC;QACpC,+DAA+D;QAC/D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,WAAW,OAAO,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhF,iGAAiG;QACjG,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,WAAW,WAAW,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrF,IAAI,EAAE,MAAM;SACb,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG;YAClB,wCAAwC,UAAU,EAAE;YACpD,eAAe,MAAM,MAAM,MAAM,EAAE;YACnC,iBAAiB,SA
AS,gBAAgB,SAAS,YAAY;SAChE,CAAC;QAEF,+BAA+B;QAC/B,4HAA4H;QAC5H,MAAM,MAAM,GAAG,MAAM,CACnB,GAAG,WAAW,KAAK,EACnB,GAAG,OAAO,IAAI,uBAAuB,KAAK,QAAQ,uBAAuB,EACzE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,gCAAgC;QAChC,0FAA0F;QAC1F,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,eAAe,QAAQ,KAAK,WAAW,MAAM,MAAM,sCAAsC,YAAY,KAAK,WAAW,MAAM,MAAM,2CAA2C,WAAW,MAAM,EAC7L,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CACxC,CAAC;QAEF,2DAA2D;QAC3D,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,YAAY,GAAG,MAAM,CACzB,GAAG,WAAW,eAAe,EAC7B,GAAG,cAAc,IAAI,MAAM,uBAAuB,EAClD,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,+CAA+C;QAC/C,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,WAAW,GAAG,MAAM,CACxB,GAAG,WAAW,UAAU,EACxB,GAAG,QAAQ,IAAI,MAAM,uBAAuB,EAC5C,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,wBAAwB;QACxB,oEAAoE;QACpE,MAAM,QAAQ,GAAG,MAAM,CACrB,GAAG,WAAW,OAAO,EACrB,GAAG,SAAS,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EAClF;YACE,QAAQ,EAAE,WAAW;SACtB,CACF,CAAC;QAEF,+GAA+G;QAC/G,MAAM,iBAAiB,GAAG,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,CAAC;QAChH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,WAAW,cAAc,EAAE,iBAAiB,EAAE;YAC9E,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,+GAA+G;QAC/G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,iBAAiB,GAAG,MAAM,CAC9B,GAAG,WAAW,gBAAgB,EAC9B,GAAG,cAAc,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACvF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0BAA0B;QAE1B,kEAAkE;QAClE,mHAAmH;QACnH,kHAAkH;QAClH,MAAM,kBAAkB,GAAG,MAAM,CAAC,GAAG,WAAW,iBAAiB,EAAE,iBAAiB,EAAE;YACpF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,CAAC,GAAG,WAAW,gBAAgB,EAAE,iBAAiB,EAAE;YAClF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,qKAAqK;QACrK,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,WAAW,WAAW,4BAA4B,MAAM,SAAS,EACjE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC5C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM
,CACjD,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,EAAE;QACzB,MAAM,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QACzC,MAAM,gBAAgB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,GAAG,CAAC,GAAG,CAAC,GAAG,UAAU,OAAO,KAAK,gBAAgB,YAAY,GAAG,GAAG,CAAC;QACpE,OAAO,GAAG,CAAC;IACb,CAAC,EACD,EAAE,CACH,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AAChD,CAAC,CAAC;AAEF;;;;;GAKG;AACI,MAAM,aAAa,GAAG,CAAO,OAAe,EAAE,YAAwC,EAAE,EAAE;IAC/F,MAAM,MAAM,GACV,CAAC,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ;QAC/C,CAAC,CAAC,MAAM,IAAA,mBAAU,EAAC,YAAY,CAAC;QAChC,CAAC,CAAC,YAAY,CAAC;IACnB,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IAEvB,uEAAuE;IACvE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,MAAM,oBAAoB,CAAC,MAAO,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,WAAW,GACf,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEzF,MAAM,MAAM,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,kBAAG,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,kBAAG,CAAC,SAAS,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAEnD,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,EAAE,CAAC,CAAC;AACrD,CAAC,CAAA,CAAC;AAjBW,QAAA,aAAa,iBAiBxB","sourcesContent":["/* eslint-disable @typescript-eslint/no-unused-vars */\nimport fsp from 'fs/promises';\nimport path from 'path';\n\nimport type { CrawleeOneConfig, CrawleeOneConfigSchema } from '../../types/config';\nimport { crawlingContextNameByType } from '../../constants';\nimport { loadConfig, validateConfig } from './config';\n// NOTE: We intentionally import these to know when their names change\nimport type { AllActorInputs } from '../../lib/input';\nimport type { CrawleeOneActorInst, CrawleeOneActorRouterCtx } from '../../lib/actor/types';\nimport type {\n CrawleeOneRoute,\n CrawleeOneRouteHandler,\n CrawleeOneRouteMatcher,\n 
CrawleeOneRouteMatcherFn,\n CrawleeOneRouteWrapper,\n} from '../../lib/router/types';\nimport type { MaybePromise } from '../../utils/types';\nimport type { CrawleeOneArgs, crawleeOne } from '../../api';\n\nconst makeUnion = (items: string[]) => items.map((s) => `\"${s}\"`).join(` | `);\nconst makeEnum = (items: string[]) =>\n '{\\n' + items.map((s) => ` '${s}' = '${s}'`).join(`,\\n`) + '\\n}';\n\nconst formatters = {\n type: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name}${typeArgsStr} = ${value};`;\n },\n typeFunc: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name} = ${typeArgsStr}${value};`;\n },\n func: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export const ${name} = ${typeArgsStr}${value};`;\n },\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n enum: (name: string, value: string, typeArgs?: string[]) => {\n return `export enum ${name} ${value}`;\n },\n} satisfies Record<string, (name: string, value: string, args?: string[]) => string>;\n\nconst parseTypesFromSchema = (schema: CrawleeOneConfigSchema) => {\n /** Remember which types we've already generated */\n const definitions: Record<string, string> = {};\n\n /** Remember what values need to be imported and from where */\n const imports: Record<string, Set<{ name: string; typeOnly?: boolean }>> = {};\n\n const addImports = <T extends string>(\n pkg: string,\n newEntries: T[],\n options?: { typeOnly?: boolean }\n ) => {\n const { typeOnly } = options ?? 
{};\n const entries = (imports[pkg] = imports[pkg] || new Set());\n newEntries.forEach((name) => entries.add({ name, typeOnly }));\n // Return the entries as variables, so we can define them in a single\n // place but still reference them in code.\n return newEntries.reduce<{ [Key in T]: Key }>((agg, key) => {\n agg[key] = key;\n return agg;\n }, {} as any);\n };\n\n const define = (\n key: string,\n value: string | (() => string),\n options?: { typeArgs?: string[]; kind?: keyof typeof formatters }\n ) => {\n const kind = options?.kind ?? 'type';\n const typeArgs = options?.typeArgs ?? [];\n if (!definitions[key]) {\n const resolvedVal = typeof value === 'function' ? value() : value;\n const formatter = formatters[kind];\n const valFormatted = formatter(key, resolvedVal, typeArgs);\n definitions[key] = valFormatted;\n }\n // Return the key as variable, so we can reference it\n return key;\n };\n\n // 1. Define imports\n const {\n AllActorInputs: actorInput,\n CrawleeOneActorRouterCtx: actorRouterCtx,\n CrawleeOneActorInst: actorCtx,\n CrawleeOneRoute: routeType,\n CrawleeOneRouteHandler: routeHandler,\n CrawleeOneRouteWrapper: routeWrapper,\n CrawleeOneRouteMatcher: routeMatcher,\n CrawleeOneRouteMatcherFn: routeMatcherFn,\n CrawleeOneIO: ioType,\n CrawleeOneTelemetry: telemType,\n CrawleeOneCtx: ctxType,\n CrawleeOneArgs: argsType,\n crawleeOne: crawleeOneFn,\n } = addImports('crawlee-one', [\n 'AllActorInputs',\n 'CrawleeOneActorRouterCtx',\n 'CrawleeOneActorInst',\n 'CrawleeOneRoute',\n 'CrawleeOneRouteHandler',\n 'CrawleeOneRouteWrapper',\n 'CrawleeOneRouteMatcher',\n 'CrawleeOneRouteMatcherFn',\n 'CrawleeOneIO',\n 'CrawleeOneTelemetry',\n 'CrawleeOneCtx',\n 'CrawleeOneArgs',\n 'crawleeOne',\n ]);\n addImports('crawlee', Object.values(crawlingContextNameByType), { typeOnly: true });\n\n // 2. 
Define utils\n const maybeP = define('MaybePromise', 'T | Promise<T>', { typeArgs: ['T'] });\n\n Object.entries(schema.crawlers).forEach(([crawlerName, crawler]) => {\n const crawlerType = crawler.type;\n\n // 2. Get `CrawlingContext`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n const crawlingContextTypeName = crawlingContextNameByType[crawlerType];\n\n // 3. Generate type for route labels\n // type `CrawlerName`Label = \"detailPage\" | \"otherLabel\" | ...;\n const labelKey = define(`${crawlerName}Label`, () => makeUnion(crawler.routes));\n\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n const labelEnumKey = define(`${crawlerName}LabelEnum`, () => makeEnum(crawler.routes), {\n kind: 'enum',\n });\n\n const ctxTypeArgs = [\n `TInput extends Record<string, any> = ${actorInput}`,\n `TIO extends ${ioType} = ${ioType}`,\n `Telem extends ${telemType}<any, any> = ${telemType}<any, any>`,\n ];\n\n // 4. Create CrawleeOne context\n // type `CrawlerName`Ctx = <TIO, Telem>CrawleeOneCtx<CheerioCrawlingContext, `CrawlerName`Label, AllActorInputs, TIO, Telem>\n const ctxKey = define(\n `${crawlerName}Ctx`,\n `${ctxType}<${crawlingContextTypeName}, ${labelKey}, TInput, TIO, Telem>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 5. Create CrawleeOne instance\n // const customCrawler = <TIO, Telem>(args: CrawleeOneArgs<TType, T>) => crawleeOne(args);\n const crawlerKey = define(\n `${crawlerName}Crawler`,\n `(args: Omit<${argsType}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>, 'type'>) => ${crawleeOneFn}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>({ ...args, type: \"${crawlerType}\"});`,\n { kind: 'func', typeArgs: ctxTypeArgs }\n );\n\n // 6. 
Get actor router context (`CrawleeOneActorRouterCtx`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const routerCtxKey = define(\n `${crawlerName}RouterContext`,\n `${actorRouterCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 7. Get actor context (`CrawleeOneActorInst`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const actorCtxKey = define(\n `${crawlerName}ActorCtx`,\n `${actorCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 8. Create Route types\n // E.g. `type `crawlerName`Route = CrawleeOneRout<`CrawlerName`Ctx>`\n const routeKey = define(\n `${crawlerName}Route`,\n `${routeType}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n {\n typeArgs: ctxTypeArgs,\n }\n );\n\n // E.g. `type `crawlerName`RouteHandler = CrawleeOneRouteHandler<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeHandlerValue = `${routeHandler}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`;\n const routeHandlerKey = define(`${crawlerName}RouteHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // E.g. `type `crawlerName`RouteWrapper = CrawleeOneRouteWrapper<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeWrapperKey = define(\n `${crawlerName}RouteWrapper`,\n `${routeWrapper}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherKey = define(\n `${crawlerName}RouteMatcher`,\n `${routeMatcher}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. 
`type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherFnKey = define(\n `${crawlerName}RouteMatcherFn`,\n `${routeMatcherFn}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 9. Create Crawler hooks\n\n // NOTE: Type for before/after handler is the same as for handlers\n // E.g. `type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n // E.g. `type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n const onBeforeHandlerKey = define(`${crawlerName}OnBeforeHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n const onAfterHandlerKey = define(`${crawlerName}OnAfterHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // type `CrawlerName`OnReady = <TIO, Telem>(actor: CrawleeOneActorInst<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;\n const onReadyKey = define(\n `${crawlerName}OnReady`,\n `(actor: ${actorCtxKey}<TInput, TIO, Telem>) => ${maybeP}<void>;`,\n { kind: 'typeFunc', typeArgs: ctxTypeArgs }\n );\n });\n\n const finalImports = Object.entries(imports).reduce<Record<string, string>>(\n (agg, [pkg, entriesSet]) => {\n const entries = [...entriesSet.values()];\n const formattedEntries = entries.map((e) => e.name).join(', ');\n const typeStr = entries.every((e) => e.typeOnly) ? 
'type ' : '';\n agg[pkg] = `import ${typeStr}{ ${formattedEntries} } from \"${pkg}\"`;\n return agg;\n },\n {}\n );\n\n return { imports: finalImports, definitions };\n};\n\n/**\n * Generate types for CrawleeOne given a config.\n *\n * Config can be passed directly, or as the path to the config file.\n * If the config is omitted, it is automatically searched for using CosmicConfig.\n */\nexport const generateTypes = async (outfile: string, configOrPath?: CrawleeOneConfig | string) => {\n const config =\n !configOrPath || typeof configOrPath === 'string'\n ? await loadConfig(configOrPath)\n : configOrPath;\n validateConfig(config);\n\n /* eslint-disable-next-line @typescript-eslint/no-non-null-assertion */\n const { imports, definitions } = await parseTypesFromSchema(config!.schema);\n const fileContent =\n Object.values(imports).join('\\n') + '\\n\\n\\n' + Object.values(definitions).join('\\n\\n');\n\n const outdir = path.dirname(outfile);\n await fsp.mkdir(outdir, { recursive: true });\n await fsp.writeFile(outfile, fileContent, 'utf-8');\n\n console.log(`Done generating types to ${outfile}`);\n};\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"scrapeListing.js","sourceRoot":"","sources":["../../../../src/lib/actions/scrapeListing.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mCAAuC;AAEvC,6CAA+D;AAC/D,yCAA8C;AA8G9C;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,CAA8B,EACxD,OAAO,EACP,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,GAAG,GACsC,EAA2B,EAAE;IACtE,IAAI,YAAY,GAAwB,OAAO,CAAC;IAEhD,MAAM,6BAA6B,GAAG,GAAS,EAAE;QAC/C,MAAM,aAAa,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAC5F,OAAO,IAAA,sBAAa,EAAC,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAA,CAAC;IAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,MAAM,SAAS,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QACpF,OAAO,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,GAAS,EAAE;QAC9B,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,OAAO,0BAA0B,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC,CAAA,CAAC;IAEF,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,6EAA6E;QAC7E,gDAAgD;QAChD,wCAAwC;QACxC,mDAAmD;QACnD,wCAAwC;QACxC,wCAAwC;QACxC,wCAAwC;QACxC,iCAAiC;QACjC,wCAAwC;QACxC,EAAE;QACF,uEAAuE;QACvE,8DAA8D;QAC9D,sCAAsC;QACtC,wCAAwC;QACxC,EAAE;QACF,sEAAsE;QACtE,iBAAiB;QACjB,0CAA0C;QAC1C,0CAA0C;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QACtF,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,GAAG,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAErE,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,IAAI,0BAA0B,KAAK,CAAC,CAAC;YACnC,MAAM,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAEvE,MAAM,iBAAiB,GAAG,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACnE,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,0BAA0B,GAAG,CAAC,CAAC,CAAC;QAE1E,GAAG,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC1C,MAAM,iBAAiB,CAAC,SAAS,EAAE,CAAC;QACpC,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE;YACnC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC1B,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;SAC1B;IACH,CAAC,CAAA,CAAC;IAEF,+CAA+C;IAC/C,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,MAAM,UAAU,EAAE,CAAC;QAEnB,8CAA8C;QAC9C,YAAY,GAAG,EAAE,CAAC;QAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC5B,MAAM
,eAAe,GAAG,iBAAiB;gBACvC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC;YACT,IAAI,CAAC,eAAe,EAAE;gBACpB,GAAG,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,sBAAsB,CAAC,CAAC;gBACpE,MAAM;aACP;YAED,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACpB,GAAG,CAAC,IAAI,CAAC,oBAAoB,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;gBAC7C,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;aAC1B;iBAAM;gBACL,GAAG,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,IAAI,qDAAqD,CAAC,CAAC;aACvF;YAED,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SAC3B;QAED,GAAG,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACjC,MAAM,CAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,OAAO,CAAC,CAAA,CAAC;IACnC,CAAC,CAAA,CAAC;IAEF,yBAAyB;IACzB,MAAM,UAAU,GAAG,GAAS,EAAE;QAC5B,GAAG,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACnC,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,OAAO,CAAC,CAAA,CAAC;QAChC,YAAY,GAAG,OAAO,CAAC;QACvB,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1C,CAAC,CAAA,CAAC;IAEF,OAAO;QACL,SAAS;QACT,SAAS;QACT,YAAY;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC,CAAC;AAEF,yFAAyF;AAClF,MAAM,oBAAoB,GAAG,CAClC,OAAgD,EAChD,EAAE;IACF,MAAM,EACJ,OAAO,EACP,SAAS,EACT,gBAAgB,GAAG,KAAK,EACxB,GAAG,EACH,MAAM,EACN,UAAU,EACV,iBAAiB,EAEjB,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,kBAAkB,GAAG,CAAC,EACtB,kBAAkB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACnD,eAAe,EACf,cAAc,EAEd,cAAc,EACd,qBAAqB,GAAG,CAAC,EACzB,qBAAqB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACtD,oBAAoB,EAEpB,cAAc,EACd,YAAY,GAAG,GAAG,GACnB,GAAG,OAAO,CAAC;IAEZ,4DAA4D;IAC5D,MAAM,KAAK,GAAc,EAAE,CAAC;IAE5B,MAAM,IAAA,sBAAc,EAAC,SAAS,EAAE,CAAO,QAAQ,EAAE,KAAK,EAAE,EAAE;QACxD,IAAI,gBAAgB,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QAE1C,MAAM,KAAK,GAAG,GAAG,QAAQ,KAAK,KAAK,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAE/D,IAAI,eAAe,GAAG,KAAK,CAAC;QAC5B,MAAM,KAAK,GAAG,GAAG,EAAE,GAAG,eAAe,GAAG,IAAI,CAAA,CAAC,CAAC,CAAC,CAAC,kBAAkB;QAElE,0CAA0C;QAC1C,IAAI,SAAS,GAAmC,IAAI,CAAC;QACrD,MAAM,SAAS,GAAG,GAA4C,EAAE;;YAAC,OAAA,CAAC;gBAChE,OAAO;gBACP,GAAG;gBACH,QAAQ;gBACR,OAAO;gBACP,eAAe,EAAE,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,SAAS,mCAAI,CAAC,GAAG,EAAE,GAAE,CAAC,
CAAC;gBACnD,KAAK;aACN,CAAC,CAAA;SAAA,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACrC,IAAA,iBAAW,EAAC,QAAkB,CAAC,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACpC,MAAM,CAAA,UAAU,aAAV,UAAU,uBAAV,UAAU,CAAG,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAA,CAAC;QAC1C,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;QAE7C,SAAS,GAAG,mBAAmB,CAAC;YAC9B,OAAO,EAAE,SAAS,EAAE;YACpB,OAAO;YACP,iBAAiB;YACjB,eAAe;YACf,cAAc;YACd,GAAG;SACJ,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,2CAA2C,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACjF,MAAM,CAAA,iBAAiB,aAAjB,iBAAiB,uBAAjB,iBAAiB,CAAG,SAAS,EAAE,CAAC,CAAA,CAAC;QACvC,GAAG,CAAC,KAAK,CAAC,gDAAgD,KAAK,GAAG,CAAC,CAAC,CAAC,kBAAkB;QAEvF,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAElE,IAAI,wBAAwB,GAAG,IAAI,CAAC;QACpC,OAAO,wBAAwB,IAAI,CAAC,eAAe,EAAE;YACnD,cAAc;YACd,0CAA0C;YAC1C,GAAG,CAAC,IAAI,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;YAChD,MAAM,IAAA,kBAAU,EACd,GAAS,EAAE;gBACT,IAAI,CAAC,SAAS;oBAAE,MAAM,KAAK,CAAC,+DAA+D,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAEvH,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC;gBAClD,IAAI,CAAC,cAAc,IAAI,CAAC,cAAc,EAAE;oBACtC,GAAG,CAAC,IAAI,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;oBACjD,OAAO;iBACR;gBAED,GAAG,CAAC,KAAK,CAAC,2BAA2B,KAAK,EAAE,CAAC,CAAC;gBAC9C,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,GAAG,CAAC,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC,CAAA,EACD;gBACE,UAAU,EAAE,kBAAkB;gBAC9B,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,kBAAkB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;aAC/E,CACF,CAAC;YAEF,IAAI,iBAAiB,GAAG,IAAI,CAAC;YAC7B,OAAO,iBAAiB,IAAI,CAAC,eAAe,EAAE;gBAC5C,kBAAkB;gBAClB,IAAI,UAAU,GAAG,WAAW,CAAC;gBAC7B,IAAI,MAAM,EAAE;oBACV,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;oBAC7C,UAAU,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACvC,GAAG,CAAC,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;iBACnD;gBACD,MAAM,SAAS,GAAG,GAAG,KAAK,KAAK,UAAU,GAAG,CAAC;gBAE7C,qBAAqB;gBACrB,GAAG,CAAC,IAAI,CAAC,8BAA8B,SAAS,EAAE,CAAC,CAAC;gBACpD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,kBAAU,EACjC,CAAO
,UAAU,EAAE,EAAE,kDAAC,OAAA,cAAc,CAAC,SAAS,EAAE,EAAE,UAAU,CAAC,CAAA,GAAA,EAC7D;oBACE,UAAU,EAAE,qBAAqB;oBACjC,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,qBAAqB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;iBAClF,CACF,CAAC;gBACF,GAAG,CAAC,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;gBAE1D,MAAM,SAAS,GAAG,MAAM,aAAN,MAAM,cAAN,MAAM,GAAI,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;gBACzB,GAAG,CAAC,IAAI,CAAC,SAAS,SAAS,CAAC,MAAM,kBAAkB,SAAS,EAAE,CAAC,CAAC;gBAEjE,6CAA6C;gBAC7C,IAAI,gBAAgB,IAAI,eAAe,EAAE;oBACvC,iBAAiB,GAAG,KAAK,CAAC;oBAC1B,IAAI,gBAAgB;wBAAE,GAAG,CAAC,IAAI,CAAC,6DAA6D,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;yBACvH,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBACjE,SAAS;iBACV;gBAED,GAAG,CAAC,KAAK,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBACxF,MAAM,CAAA,oBAAoB,aAApB,oBAAoB,uBAApB,oBAAoB,CAAG,SAAS,EAAE,EAAE,SAAS,CAAC,CAAA,CAAC;gBACrD,GAAG,CAAC,KAAK,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAE7F,IAAI,cAAc,IAAI,CAAC,eAAe,EAAE;oBACtC,wFAAwF;oBACxF,IAAI;wBACF,GAAG,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;wBAC1D,MAAM,cAAc,CAAC,SAAS,EAAE,EAAE,SAAS,CAAC,CAAC;wBAC7C,GAAG,CAAC,KAAK,CAAC,yCAAyC,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;qBACpF;oBAAC,OAAO,CAAC,EAAE;wBACV,GAAG,CAAC,IAAI,CAAC,2CAA2C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;wBACpF,GAAG,CAAC,KAAK,CAAE,CAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACnC,iBAAiB,GAAG,KAAK,CAAC;qBAC3B;iBACF;qBAAM;oBACL,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBAC5D,iBAAiB,GAAG,KAAK,CAAC;iBAC3B;gBAED,8CAA8C;gBAC9C,MAAM,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC,CAAC;aAC3D;YAED,sEAAsE;YACtE,GAAG,CAAC,KAAK,CAAC,8DAA8D,KAAK,EAAE,CAAC,CAAC;YACjF,wBAAwB,GAAG,cAAc,IAAI,CAAC,MAAM,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC;YAC9E,GAAG,CAAC,KAAK,CAAC,mEAAmE,KAAK,EAAE,CAAC,CAAC;YAEtF,IAAI,wBAAwB,EAAE;gBAC5B,IAAI,CAAC,eAAe;oBAAE,GAAG,CAAC,IAAI,CAAC,oEAAoE,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;;oBAC1H,GAAG,CAAC,IAAI,CAAC,+FAA+F,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;aAC1I;;gBAAM,GAAG,CAAC,IAAI,CAAC,uDAAuD,KAAK,EAAE,CAAC,CAAC,CAAC,
kBAAkB;SACpG;QACD,GAAG,CAAC,IAAI,CAAC,gBAAgB,KAAK,EAAE,CAAC,CAAC;IACpC,CAAC,CAAA,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAxKW,QAAA,oBAAoB,wBAwK/B","sourcesContent":["import { findLastIndex } from 'lodash';\n\nimport { serialAsyncMap, retryAsync } from '../../utils/async';\nimport { validateUrl } from '../../utils/url';\nimport type { MaybePromise } from '../../utils/types';\n\n// TODO - Clean this up and merge it into PageLib\n\nexport interface ListingLogger {\n debug: (msg: string, data?: any) => void;\n info: (msg: string, data?: any) => void;\n warning: (msg: string, data?: any) => void;\n error: (msg: string, data?: any) => void;\n}\n\nexport interface ListingPageFilter {\n name: string;\n disabled?: boolean;\n initState: () => MaybePromise<boolean>;\n resetState: () => MaybePromise<void>;\n nextState: () => MaybePromise<void>;\n hasNextState: () => MaybePromise<boolean>;\n hasState: () => MaybePromise<boolean>;\n loadState: () => MaybePromise<void>;\n}\n\nexport interface ListingFiltersSetupOptions<Ctx extends object, UrlType> {\n context: ListingPageScraperContext<Ctx, UrlType>;\n filters?: ListingPageFilter[];\n shouldApplyFilter?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n filter: ListingPageFilter,\n filters: ListingPageFilter[]\n ) => MaybePromise<boolean>;\n onResetFilters?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n onFiltersLoaded?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n log: ListingLogger;\n}\n\ntype ListingFilterController = Pick<ListingPageFilter, 'loadState' | 'nextState' | 'hasNextState' | 'hasState'>; // prettier-ignore\n\nexport interface ListingPageScraperContext<Ctx extends object, UrlType> {\n context: Ctx;\n log: ListingLogger;\n startUrl: UrlType;\n filters: ListingPageFilter[];\n /** Use this if you need to load filters again (eg after reloading page manually) */\n loadFilterState: () => MaybePromise<void>;\n /** Call this function from any 
callback to stop scraping */\n abort: () => void;\n}\n\n// prettier-ignore\nexport interface ListingPageScraperOptions<Ctx extends object, UrlType> extends Omit<ListingFiltersSetupOptions<Ctx, UrlType>, 'context'> {\n context: Ctx;\n startUrls: UrlType[];\n listingCountOnly?: boolean;\n /** Get ID of the current page in the pagination, so it can be logged */\n pageId?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<string>;\n log: ListingLogger;\n\n onNavigate?: (context: ListingPageScraperContext<Ctx, UrlType>, url: UrlType) => MaybePromise<void>;\n /**\n * Hook triggered after navigating to the url using Page.goto().\n *\n * One use of this hook is to conditionally disable/enable filters based on the page content.\n **/\n onAfterNavigation?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n\n /** How many attempts are retried after filters failed to load. Defaults to 3 */\n loadFiltersRetries?: number;\n /**\n * Hook triggered after a failed attempt at loading listings page filters.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onLoadFiltersError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n\n /** Main logic to extract entries from a page */\n extractEntries: (context: ListingPageScraperContext<Ctx, UrlType>, retryIndex: number) => MaybePromise<UrlType[]>;\n /** How many attempts are retried after failed to scrape entries from a listing. 
Defaults to 3 */\n extractEntriesRetries?: number;\n /**\n * Hook triggered after a failed attempt at scraping entries from a listing.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onExtractEntriesError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n onExtractEntriesDone?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n\n /**\n * If goToNextPage hook is defined, it will be called after each page. To indicate that there's no more\n * pages left, throw an error.\n **/\n onGoToNextPage?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n /** How long to wait after we've navigated to the next page and before we start extracting? */\n nextPageWait?: number;\n}\n\n/**\n * Given configuration for listing page filters, set up functions to\n * navigate through the different states of filters, to allow to paginate\n * through all states.\n */\nconst setupListingFilters = <Ctx extends object, UrlType>({\n context,\n filters = [],\n shouldApplyFilter,\n onResetFilters,\n onFiltersLoaded,\n log,\n}: ListingFiltersSetupOptions<Ctx, UrlType>): ListingFilterController => {\n let filtersStack: ListingPageFilter[] = filters;\n\n const getNextFilterStateChangeIndex = async () => {\n const hasNextStates = await serialAsyncMap(filtersStack, (filter) => filter.hasNextState());\n return findLastIndex(hasNextStates, (x) => x);\n };\n\n const hasState = async () => {\n const hasStates = await serialAsyncMap(filtersStack, (filter) => filter.hasState());\n return hasStates.some(Boolean);\n };\n\n const hasNextState = async () => {\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n return nextFilterStateChangeIndex > -1;\n };\n\n const nextState = async () => {\n // Imagine we have 4 filters, each has 3 
states (eg 3 options to select from)\n // We start with all filters in the first state:\n // State 1: F1(1), F2(1), F3(1), F4(1)\n // As we progress, we increment it akin to numbers:\n // State 2: F1(1), F2(1), F3(1), F4(2)\n // State 3: F1(1), F2(1), F3(1), F4(3)\n // State 4: F1(1), F2(1), F3(2), F4(1)\n // All the way to the last state:\n // State n: F1(3), F2(3), F3(3), F4(3)\n //\n // When we want move to a next state, we identify the RIGHT-most filter\n // whose state can be incremented (in this case we select F2):\n // YES YES NO NO\n // State x: F1(1), F2(2), F3(3), F4(3)\n //\n // When we increment a filter state, all the other filter to the RIGHT\n // will be reset:\n // State x: F1(1), F2(2), F3(3), F4(3)\n // State x+1: F1(1), F2(3), F3(1), F4(1)\n\n const initStates = await serialAsyncMap(filtersStack, (filter) => filter.initState());\n if (initStates.some(Boolean)) return log.info('Initialised filters');\n\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n if (nextFilterStateChangeIndex === -1)\n throw Error('Cannot select next filter state - reached end of list');\n\n const filterToNextState = filtersStack[nextFilterStateChangeIndex];\n const filtersToReset = filtersStack.slice(nextFilterStateChangeIndex + 1);\n\n log.info('Setting filters to next state');\n await filterToNextState.nextState();\n for (const filter of filtersToReset) {\n await filter.resetState();\n await filter.nextState();\n }\n };\n\n /** Load current filter state in the webpage */\n const loadState = async () => {\n await resetState();\n\n // Load filters one by one, and only if needed\n filtersStack = [];\n for (const filter of filters) {\n const shouldUseFilter = shouldApplyFilter\n ? 
await shouldApplyFilter(context, filter, filters)\n : true;\n if (!shouldUseFilter) {\n log.info(`Not applying filter \"${filter.name}\" or further filters`);\n break;\n }\n\n if (!filter.disabled) {\n log.info(`Applying filter \"${filter.name}\"`);\n await filter.loadState();\n } else {\n log.info(`Filter \"${filter.name}\" recognised but not applied because it is disabled`);\n }\n\n filtersStack.push(filter);\n }\n\n log.info(`Done loading filters`);\n await onFiltersLoaded?.(context);\n };\n\n /** Reset filter state */\n const resetState = async () => {\n log.info(`Resetting filter state`);\n await onResetFilters?.(context);\n filtersStack = filters;\n log.info(`Resetting filter state done`);\n };\n\n return {\n loadState,\n nextState,\n hasNextState,\n hasState,\n };\n};\n\n/** Get entries from a listing page (eg URLs to profiles that should be scraped later) */\nexport const scrapeListingEntries = async <Ctx extends object, UrlType>(\n options: ListingPageScraperOptions<Ctx, UrlType>\n) => {\n const {\n context,\n startUrls,\n listingCountOnly = false,\n log,\n pageId,\n onNavigate,\n onAfterNavigation,\n\n filters = [],\n shouldApplyFilter,\n loadFiltersRetries = 3,\n onLoadFiltersError = (_, err) => console.error(err),\n onFiltersLoaded,\n onResetFilters,\n\n extractEntries,\n extractEntriesRetries = 3,\n onExtractEntriesError = (_, err) => console.error(err),\n onExtractEntriesDone,\n\n onGoToNextPage,\n nextPageWait = 500,\n } = options;\n\n /** Collection of ALL urls across all pages and startUrls */\n const links: UrlType[] = [];\n\n await serialAsyncMap(startUrls, async (startUrl, index) => {\n if (listingCountOnly && index > 0) return;\n\n const logId = `${startUrl} (${index + 1}/${startUrls.length})`;\n\n let userAskedToStop = false;\n const abort = () => { userAskedToStop = true }; // prettier-ignore\n\n // Prepare context shared across all hooks\n let filterObj: ListingFilterController | null = null;\n const genCtxArg = (): 
ListingPageScraperContext<Ctx, UrlType> => ({\n context,\n log,\n startUrl,\n filters,\n loadFilterState: filterObj?.loadState ?? (() => {}),\n abort,\n });\n\n log.debug(`Validating URL ${logId}`);\n validateUrl(startUrl as string);\n log.info(`Navigating URL ${logId}`);\n await onNavigate?.(genCtxArg(), startUrl);\n log.debug(`Done navigating to URL ${logId}`);\n\n filterObj = setupListingFilters({\n context: genCtxArg(),\n filters,\n shouldApplyFilter,\n onFiltersLoaded,\n onResetFilters,\n log,\n });\n\n log.debug(`Calling onAfterNavigation callback. URL ${logId}`); // prettier-ignore\n await onAfterNavigation?.(genCtxArg());\n log.debug(`Done calling onAfterNavigation callback. URL ${logId})`); // prettier-ignore\n\n const isUsingFilters = filters.some((filter) => !filter.disabled);\n\n let hasFilterStatesToProcess = true;\n while (hasFilterStatesToProcess && !userAskedToStop) {\n // Filter loop\n // Load filters before we start paginating\n log.info(`Setting up filters for URL ${logId}`);\n await retryAsync(\n async () => {\n if (!filterObj) throw Error(`Filter controller is missing. This should never happen. 
URL ${logId}`); // prettier-ignore\n\n const filterHasState = await filterObj.hasState();\n if (!isUsingFilters || !filterHasState) {\n log.info(`Not loading filters for URL ${logId}`);\n return;\n }\n\n log.debug(`Loading filters for URL ${logId}`);\n await filterObj.nextState();\n await filterObj.loadState();\n log.debug(`Done loading filters for URL ${logId}`);\n },\n {\n maxRetries: loadFiltersRetries,\n onError: (err, retryIndex) => onLoadFiltersError(genCtxArg(), err, retryIndex),\n }\n );\n\n let nextPageAvailable = true;\n while (nextPageAvailable && !userAskedToStop) {\n // Pagination loop\n let currPageId = 'next page';\n if (pageId) {\n log.debug(`Loading pageId for URL ${logId}`);\n currPageId = await pageId(genCtxArg());\n log.debug(`Done loading pageId for URL ${logId}`);\n }\n const pageLogId = `${logId} (${currPageId})`;\n\n // Extract page links\n log.info(`Extracting links from page ${pageLogId}`);\n const { result } = await retryAsync(\n async (retryIndex) => extractEntries(genCtxArg(), retryIndex),\n {\n maxRetries: extractEntriesRetries,\n onError: (err, retryIndex) => onExtractEntriesError(genCtxArg(), err, retryIndex),\n }\n );\n log.debug(`Done extracting links from page ${pageLogId}`);\n\n const pageLinks = result ?? [];\n links.push(...pageLinks);\n log.info(`Found ${pageLinks.length} links on page ${pageLogId}`);\n\n // Leave after printing the count or on abort\n if (listingCountOnly || userAskedToStop) {\n nextPageAvailable = false;\n if (listingCountOnly) log.info(`Debugging mode. Entries are not scraped. Leaving now. URL ${pageLogId}`); // prettier-ignore\n else if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n continue;\n }\n\n log.debug(`Calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n await onExtractEntriesDone?.(genCtxArg(), pageLinks);\n log.debug(`Done calling onExtractEntriesDone callback. 
URL ${pageLogId}`); // prettier-ignore\n\n if (onGoToNextPage && !userAskedToStop) {\n // If goToNextPage hook is defined, this will be called after each page, until it errors\n try {\n log.info(`Navigating to next page from URL ${pageLogId}`);\n await onGoToNextPage(genCtxArg(), pageLinks);\n log.debug(`Done navigating to next page from URL ${pageLogId}`); // prettier-ignore\n } catch (e) {\n log.info(`Failed navigating to next page from URL ${pageLogId}`); // prettier-ignore\n log.error((e as Error).toString());\n nextPageAvailable = false;\n }\n } else {\n if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n nextPageAvailable = false;\n }\n\n // Wait before we start scraping the next page\n await new Promise((res) => setTimeout(res, nextPageWait));\n }\n\n // Break out if we're not using filters or we've gone through them all\n log.debug(`Checking if there are more filter states available for URL ${logId}`);\n hasFilterStatesToProcess = isUsingFilters && (await filterObj.hasNextState());\n log.debug(`Done checking if there are more filter states available for URL ${logId}`);\n\n if (hasFilterStatesToProcess) {\n if (!userAskedToStop) log.info(`Will repeat scraping this URL with different filter setting. URL ${logId}`); // prettier-ignore\n else log.info(`There are unprocessed filter setting remaining for this URL, but stopping due to abort. URL ${logId}`); // prettier-ignore\n } else log.info(`No filter setting remain for scraping this URL. URL ${logId}`); // prettier-ignore\n }\n log.info(`Finished URL ${logId}`);\n });\n return links;\n};\n"]}
1
+ {"version":3,"file":"scrapeListing.js","sourceRoot":"","sources":["../../../../src/lib/actions/scrapeListing.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mCAAuC;AAEvC,6CAA+D;AAC/D,yCAA8C;AA8G9C;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,CAA8B,EACxD,OAAO,EACP,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,GAAG,GACsC,EAA2B,EAAE;IACtE,IAAI,YAAY,GAAwB,OAAO,CAAC;IAEhD,MAAM,6BAA6B,GAAG,GAAS,EAAE;QAC/C,MAAM,aAAa,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAC5F,OAAO,IAAA,sBAAa,EAAC,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAA,CAAC;IAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,MAAM,SAAS,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QACpF,OAAO,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,GAAS,EAAE;QAC9B,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,OAAO,0BAA0B,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC,CAAA,CAAC;IAEF,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,6EAA6E;QAC7E,gDAAgD;QAChD,wCAAwC;QACxC,mDAAmD;QACnD,wCAAwC;QACxC,wCAAwC;QACxC,wCAAwC;QACxC,iCAAiC;QACjC,wCAAwC;QACxC,EAAE;QACF,uEAAuE;QACvE,8DAA8D;QAC9D,sCAAsC;QACtC,wCAAwC;QACxC,EAAE;QACF,sEAAsE;QACtE,iBAAiB;QACjB,0CAA0C;QAC1C,0CAA0C;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QACtF,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,GAAG,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAErE,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,IAAI,0BAA0B,KAAK,CAAC,CAAC;YACnC,MAAM,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAEvE,MAAM,iBAAiB,GAAG,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACnE,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,0BAA0B,GAAG,CAAC,CAAC,CAAC;QAE1E,GAAG,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC1C,MAAM,iBAAiB,CAAC,SAAS,EAAE,CAAC;QACpC,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;YACpC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC1B,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC,CAAA,CAAC;IAEF,+CAA+C;IAC/C,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,MAAM,UAAU,EAAE,CAAC;QAEnB,8CAA8C;QAC9C,YAAY,GAAG,EAAE,CAAC;QAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,
CAAC;YAC7B,MAAM,eAAe,GAAG,iBAAiB;gBACvC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC;YACT,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,GAAG,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,sBAAsB,CAAC,CAAC;gBACpE,MAAM;YACR,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACrB,GAAG,CAAC,IAAI,CAAC,oBAAoB,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;gBAC7C,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,IAAI,qDAAqD,CAAC,CAAC;YACxF,CAAC;YAED,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACjC,MAAM,CAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,OAAO,CAAC,CAAA,CAAC;IACnC,CAAC,CAAA,CAAC;IAEF,yBAAyB;IACzB,MAAM,UAAU,GAAG,GAAS,EAAE;QAC5B,GAAG,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACnC,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,OAAO,CAAC,CAAA,CAAC;QAChC,YAAY,GAAG,OAAO,CAAC;QACvB,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1C,CAAC,CAAA,CAAC;IAEF,OAAO;QACL,SAAS;QACT,SAAS;QACT,YAAY;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC,CAAC;AAEF,yFAAyF;AAClF,MAAM,oBAAoB,GAAG,CAClC,OAAgD,EAChD,EAAE;IACF,MAAM,EACJ,OAAO,EACP,SAAS,EACT,gBAAgB,GAAG,KAAK,EACxB,GAAG,EACH,MAAM,EACN,UAAU,EACV,iBAAiB,EAEjB,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,kBAAkB,GAAG,CAAC,EACtB,kBAAkB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACnD,eAAe,EACf,cAAc,EAEd,cAAc,EACd,qBAAqB,GAAG,CAAC,EACzB,qBAAqB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACtD,oBAAoB,EAEpB,cAAc,EACd,YAAY,GAAG,GAAG,GACnB,GAAG,OAAO,CAAC;IAEZ,4DAA4D;IAC5D,MAAM,KAAK,GAAc,EAAE,CAAC;IAE5B,MAAM,IAAA,sBAAc,EAAC,SAAS,EAAE,CAAO,QAAQ,EAAE,KAAK,EAAE,EAAE;QACxD,IAAI,gBAAgB,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QAE1C,MAAM,KAAK,GAAG,GAAG,QAAQ,KAAK,KAAK,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAE/D,IAAI,eAAe,GAAG,KAAK,CAAC;QAC5B,MAAM,KAAK,GAAG,GAAG,EAAE,GAAG,eAAe,GAAG,IAAI,CAAA,CAAC,CAAC,CAAC,CAAC,kBAAkB;QAElE,0CAA0C;QAC1C,IAAI,SAAS,GAAmC,IAAI,CAAC;QACrD,MAAM,SAAS,GAAG,GAA4C,EAAE;;YAAC,OAAA,CAAC;gBAChE,OAAO;gBACP,GAAG;gBACH,QAAQ;gBACR,OAAO;gBACP,eAAe,EAAE,MAAA,SAAS,aAAT,SAAS,uB
AAT,SAAS,CAAE,SAAS,mCAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;gBACnD,KAAK;aACN,CAAC,CAAA;SAAA,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACrC,IAAA,iBAAW,EAAC,QAAkB,CAAC,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACpC,MAAM,CAAA,UAAU,aAAV,UAAU,uBAAV,UAAU,CAAG,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAA,CAAC;QAC1C,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;QAE7C,SAAS,GAAG,mBAAmB,CAAC;YAC9B,OAAO,EAAE,SAAS,EAAE;YACpB,OAAO;YACP,iBAAiB;YACjB,eAAe;YACf,cAAc;YACd,GAAG;SACJ,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,2CAA2C,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACjF,MAAM,CAAA,iBAAiB,aAAjB,iBAAiB,uBAAjB,iBAAiB,CAAG,SAAS,EAAE,CAAC,CAAA,CAAC;QACvC,GAAG,CAAC,KAAK,CAAC,gDAAgD,KAAK,GAAG,CAAC,CAAC,CAAC,kBAAkB;QAEvF,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAElE,IAAI,wBAAwB,GAAG,IAAI,CAAC;QACpC,OAAO,wBAAwB,IAAI,CAAC,eAAe,EAAE,CAAC;YACpD,cAAc;YACd,0CAA0C;YAC1C,GAAG,CAAC,IAAI,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;YAChD,MAAM,IAAA,kBAAU,EACd,GAAS,EAAE;gBACT,IAAI,CAAC,SAAS;oBAAE,MAAM,KAAK,CAAC,+DAA+D,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAEvH,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC;gBAClD,IAAI,CAAC,cAAc,IAAI,CAAC,cAAc,EAAE,CAAC;oBACvC,GAAG,CAAC,IAAI,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;oBACjD,OAAO;gBACT,CAAC;gBAED,GAAG,CAAC,KAAK,CAAC,2BAA2B,KAAK,EAAE,CAAC,CAAC;gBAC9C,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,GAAG,CAAC,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC,CAAA,EACD;gBACE,UAAU,EAAE,kBAAkB;gBAC9B,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,kBAAkB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;aAC/E,CACF,CAAC;YAEF,IAAI,iBAAiB,GAAG,IAAI,CAAC;YAC7B,OAAO,iBAAiB,IAAI,CAAC,eAAe,EAAE,CAAC;gBAC7C,kBAAkB;gBAClB,IAAI,UAAU,GAAG,WAAW,CAAC;gBAC7B,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;oBAC7C,UAAU,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACvC,GAAG,CAAC,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,MAAM,SAAS,GAAG,GAAG,KAAK,KAAK,UAAU,GAAG,CAAC;gBAE7C,qBAAqB;gBACrB,GAAG,CAAC,IAAI,CAAC,8BAA8
B,SAAS,EAAE,CAAC,CAAC;gBACpD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,kBAAU,EACjC,CAAO,UAAU,EAAE,EAAE,kDAAC,OAAA,cAAc,CAAC,SAAS,EAAE,EAAE,UAAU,CAAC,CAAA,GAAA,EAC7D;oBACE,UAAU,EAAE,qBAAqB;oBACjC,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,qBAAqB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;iBAClF,CACF,CAAC;gBACF,GAAG,CAAC,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;gBAE1D,MAAM,SAAS,GAAG,MAAM,aAAN,MAAM,cAAN,MAAM,GAAI,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;gBACzB,GAAG,CAAC,IAAI,CAAC,SAAS,SAAS,CAAC,MAAM,kBAAkB,SAAS,EAAE,CAAC,CAAC;gBAEjE,6CAA6C;gBAC7C,IAAI,gBAAgB,IAAI,eAAe,EAAE,CAAC;oBACxC,iBAAiB,GAAG,KAAK,CAAC;oBAC1B,IAAI,gBAAgB;wBAAE,GAAG,CAAC,IAAI,CAAC,6DAA6D,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;yBACvH,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBACjE,SAAS;gBACX,CAAC;gBAED,GAAG,CAAC,KAAK,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBACxF,MAAM,CAAA,oBAAoB,aAApB,oBAAoB,uBAApB,oBAAoB,CAAG,SAAS,EAAE,EAAE,SAAS,CAAC,CAAA,CAAC;gBACrD,GAAG,CAAC,KAAK,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAE7F,IAAI,cAAc,IAAI,CAAC,eAAe,EAAE,CAAC;oBACvC,wFAAwF;oBACxF,IAAI,CAAC;wBACH,GAAG,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;wBAC1D,MAAM,cAAc,CAAC,SAAS,EAAE,EAAE,SAAS,CAAC,CAAC;wBAC7C,GAAG,CAAC,KAAK,CAAC,yCAAyC,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;oBACrF,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,GAAG,CAAC,IAAI,CAAC,2CAA2C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;wBACpF,GAAG,CAAC,KAAK,CAAE,CAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACnC,iBAAiB,GAAG,KAAK,CAAC;oBAC5B,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBAC5D,iBAAiB,GAAG,KAAK,CAAC;gBAC5B,CAAC;gBAED,8CAA8C;gBAC9C,MAAM,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC,CAAC;YAC5D,CAAC;YAED,sEAAsE;YACtE,GAAG,CAAC,KAAK,CAAC,8DAA8D,KAAK,EAAE,CAAC,CAAC;YACjF,wBAAwB,GAAG,cAAc,IAAI,CAAC,MAAM,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC;YAC9E,GAAG,CAAC,KAAK,CAAC,mEAAmE,KAAK,EAAE,CAAC,CAAC;YAEtF,IAAI,wBAAwB,EAAE,CAAC;gBAC7B,IAAI,CAAC,eAAe;oBAAE,GAAG,CAAC,IAAI,CAAC,oEAAoE,KAAK,EAAE,CAAC,CAAC,CAAC
,kBAAkB;;oBAC1H,GAAG,CAAC,IAAI,CAAC,+FAA+F,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;YAC3I,CAAC;;gBAAM,GAAG,CAAC,IAAI,CAAC,uDAAuD,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACrG,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,gBAAgB,KAAK,EAAE,CAAC,CAAC;IACpC,CAAC,CAAA,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAxKW,QAAA,oBAAoB,wBAwK/B","sourcesContent":["import { findLastIndex } from 'lodash';\n\nimport { serialAsyncMap, retryAsync } from '../../utils/async';\nimport { validateUrl } from '../../utils/url';\nimport type { MaybePromise } from '../../utils/types';\n\n// TODO - Clean this up and merge it into PageLib\n\nexport interface ListingLogger {\n debug: (msg: string, data?: any) => void;\n info: (msg: string, data?: any) => void;\n warning: (msg: string, data?: any) => void;\n error: (msg: string, data?: any) => void;\n}\n\nexport interface ListingPageFilter {\n name: string;\n disabled?: boolean;\n initState: () => MaybePromise<boolean>;\n resetState: () => MaybePromise<void>;\n nextState: () => MaybePromise<void>;\n hasNextState: () => MaybePromise<boolean>;\n hasState: () => MaybePromise<boolean>;\n loadState: () => MaybePromise<void>;\n}\n\nexport interface ListingFiltersSetupOptions<Ctx extends object, UrlType> {\n context: ListingPageScraperContext<Ctx, UrlType>;\n filters?: ListingPageFilter[];\n shouldApplyFilter?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n filter: ListingPageFilter,\n filters: ListingPageFilter[]\n ) => MaybePromise<boolean>;\n onResetFilters?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n onFiltersLoaded?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n log: ListingLogger;\n}\n\ntype ListingFilterController = Pick<ListingPageFilter, 'loadState' | 'nextState' | 'hasNextState' | 'hasState'>; // prettier-ignore\n\nexport interface ListingPageScraperContext<Ctx extends object, UrlType> {\n context: Ctx;\n log: ListingLogger;\n startUrl: UrlType;\n filters: ListingPageFilter[];\n /** Use this if you 
need to load filters again (eg after reloading page manually) */\n loadFilterState: () => MaybePromise<void>;\n /** Call this function from any callback to stop scraping */\n abort: () => void;\n}\n\n// prettier-ignore\nexport interface ListingPageScraperOptions<Ctx extends object, UrlType> extends Omit<ListingFiltersSetupOptions<Ctx, UrlType>, 'context'> {\n context: Ctx;\n startUrls: UrlType[];\n listingCountOnly?: boolean;\n /** Get ID of the current page in the pagination, so it can be logged */\n pageId?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<string>;\n log: ListingLogger;\n\n onNavigate?: (context: ListingPageScraperContext<Ctx, UrlType>, url: UrlType) => MaybePromise<void>;\n /**\n * Hook triggered after navigating to the url using Page.goto().\n *\n * One use of this hook is to conditionally disable/enable filters based on the page content.\n **/\n onAfterNavigation?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n\n /** How many attempts are retried after filters failed to load. Defaults to 3 */\n loadFiltersRetries?: number;\n /**\n * Hook triggered after a failed attempt at loading listings page filters.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onLoadFiltersError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n\n /** Main logic to extract entries from a page */\n extractEntries: (context: ListingPageScraperContext<Ctx, UrlType>, retryIndex: number) => MaybePromise<UrlType[]>;\n /** How many attempts are retried after failed to scrape entries from a listing. 
Defaults to 3 */\n extractEntriesRetries?: number;\n /**\n * Hook triggered after a failed attempt at scraping entries from a listing.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onExtractEntriesError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n onExtractEntriesDone?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n\n /**\n * If goToNextPage hook is defined, it will be called after each page. To indicate that there's no more\n * pages left, throw an error.\n **/\n onGoToNextPage?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n /** How long to wait after we've navigated to the next page and before we start extracting? */\n nextPageWait?: number;\n}\n\n/**\n * Given configuration for listing page filters, set up functions to\n * navigate through the different states of filters, to allow to paginate\n * through all states.\n */\nconst setupListingFilters = <Ctx extends object, UrlType>({\n context,\n filters = [],\n shouldApplyFilter,\n onResetFilters,\n onFiltersLoaded,\n log,\n}: ListingFiltersSetupOptions<Ctx, UrlType>): ListingFilterController => {\n let filtersStack: ListingPageFilter[] = filters;\n\n const getNextFilterStateChangeIndex = async () => {\n const hasNextStates = await serialAsyncMap(filtersStack, (filter) => filter.hasNextState());\n return findLastIndex(hasNextStates, (x) => x);\n };\n\n const hasState = async () => {\n const hasStates = await serialAsyncMap(filtersStack, (filter) => filter.hasState());\n return hasStates.some(Boolean);\n };\n\n const hasNextState = async () => {\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n return nextFilterStateChangeIndex > -1;\n };\n\n const nextState = async () => {\n // Imagine we have 4 filters, each has 3 
states (eg 3 options to select from)\n // We start with all filters in the first state:\n // State 1: F1(1), F2(1), F3(1), F4(1)\n // As we progress, we increment it akin to numbers:\n // State 2: F1(1), F2(1), F3(1), F4(2)\n // State 3: F1(1), F2(1), F3(1), F4(3)\n // State 4: F1(1), F2(1), F3(2), F4(1)\n // All the way to the last state:\n // State n: F1(3), F2(3), F3(3), F4(3)\n //\n // When we want move to a next state, we identify the RIGHT-most filter\n // whose state can be incremented (in this case we select F2):\n // YES YES NO NO\n // State x: F1(1), F2(2), F3(3), F4(3)\n //\n // When we increment a filter state, all the other filter to the RIGHT\n // will be reset:\n // State x: F1(1), F2(2), F3(3), F4(3)\n // State x+1: F1(1), F2(3), F3(1), F4(1)\n\n const initStates = await serialAsyncMap(filtersStack, (filter) => filter.initState());\n if (initStates.some(Boolean)) return log.info('Initialised filters');\n\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n if (nextFilterStateChangeIndex === -1)\n throw Error('Cannot select next filter state - reached end of list');\n\n const filterToNextState = filtersStack[nextFilterStateChangeIndex];\n const filtersToReset = filtersStack.slice(nextFilterStateChangeIndex + 1);\n\n log.info('Setting filters to next state');\n await filterToNextState.nextState();\n for (const filter of filtersToReset) {\n await filter.resetState();\n await filter.nextState();\n }\n };\n\n /** Load current filter state in the webpage */\n const loadState = async () => {\n await resetState();\n\n // Load filters one by one, and only if needed\n filtersStack = [];\n for (const filter of filters) {\n const shouldUseFilter = shouldApplyFilter\n ? 
await shouldApplyFilter(context, filter, filters)\n : true;\n if (!shouldUseFilter) {\n log.info(`Not applying filter \"${filter.name}\" or further filters`);\n break;\n }\n\n if (!filter.disabled) {\n log.info(`Applying filter \"${filter.name}\"`);\n await filter.loadState();\n } else {\n log.info(`Filter \"${filter.name}\" recognised but not applied because it is disabled`);\n }\n\n filtersStack.push(filter);\n }\n\n log.info(`Done loading filters`);\n await onFiltersLoaded?.(context);\n };\n\n /** Reset filter state */\n const resetState = async () => {\n log.info(`Resetting filter state`);\n await onResetFilters?.(context);\n filtersStack = filters;\n log.info(`Resetting filter state done`);\n };\n\n return {\n loadState,\n nextState,\n hasNextState,\n hasState,\n };\n};\n\n/** Get entries from a listing page (eg URLs to profiles that should be scraped later) */\nexport const scrapeListingEntries = async <Ctx extends object, UrlType>(\n options: ListingPageScraperOptions<Ctx, UrlType>\n) => {\n const {\n context,\n startUrls,\n listingCountOnly = false,\n log,\n pageId,\n onNavigate,\n onAfterNavigation,\n\n filters = [],\n shouldApplyFilter,\n loadFiltersRetries = 3,\n onLoadFiltersError = (_, err) => console.error(err),\n onFiltersLoaded,\n onResetFilters,\n\n extractEntries,\n extractEntriesRetries = 3,\n onExtractEntriesError = (_, err) => console.error(err),\n onExtractEntriesDone,\n\n onGoToNextPage,\n nextPageWait = 500,\n } = options;\n\n /** Collection of ALL urls across all pages and startUrls */\n const links: UrlType[] = [];\n\n await serialAsyncMap(startUrls, async (startUrl, index) => {\n if (listingCountOnly && index > 0) return;\n\n const logId = `${startUrl} (${index + 1}/${startUrls.length})`;\n\n let userAskedToStop = false;\n const abort = () => { userAskedToStop = true }; // prettier-ignore\n\n // Prepare context shared across all hooks\n let filterObj: ListingFilterController | null = null;\n const genCtxArg = (): 
ListingPageScraperContext<Ctx, UrlType> => ({\n context,\n log,\n startUrl,\n filters,\n loadFilterState: filterObj?.loadState ?? (() => {}),\n abort,\n });\n\n log.debug(`Validating URL ${logId}`);\n validateUrl(startUrl as string);\n log.info(`Navigating URL ${logId}`);\n await onNavigate?.(genCtxArg(), startUrl);\n log.debug(`Done navigating to URL ${logId}`);\n\n filterObj = setupListingFilters({\n context: genCtxArg(),\n filters,\n shouldApplyFilter,\n onFiltersLoaded,\n onResetFilters,\n log,\n });\n\n log.debug(`Calling onAfterNavigation callback. URL ${logId}`); // prettier-ignore\n await onAfterNavigation?.(genCtxArg());\n log.debug(`Done calling onAfterNavigation callback. URL ${logId})`); // prettier-ignore\n\n const isUsingFilters = filters.some((filter) => !filter.disabled);\n\n let hasFilterStatesToProcess = true;\n while (hasFilterStatesToProcess && !userAskedToStop) {\n // Filter loop\n // Load filters before we start paginating\n log.info(`Setting up filters for URL ${logId}`);\n await retryAsync(\n async () => {\n if (!filterObj) throw Error(`Filter controller is missing. This should never happen. 
URL ${logId}`); // prettier-ignore\n\n const filterHasState = await filterObj.hasState();\n if (!isUsingFilters || !filterHasState) {\n log.info(`Not loading filters for URL ${logId}`);\n return;\n }\n\n log.debug(`Loading filters for URL ${logId}`);\n await filterObj.nextState();\n await filterObj.loadState();\n log.debug(`Done loading filters for URL ${logId}`);\n },\n {\n maxRetries: loadFiltersRetries,\n onError: (err, retryIndex) => onLoadFiltersError(genCtxArg(), err, retryIndex),\n }\n );\n\n let nextPageAvailable = true;\n while (nextPageAvailable && !userAskedToStop) {\n // Pagination loop\n let currPageId = 'next page';\n if (pageId) {\n log.debug(`Loading pageId for URL ${logId}`);\n currPageId = await pageId(genCtxArg());\n log.debug(`Done loading pageId for URL ${logId}`);\n }\n const pageLogId = `${logId} (${currPageId})`;\n\n // Extract page links\n log.info(`Extracting links from page ${pageLogId}`);\n const { result } = await retryAsync(\n async (retryIndex) => extractEntries(genCtxArg(), retryIndex),\n {\n maxRetries: extractEntriesRetries,\n onError: (err, retryIndex) => onExtractEntriesError(genCtxArg(), err, retryIndex),\n }\n );\n log.debug(`Done extracting links from page ${pageLogId}`);\n\n const pageLinks = result ?? [];\n links.push(...pageLinks);\n log.info(`Found ${pageLinks.length} links on page ${pageLogId}`);\n\n // Leave after printing the count or on abort\n if (listingCountOnly || userAskedToStop) {\n nextPageAvailable = false;\n if (listingCountOnly) log.info(`Debugging mode. Entries are not scraped. Leaving now. URL ${pageLogId}`); // prettier-ignore\n else if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n continue;\n }\n\n log.debug(`Calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n await onExtractEntriesDone?.(genCtxArg(), pageLinks);\n log.debug(`Done calling onExtractEntriesDone callback. 
URL ${pageLogId}`); // prettier-ignore\n\n if (onGoToNextPage && !userAskedToStop) {\n // If goToNextPage hook is defined, this will be called after each page, until it errors\n try {\n log.info(`Navigating to next page from URL ${pageLogId}`);\n await onGoToNextPage(genCtxArg(), pageLinks);\n log.debug(`Done navigating to next page from URL ${pageLogId}`); // prettier-ignore\n } catch (e) {\n log.info(`Failed navigating to next page from URL ${pageLogId}`); // prettier-ignore\n log.error((e as Error).toString());\n nextPageAvailable = false;\n }\n } else {\n if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n nextPageAvailable = false;\n }\n\n // Wait before we start scraping the next page\n await new Promise((res) => setTimeout(res, nextPageWait));\n }\n\n // Break out if we're not using filters or we've gone through them all\n log.debug(`Checking if there are more filter states available for URL ${logId}`);\n hasFilterStatesToProcess = isUsingFilters && (await filterObj.hasNextState());\n log.debug(`Done checking if there are more filter states available for URL ${logId}`);\n\n if (hasFilterStatesToProcess) {\n if (!userAskedToStop) log.info(`Will repeat scraping this URL with different filter setting. URL ${logId}`); // prettier-ignore\n else log.info(`There are unprocessed filter setting remaining for this URL, but stopping due to abort. URL ${logId}`); // prettier-ignore\n } else log.info(`No filter setting remain for scraping this URL. URL ${logId}`); // prettier-ignore\n }\n log.info(`Finished URL ${logId}`);\n });\n return links;\n};\n"]}
@@ -67,19 +67,19 @@ export interface RunCrawleeOneOptions<TType extends CrawlerType, T extends Crawl
67
67
  * 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom
68
68
  * implementation using the `io` option.
69
69
  */
70
- export declare const runCrawleeOne: <TType extends "basic" | "http" | "cheerio" | "jsdom" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>>(args: RunCrawleeOneOptions<TType, T>) => Promise<void>;
70
+ export declare const runCrawleeOne: <TType extends "basic" | "http" | "jsdom" | "cheerio" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>>(args: RunCrawleeOneOptions<TType, T>) => Promise<void>;
71
71
  /** Given the actor input, create common crawler options. */
72
- export declare const createHttpCrawlerOptions: <T extends CrawleeOneCtx<import("crawlee").CrawlingContext<import("crawlee").BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>> | import("crawlee").PuppeteerCrawler | import("crawlee").PlaywrightCrawler | import("crawlee").JSDOMCrawler | import("crawlee").CheerioCrawler | import("crawlee").HttpCrawler<import("crawlee").InternalHttpCrawlingContext<any, any, import("crawlee").HttpCrawler<any>>>, import("crawlee").Dictionary>, string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>, TOpts extends BasicCrawlerOptions<T["context"]>>({ input, defaults, overrides, }: {
72
+ export declare const createHttpCrawlerOptions: <T extends CrawleeOneCtx<import("crawlee").CrawlingContext<import("crawlee").JSDOMCrawler | import("crawlee").CheerioCrawler | import("crawlee").PlaywrightCrawler | import("crawlee").PuppeteerCrawler | import("crawlee").BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>> | import("crawlee").HttpCrawler<import("crawlee").InternalHttpCrawlingContext<any, any, import("crawlee").HttpCrawler<any>>>, import("crawlee").Dictionary>, string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>, TOpts extends BasicCrawlerOptions<T["context"]>>({ input, defaults, overrides, }: {
73
73
  /** Actor input */
74
- input: T["input"] | null;
74
+ input: T['input'] | null;
75
75
  /**
76
76
  * Default config options set by us. These may be overriden
77
77
  * by values from actor input (set by user).
78
78
  */
79
- defaults?: TOpts | undefined;
79
+ defaults?: TOpts;
80
80
  /**
81
81
  * These config options will overwrite both the default and user
82
82
  * options. This is useful for hard-setting values e.g. in tests.
83
83
  */
84
- overrides?: TOpts | undefined;
84
+ overrides?: TOpts;
85
85
  }) => Partial<TOpts> & import("lodash").Dictionary<TOpts["requestHandler"] | TOpts["handleRequestFunction"] | TOpts["requestList"] | TOpts["requestQueue"] | TOpts["requestHandlerTimeoutSecs"] | TOpts["handleRequestTimeoutSecs"] | TOpts["errorHandler"] | TOpts["failedRequestHandler"] | TOpts["handleFailedRequestFunction"] | TOpts["maxRequestRetries"] | TOpts["maxRequestsPerCrawl"] | TOpts["autoscaledPoolOptions"] | TOpts["minConcurrency"] | TOpts["maxConcurrency"] | TOpts["maxRequestsPerMinute"] | TOpts["keepAlive"] | TOpts["useSessionPool"] | TOpts["sessionPoolOptions"] | TOpts["loggingInterval"] | TOpts["log"]>;
@@ -158,7 +158,7 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
158
158
  if (config.validateInput)
159
159
  yield config.validateInput(input);
160
160
  const { logLevel } = (input !== null && input !== void 0 ? input : {});
161
- const log = new crawlee_1.Log({ level: logLevel ? log_1.logLevelToCrawlee[logLevel] : undefined });
161
+ const log = new crawlee_1.Log({ level: logLevel ? log_1.logLevelToCrawlee[logLevel] : crawlee_1.LogLevel.INFO });
162
162
  // This is context that is available to options that use initialization function
163
163
  const getConfig = () => (Object.assign(Object.assign({}, config), { input, state, io }));
164
164
  // Set up proxy
@@ -295,7 +295,7 @@ const createScopedCrawlerRun = (getActor) => {
295
295
  // Clear cache if it was set from the input
296
296
  if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {
297
297
  const store = yield actor.io.openKeyValueStore(outputCacheStoreId);
298
- yield store.drop();
298
+ yield store.clear();
299
299
  }
300
300
  yield ((_b = genHookFn(actor, outputTransformBefore, 'outputTransformBefore')) === null || _b === void 0 ? void 0 : _b()); // prettier-ignore
301
301
  yield ((_c = genHookFn(actor, outputFilterBefore, 'outputFilterBefore')) === null || _c === void 0 ? void 0 : _c()); // prettier-ignore