crawlee-one 2.0.0 → 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -31
- package/dist/cjs/api.d.ts +2 -2
- package/dist/cjs/api.js.map +1 -1
- package/dist/cjs/cli/cli.js +4 -4
- package/dist/cjs/cli/cli.js.map +1 -1
- package/dist/cjs/cli/commands/codegen.js.map +1 -1
- package/dist/cjs/lib/actions/scrapeListing.js.map +1 -1
- package/dist/cjs/lib/actor/actor.d.ts +5 -5
- package/dist/cjs/lib/actor/actor.js +2 -2
- package/dist/cjs/lib/actor/actor.js.map +1 -1
- package/dist/cjs/lib/error/errorHandler.d.ts +6 -8
- package/dist/cjs/lib/error/errorHandler.js +3 -3
- package/dist/cjs/lib/error/errorHandler.js.map +1 -1
- package/dist/cjs/lib/input.d.ts +2 -2
- package/dist/cjs/lib/input.js +28 -28
- package/dist/cjs/lib/input.js.map +1 -1
- package/dist/cjs/lib/integrations/apify.js.map +1 -1
- package/dist/cjs/lib/io/dataset.js.map +1 -1
- package/dist/cjs/lib/io/pushData.js +3 -3
- package/dist/cjs/lib/io/pushData.js.map +1 -1
- package/dist/cjs/lib/io/pushRequests.d.ts +1 -1
- package/dist/cjs/lib/io/pushRequests.js.map +1 -1
- package/dist/cjs/lib/log.d.ts +1 -1
- package/dist/cjs/lib/migrate/localMigrator.js.map +1 -1
- package/dist/cjs/lib/router/router.d.ts +14 -18
- package/dist/cjs/lib/router/router.js +2 -2
- package/dist/cjs/lib/router/router.js.map +1 -1
- package/dist/cjs/lib/telemetry/sentry.d.ts +1 -1
- package/dist/cjs/lib/test/actor.d.ts +9 -9
- package/dist/cjs/lib/test/actor.js +2 -2
- package/dist/cjs/lib/test/actor.js.map +1 -1
- package/dist/cjs/lib/test/mockApifyClient.d.ts +6 -6
- package/dist/cjs/lib/test/mockApifyClient.js.map +1 -1
- package/dist/cjs/utils/async.js +1 -1
- package/dist/cjs/utils/async.js.map +1 -1
- package/dist/cjs/utils/error.d.ts +1 -1
- package/dist/cjs/utils/package.js.map +1 -1
- package/dist/cjs/utils/url.js.map +1 -1
- package/dist/cjs/utils/valueMonitor.js.map +1 -1
- package/package.json +21 -20
package/README.md
CHANGED
|
@@ -183,7 +183,7 @@ CrawleeOne also includes helpers and types for:
|
|
|
183
183
|
- Privacy compliance
|
|
184
184
|
- Metamorphing
|
|
185
185
|
|
|
186
|
-
CrawleeOne supports many common and advanced web scraping use cases. See the [Use cases](#use-cases) for the overview of the use cases.
|
|
186
|
+
CrawleeOne supports many common and advanced web scraping use cases. See the [Use cases](#playbook--use-cases) for the overview of the use cases.
|
|
187
187
|
|
|
188
188
|
See the section [Usage (for end users)](#usage-for-end-users) for how CrawleeOne looks from user's perspective.
|
|
189
189
|
|
|
@@ -260,6 +260,7 @@ await crawleeOne({
|
|
|
260
260
|
// - Downstream crawler with Apify's "metamorph".
|
|
261
261
|
//
|
|
262
262
|
// See the Actor input reference for all input fields.
|
|
263
|
+
// https://github.com/JuroOravec/crawlee-one/blob/main/docs/reference-input.md
|
|
263
264
|
//
|
|
264
265
|
// Specify input if you plan to use the crawler yourself,
|
|
265
266
|
// otherwise use `inputDefaults` or set `mergeInput`.
|
|
@@ -287,9 +288,9 @@ await crawleeOne({
|
|
|
287
288
|
// E.g. if `type: 'playwright'`, then this config is used as:
|
|
288
289
|
// `new PlaywrightCrawler(crawlerConfig);`
|
|
289
290
|
//
|
|
290
|
-
//
|
|
291
|
+
// Use `crawlerConfig` for config that cannot be configured via `input`,
|
|
291
292
|
// or when you need the crawler to use specific settings and you don't
|
|
292
|
-
// want users to override
|
|
293
|
+
// want users to override.
|
|
293
294
|
crawlerConfig: {
|
|
294
295
|
maxRequestsPerMinute: 120,
|
|
295
296
|
requestHandlerTimeoutSecs: 180,
|
|
@@ -314,9 +315,7 @@ await crawleeOne({
|
|
|
314
315
|
handler: async (ctx) => {
|
|
315
316
|
const { $, request, pushData, pushRequests } = ctx;
|
|
316
317
|
// Scrape data from the page
|
|
317
|
-
const data = [
|
|
318
|
-
/* ... */
|
|
319
|
-
];
|
|
318
|
+
const data = [ ... ];
|
|
320
319
|
|
|
321
320
|
// Save the scraped data. When you save data with `ctx.pushData`,
|
|
322
321
|
// then you can filter, transform, limit, redact, and more.
|
|
@@ -338,8 +337,8 @@ await crawleeOne({
|
|
|
338
337
|
},
|
|
339
338
|
|
|
340
339
|
hooks: {
|
|
341
|
-
// By default, CrawleeOne calls `Crawler.run()`
|
|
342
|
-
// If you
|
|
340
|
+
// By default, once ready, CrawleeOne calls `actor.runCrawler` (which calls `Crawler.run()`)
|
|
341
|
+
// If you supply your own `onReady` callback, you have to call `actor.runCrawler` yourself.
|
|
343
342
|
onReady: async (inst) => {
|
|
344
343
|
// E.g. in this example, user can select to scrape all entries
|
|
345
344
|
// or a certain kind by setting a custom `datasetType` input field.
|
|
@@ -357,7 +356,7 @@ await crawleeOne({
|
|
|
357
356
|
onAfterHandler: (ctx) => { /* ... */ },
|
|
358
357
|
|
|
359
358
|
// If you run the crawler on Apify, or otherwise provide the crawler to others,
|
|
360
|
-
//
|
|
359
|
+
// then it's a good practice to validate their input.
|
|
361
360
|
validateInput: (input) => {
|
|
362
361
|
const schema = Joi.object({ ... });
|
|
363
362
|
Joi.assert(input, schema);
|
|
@@ -367,7 +366,7 @@ await crawleeOne({
|
|
|
367
366
|
// Configure the Crawlee proxy. See Crawlee's `ProxyConfiguration`
|
|
368
367
|
// By default, no proxy is used.
|
|
369
368
|
//
|
|
370
|
-
// NOTE: DO NOT set proxy if you are deploying the crawler
|
|
369
|
+
// NOTE: DO NOT set proxy here if you are deploying the crawler to Apify
|
|
371
370
|
// and you want the user to specify the proxy!
|
|
372
371
|
proxy: Actor.createProxyConfiguration({ ... }),
|
|
373
372
|
|
|
@@ -393,8 +392,9 @@ await crawleeOne({
|
|
|
393
392
|
//
|
|
394
393
|
// You don't need to override this in most of the cases.
|
|
395
394
|
//
|
|
396
|
-
// By default, the data
|
|
397
|
-
//
|
|
395
|
+
// By default, Apify saves the data locally in `./storage` directory, for
|
|
396
|
+
// as long as the crawler is not running from within the Apify's platform.
|
|
397
|
+
// And if the crawler runs in Apify's platform then it uses
|
|
398
398
|
// Apify's cloud storage.
|
|
399
399
|
//
|
|
400
400
|
// See the docs for `CrawleeOneIO`.
|
|
@@ -406,16 +406,14 @@ await crawleeOne({
|
|
|
406
406
|
});
|
|
407
407
|
```
|
|
408
408
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
> - NOTE: When you use `pushData` from within a handler, you omit the first argument (`ctx`).
|
|
418
|
-
> - [pushRequests](./docs/typedoc/modules.md#pushrequests)
|
|
409
|
+
You can find the full type definition of `crawleeOne` and its arguments here:
|
|
410
|
+
- [crawleeOne](./docs/typedoc/modules.md#crawleeone)
|
|
411
|
+
- [CrawleeOneArgs](./docs/typedoc/interfaces/CrawleeOneArgs.md)
|
|
412
|
+
|
|
413
|
+
To learn more about `pushData` and `pushRequests`, see:
|
|
414
|
+
- [pushData](./docs/typedoc/modules.md#pushdata)
|
|
415
|
+
- NOTE: When you use `pushData` from within a handler, you omit the first argument (`ctx`).
|
|
416
|
+
- [pushRequests](./docs/typedoc/modules.md#pushrequests)
|
|
419
417
|
|
|
420
418
|
### Route handler context
|
|
421
419
|
|
|
@@ -442,13 +440,13 @@ await crawleeOne({
|
|
|
442
440
|
ctx.response
|
|
443
441
|
const $ = ctx.parseWithCheerio();
|
|
444
442
|
// And more...
|
|
445
|
-
|
|
443
|
+
|
|
446
444
|
// Extra props
|
|
447
|
-
|
|
445
|
+
|
|
448
446
|
// 1. CrawleeOne instance (type: CrawleeOneActorInst):
|
|
449
447
|
// - Save scraped items
|
|
450
448
|
await ctx.actor.pushData(scrapedItems);
|
|
451
|
-
|
|
449
|
+
|
|
452
450
|
// - Enqueue more URLs to scrape
|
|
453
451
|
const id = Math.floor(Math.random() * 100);
|
|
454
452
|
const url = `https://example.com/resource/${id}`;
|
|
@@ -493,7 +491,6 @@ See either of the two projects as examples:
|
|
|
493
491
|
- [SKCRIS Scraper](https://github.com/JuroOravec/apify-actor-skcris)
|
|
494
492
|
- [Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk)
|
|
495
493
|
|
|
496
|
-
|
|
497
494
|
#### 1. Write the crawler with CrawleeOne
|
|
498
495
|
|
|
499
496
|
Either use the example projects above or use your own boilerplate project, but remember that Apify requires you to Dockerize the
|
|
@@ -518,9 +515,9 @@ For that, you will need to:
|
|
|
518
515
|
```
|
|
519
516
|
|
|
520
517
|
[`apify-actor-config`](https://github.com/JuroOravec/apify-actor-config) is a sister package focused solely on working with and generating
|
|
521
|
-
|
|
518
|
+
Apify's `actor.json` config files.
|
|
522
519
|
|
|
523
|
-
2. Write a JS/TS file where you will only define your config and export it as the
|
|
520
|
+
2. Write a JS/TS file where you will only define your config and export it as the _default_ export.
|
|
524
521
|
|
|
525
522
|
[See here the example config file from Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk/blob/main/src/config.ts).
|
|
526
523
|
|
|
@@ -545,7 +542,7 @@ For that, you will need to:
|
|
|
545
542
|
// ...
|
|
546
543
|
input: inputSchema,
|
|
547
544
|
});
|
|
548
|
-
|
|
545
|
+
|
|
549
546
|
export default config;
|
|
550
547
|
```
|
|
551
548
|
|
|
@@ -559,7 +556,7 @@ For that, you will need to:
|
|
|
559
556
|
3. Build / transpile the config to vanilla JS if necessary.
|
|
560
557
|
|
|
561
558
|
In Profesia.sk Scraper, the config is defined as a TypeScript file, but `apify-actor-config` currently supports only JS files.
|
|
562
|
-
|
|
559
|
+
|
|
563
560
|
So if you are also using anything other than plain JavaScript, then you will need to build / transpile your project. Do so only once you're happy with the input fields and their defaults.
|
|
564
561
|
|
|
565
562
|
4. Generate `actor.json` file
|
|
@@ -792,6 +789,7 @@ interface CrawleeOneTelemetry {
|
|
|
792
789
|
```
|
|
793
790
|
|
|
794
791
|
See existing integrations for inspiration:
|
|
792
|
+
|
|
795
793
|
- [Sentry](./src/lib/telemetry/sentry.ts)
|
|
796
794
|
|
|
797
795
|
Based on the above, here's an example of a custom telemetry implementation
|
|
@@ -871,10 +869,11 @@ interface CrawleeOneIO {
|
|
|
871
869
|
```
|
|
872
870
|
|
|
873
871
|
See existing integrations for inspiration:
|
|
872
|
+
|
|
874
873
|
- [Apify](./src/lib/integrations/apify.ts)
|
|
875
874
|
|
|
876
875
|
Based on the above, here's an example of a custom CrawleeOneIO implementation
|
|
877
|
-
that overrides the datasets to send them to a custom HTTP endpoint.
|
|
876
|
+
that overrides the datasets to send them to a custom HTTP endpoint.
|
|
878
877
|
|
|
879
878
|
```ts
|
|
880
879
|
import type { CrawleeOneIO, apifyIO } from 'crawlee-one';
|
package/dist/cjs/api.d.ts
CHANGED
|
@@ -3,7 +3,7 @@ import type { AllActorInputs } from './lib/input';
|
|
|
3
3
|
import type { CrawleeOneRouteHandler, CrawleeOneRoute } from './lib/router/types';
|
|
4
4
|
import type { CrawlerMeta, CrawlerType } from './types';
|
|
5
5
|
import type { MaybePromise } from './utils/types';
|
|
6
|
-
/** Args
|
|
6
|
+
/** Args object passed to `crawleeOne` */
|
|
7
7
|
export interface CrawleeOneArgs<TType extends CrawlerType, T extends CrawleeOneCtx<CrawlerMeta<TType>['context']>> {
|
|
8
8
|
/** Type specifying the Crawlee crawler class, input options, and more. */
|
|
9
9
|
type: CrawlerType;
|
|
@@ -104,4 +104,4 @@ export interface CrawleeOneArgs<TType extends CrawlerType, T extends CrawleeOneC
|
|
|
104
104
|
};
|
|
105
105
|
routes: Record<T['labels'], CrawleeOneRoute<T, CrawleeOneActorRouterCtx<T>>>;
|
|
106
106
|
}
|
|
107
|
-
export declare const crawleeOne: <TType extends "basic" | "http" | "
|
|
107
|
+
export declare const crawleeOne: <TType extends "basic" | "http" | "jsdom" | "cheerio" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>> = CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, import(".").CrawleeOneIO<object, object, object>, import(".").CrawleeOneTelemetry<any, any>>>(args: CrawleeOneArgs<TType, T>) => Promise<void>;
|
package/dist/cjs/api.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"api.js","sourceRoot":"","sources":["../../src/api.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,6CAAkD;AAQlD,mCAAmD;AAmH5C,MAAM,UAAU,GAAG,CAMxB,IAA8B,EAC9B,EAAE;;IACF,MAAM,kBAAkB,GAAG,CAAC,OAA+D,EAAE,EAAE;QAC7F,MAAM,YAAY,GAAG,CAAO,
|
|
1
|
+
{"version":3,"file":"api.js","sourceRoot":"","sources":["../../src/api.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,6CAAkD;AAQlD,mCAAmD;AAmH5C,MAAM,UAAU,GAAG,CAMxB,IAA8B,EAC9B,EAAE;;IACF,MAAM,kBAAkB,GAAG,CAAC,OAA+D,EAAE,EAAE;QAC7F,MAAM,YAAY,GAAG,CAAO,GAAQ,EAAE,EAAE;;YACtC,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,eAAe,mDAAG,GAAU,CAAC,CAAA,CAAC;YAChD,MAAM,OAAO,CAAC,GAAG,CAAC,CAAC;YACnB,MAAM,CAAA,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,cAAc,mDAAG,GAAU,CAAC,CAAA,CAAC;QACjD,CAAC,CAAA,CAAC;QACF,OAAO,YAAY,CAAC;IACtB,CAAC,CAAC;IAEF,OAAO,IAAA,qBAAa,EAAW;QAC7B,SAAS,EAAE,IAAI,CAAC,IAAI;QACpB,SAAS,EAAE,IAAI,CAAC,IAAa;QAC7B,qBAAqB,EAAE,IAAI,CAAC,qBAAqB;QACjD,sBAAsB,EAAE,IAAI,CAAC,aAAa;QAC1C,WAAW,EAAE;YACX,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,EAAE,EAAE,IAAI,CAAC,EAAE;YAEX,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,aAAa,EAAE,IAAI,CAAC,aAAa;YACjC,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,aAAa,EAAE,MAAA,IAAI,CAAC,KAAK,0CAAE,aAAa;YAExC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,oBAAoB,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;;gBAAC,OAAA;oBACnC,IAAA,4BAAsB,EAAC,MAAA,KAAK,aAAL,KAAK,uBAAL,KAAK,CAAE,QAAQ,mCAAI,MAAM,CAAC;oBACjD,kBAAyB;iBAC1B,CAAA;aAAA;SACF;QACD,OAAO,EAAE,CAAO,KAAK,EAAE,EAAE;;YACvB,MAAM,OAAO,GAAG,MAAA,MAAA,IAAI,CAAC,KAAK,0CAAE,OAAO,mCAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;YACvE,MAAM,OAAO,CAAC,KAAK,CAAC,CAAC;QACvB,CAAC,CAAA;KACF,CAAC,CAAC;AACL,CAAC,CAAC;AA5CW,QAAA,UAAU,cA4CrB","sourcesContent":["import { runCrawleeOne } from './lib/actor/actor';\nimport type {\n CrawleeOneActorInst,\n CrawleeOneActorDef,\n CrawleeOneActorRouterCtx,\n CrawleeOneCtx,\n} from './lib/actor/types';\nimport type { AllActorInputs } from './lib/input';\nimport { logLevelHandlerWrapper } from './lib/log';\nimport type { CrawleeOneRouteHandler, CrawleeOneRoute } from './lib/router/types';\nimport type { CrawlerMeta, CrawlerType } from './types';\nimport type { MaybePromise } from './utils/types';\n\n/** Args object passed to `crawleeOne` */\nexport interface CrawleeOneArgs<\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']>\n> {\n /** Type specifying the Crawlee crawler class, input options, and more. */\n type: CrawlerType;\n /** Unique name of the crawler instance. The name may be used in codegen and logging. */\n name?: string;\n\n /** Crawlee crawler configuration that CANNOT be overriden via `input` and `crawlerConfigDefaults` */\n crawlerConfig?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n /** Crawlee crawler configuration that CAN be overriden via `input` and `crawlerConfig` */\n crawlerConfigDefaults?: Omit<CrawlerMeta<TType>['options'], 'requestHandler'>;\n\n /**\n * If `mergeInput` is truthy, will merge input settings from `inputDefaults`, `input`,\n * and `io.getInput()`.\n * \n * ```js\n * { ...inputDefaults, ...io.getInput(), ...input }\n * ```\n * \n * If `mergeInput` is falsy, `io.getInput()` is ignored if `input` is provided. So the input is either:\n * \n * ```js\n * { ...inputDefaults, ...io.getInput() } // If `input` is not defined\n * ```\n * \n * OR\n * \n * ```js\n * { ...inputDefaults, ...input } // If `input` is defined\n * ```\n * \n * Alternatively, you can supply your own function that merges the sources:\n * \n * ```js\n * {\n * // `mergeInput` can be also async\n * mergeInput: ({ defaults, overrides, env }) => {\n * // This is same as `mergeInput: true`\n * return { ...defaults, ...env, ...overrides };\n * },\n * }\n * ```\n */\n mergeInput?: boolean | ((sources: {\n defaults: Partial<AllActorInputs>;\n overrides: Partial<AllActorInputs>;\n env: Partial<AllActorInputs>;\n }) => MaybePromise<Partial<AllActorInputs>>);\n /** Input configuration that CANNOT be overriden via `inputDefaults` and `io.getInput()` */\n input?: Partial<AllActorInputs>;\n /** Input configuration that CAN be overriden via `input` and `io.getInput()` */\n inputDefaults?: Partial<AllActorInputs>;\n\n // /////// Override services /////////\n /**\n * Configure the Crawlee proxy.\n *\n * See {@link ProxyConfiguration}\n */\n proxy?: CrawleeOneActorDef<T>['proxy'];\n /**\n * Provide a telemetry instance that is used for tracking errors.\n *\n * See {@link CrawleeOneTelemetry}\n */\n telemetry?: CrawleeOneActorDef<T>['telemetry'];\n /**\n * Provide an instance that is responsible for state management:\n * - Adding scraped data to datasets\n * - Adding and removing requests to/from queues\n * - Cache storage\n * \n * This is an API based on Apify's `Actor` utility class, which is also\n * the default.\n * \n * You don't need to override this in most of the cases.\n * \n * By default, the data is saved and kept locally in\n * `./storage` directory. And if the cralwer runs in Apify's platform\n * then it will use Apify's cloud for storage.\n *\n * See {@link CrawleeOneIO}\n */\n io?: CrawleeOneActorDef<T>['io'];\n /**\n * Provide a custom router instance.\n * \n * By default, router is created as:\n * ```ts\n * import { Router } from 'crawlee';\n * Router.create(),\n * ```\n *\n * See {@link Router}\n */\n router?: CrawleeOneActorDef<T>['router'];\n\n hooks?: {\n onReady?: (actor: CrawleeOneActorInst<T>) => MaybePromise<void>;\n validateInput?: (input: AllActorInputs | null) => MaybePromise<void>;\n onBeforeHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n onAfterHandler?: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>;\n };\n routes: Record<T['labels'], CrawleeOneRoute<T, CrawleeOneActorRouterCtx<T>>>;\n} // prettier-ignore\n\nexport const crawleeOne = <\n TType extends CrawlerType,\n T extends CrawleeOneCtx<CrawlerMeta<TType>['context']> = CrawleeOneCtx<\n CrawlerMeta<TType>['context']\n >\n>(\n args: CrawleeOneArgs<TType, T>\n) => {\n const hookHandlerWrapper = (handler: CrawleeOneRouteHandler<T, CrawleeOneActorRouterCtx<T>>) => {\n const innerHandler = async (ctx: any) => {\n await args.hooks?.onBeforeHandler?.(ctx as any);\n await handler(ctx);\n await args.hooks?.onAfterHandler?.(ctx as any);\n };\n return innerHandler;\n };\n\n return runCrawleeOne<TType, T>({\n actorName: args.name,\n actorType: args.type as TType,\n crawlerConfigDefaults: args.crawlerConfigDefaults,\n crawlerConfigOverrides: args.crawlerConfig,\n actorConfig: {\n telemetry: args.telemetry,\n router: args.router,\n proxy: args.proxy,\n io: args.io,\n\n input: args.input,\n inputDefaults: args.inputDefaults,\n mergeInput: args.mergeInput,\n validateInput: args.hooks?.validateInput,\n\n routes: args.routes,\n routeHandlerWrappers: ({ input }) => [\n logLevelHandlerWrapper(input?.logLevel ?? 'info'),\n hookHandlerWrapper as any,\n ],\n },\n onReady: async (actor) => {\n const onReady = args.hooks?.onReady ?? ((actor) => actor.runCrawler());\n await onReady(actor);\n },\n });\n};\n"]}
|
package/dist/cjs/cli/cli.js
CHANGED
|
@@ -33,7 +33,7 @@ commander_1.program
|
|
|
33
33
|
|
|
34
34
|
Example call:
|
|
35
35
|
$ crawlee-one generate -c ./path/to/config-file -o ./path/to/output.ts`)
|
|
36
|
-
.action((
|
|
36
|
+
.action((_a) => __awaiter(void 0, [_a], void 0, function* ({ config: configFile, out: outFile }) {
|
|
37
37
|
yield (0, codegen_1.generateTypes)(outFile, configFile);
|
|
38
38
|
}));
|
|
39
39
|
commander_1.program
|
|
@@ -44,7 +44,7 @@ commander_1.program
|
|
|
44
44
|
|
|
45
45
|
Example call:
|
|
46
46
|
$ crawlee-one validate -c ./path/to/config`)
|
|
47
|
-
.action((
|
|
47
|
+
.action((_b) => __awaiter(void 0, [_b], void 0, function* ({ config: configPath }) {
|
|
48
48
|
const config = yield (0, config_1.loadConfig)(configPath);
|
|
49
49
|
(0, config_1.validateConfig)(config);
|
|
50
50
|
console.log('CrawleeOne config is OK!');
|
|
@@ -60,7 +60,7 @@ commander_1.program
|
|
|
60
60
|
|
|
61
61
|
Example call:
|
|
62
62
|
$ crawlee-one migrate -d ./path/to/migrations-dir -t v1`)
|
|
63
|
-
.action((
|
|
63
|
+
.action((_c) => __awaiter(void 0, [_c], void 0, function* ({ dir, target, extension, delimeter }) {
|
|
64
64
|
const migrationsDir = path_1.default.resolve(process.cwd(), dir);
|
|
65
65
|
const { migrate } = (0, localMigrator_1.createLocalMigrator)({ migrationsDir, extension, delimeter });
|
|
66
66
|
yield migrate(target);
|
|
@@ -76,7 +76,7 @@ commander_1.program
|
|
|
76
76
|
|
|
77
77
|
Example call:
|
|
78
78
|
$ crawlee-one unmigrate -d ./path/to/migrations-dir -t v1`)
|
|
79
|
-
.action((
|
|
79
|
+
.action((_d) => __awaiter(void 0, [_d], void 0, function* ({ dir, target, extension, delimeter }) {
|
|
80
80
|
const migrationsDir = path_1.default.resolve(process.cwd(), dir);
|
|
81
81
|
const { unmigrate } = (0, localMigrator_1.createLocalMigrator)({ migrationsDir, extension, delimeter });
|
|
82
82
|
yield unmigrate(target);
|
package/dist/cjs/cli/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../../src/cli/cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,yCAAoC;AACpC,gDAAwB;AAExB,8CAAsD;AACtD,gEAAmE;AACnE,8CAA+D;AAC/D,gDAAmD;AAEnD,MAAM,OAAO,GAAG,IAAA,4BAAkB,EAAC,MAAM,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;AAEhE,mBAAO,CAAC,EAAE;KACP,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;KAClB,WAAW,CAAC,8BAA8B,CAAC;KAC3C,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAE5B,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,2CAA2C,CAAC;KACxD,MAAM,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAC1D,cAAc,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KAC/D,WAAW,CACV,OAAO,EACP;;;yEAGqE,CACtE;KACA,MAAM,CAAC,
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../../src/cli/cli.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,yCAAoC;AACpC,gDAAwB;AAExB,8CAAsD;AACtD,gEAAmE;AACnE,8CAA+D;AAC/D,gDAAmD;AAEnD,MAAM,OAAO,GAAG,IAAA,4BAAkB,EAAC,MAAM,EAAE,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC,CAAC;AAEhE,mBAAO,CAAC,EAAE;KACP,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC;KAClB,WAAW,CAAC,8BAA8B,CAAC;KAC3C,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;AAE5B,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,2CAA2C,CAAC;KACxD,MAAM,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAC1D,cAAc,CAAC,wBAAwB,EAAE,qBAAqB,CAAC;KAC/D,WAAW,CACV,OAAO,EACP;;;yEAGqE,CACtE;KACA,MAAM,CAAC,KAA6C,EAAE,4CAAxC,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,OAAO,EAAE;IACjD,MAAM,IAAA,uBAAa,EAAC,OAAO,EAAE,UAAU,CAAC,CAAC;AAC3C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,UAAU,CAAC;KACnB,WAAW,CAAC,4BAA4B,CAAC;KACzC,cAAc,CAAC,2BAA2B,EAAE,qBAAqB,CAAC;KAClE,WAAW,CACV,OAAO,EACP;;;6CAGyC,CAC1C;KACA,MAAM,CAAC,KAA+B,EAAE,4CAA1B,EAAE,MAAM,EAAE,UAAU,EAAE;IACnC,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAU,EAAC,UAAU,CAAC,CAAC;IAC5C,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IACvB,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;AAC1C,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,SAAS,CAAC;KAClB,WAAW,CAAC,wDAAwD,CAAC;KACrE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;0DAGsD,CACvD;KACA,MAAM,CAAC,KAA8C,EAAE,4CAAzC,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE;IAClD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,OAAO,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACjF,MAAM,OAAO,CAAC,MAAM,CAAC,CAAC;AACxB,CAAC,CAAA,CAAC,CAAC;AAEL,mBAAO;KACJ,OAAO,CAAC,WAAW,CAAC;KACpB,WAAW,CAAC,4DAA4D,CAAC;KACzE,cAAc,CAAC,8BAA8B,EAAE,uCAAuC,CAAC;KACvF,cAAc,CAAC,iBAAiB,EAAE,kCAAkC,CAAC;KACrE,MAAM,CACL,yBAAyB,EACzB,mEAAmE,CACpE;KACA,MAAM,CACL,8BAA8B,EAC9B,+EAA+E,CAChF;KACA,WAAW,CACV,OAAO,EACP;;;4DAGwD,CACzD;KACA,MAAM,CAAC,KAA8C,EAAE,4CAAzC,EAAE,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE;IAClD,MAAM,aAAa,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,GAAG,EAAE,EAAE,GAAG,CAAC,CAAC;IACvD,MAAM,EAAE,SAAS,EAAE,GAAG,IAAA,mCAAmB,EAAC,EAAE,aAAa,EAAE,SAAS,EAAE,SAAS,EAAE,CAAC,CAAC;IACnF,MAAM,SAAS,CAAC,MAAM,CAAC,CAAC;AAC1B,CAAC,CAAA,CAAC,CAAC;AAEE,MAAM,GAAG,GAAG,GAAG,EAAE;IACtB,mBAAO,CAAC,KAAK,EAAE,CAAC;AAClB,CAAC,CAAC;AAFW,QAAA,GAAG,OAEd","sourcesContent":["import { program } from 'commander';\nimport path from 'path';\n\nimport { getPackageJsonInfo } from '../utils/package';\nimport { createLocalMigrator } from '../lib/migrate/localMigrator';\nimport { loadConfig, validateConfig } from './commands/config';\nimport { generateTypes } from './commands/codegen';\n\nconst pkgJson = getPackageJsonInfo(module, ['name', 'version']);\n\nprogram //\n .name(pkgJson.name)\n .description('CLI to run crawlee-one tools')\n .version(pkgJson.version);\n\nprogram\n .command('generate')\n .description('Generate CrawleeOne types based on config')\n .option('-c --config [config-file]', 'path to config file')\n .requiredOption('-o --out <output-file>', 'path to output file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one generate -c ./path/to/config-file -o ./path/to/output.ts`\n )\n .action(async ({ config: configFile, out: outFile }) => {\n await generateTypes(outFile, configFile);\n });\n\nprogram\n .command('validate')\n .description('Validate CrawleeOne config')\n .requiredOption('-c --config <config-file>', 'path to config file')\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one validate -c ./path/to/config`\n )\n .action(async ({ config: configPath }) => {\n const config = await loadConfig(configPath);\n validateConfig(config);\n console.log('CrawleeOne config is OK!');\n });\n\nprogram\n .command('migrate')\n .description('Run a migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 'path to the migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one migrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { migrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await migrate(target);\n });\n\nprogram\n .command('unmigrate')\n .description('Run an un-migration script specified by the version number')\n .requiredOption('-t --target <target-version>', 'migration version to execute, eg \"v1\"')\n .requiredOption('-d --dir <path>', 'path to the migrations directory')\n .option(\n '--delimeter [delimeter]',\n 'delimeter between version and rest of file name, eg \"v1_filename\"'\n )\n .option(\n '--ext --extension [ext-glob]',\n 'glob pattern for valid extensions for migration files, eg \".js\" or \".{js,ts}\"'\n )\n .addHelpText(\n 'after',\n `\n\nExample call:\n $ crawlee-one unmigrate -d ./path/to/migrations-dir -t v1`\n )\n .action(async ({ dir, target, extension, delimeter }) => {\n const migrationsDir = path.resolve(process.cwd(), dir);\n const { unmigrate } = createLocalMigrator({ migrationsDir, extension, delimeter });\n await unmigrate(target);\n });\n\nexport const cli = () => {\n program.parse();\n};\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"codegen.js","sourceRoot":"","sources":["../../../../src/cli/commands/codegen.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,sDAAsD;AACtD,2DAA8B;AAC9B,gDAAwB;AAGxB,+CAA4D;AAC5D,qCAAsD;AActD,MAAM,SAAS,GAAG,CAAC,KAAe,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC9E,MAAM,QAAQ,GAAG,CAAC,KAAe,EAAE,EAAE,CACnC,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;AAEpE,MAAM,UAAU,GAAG;IACjB,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,GAAG,WAAW,MAAM,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,QAAQ,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QAC7D,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,gBAAgB,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IAC1D,CAAC;IACD,iGAAiG;IACjG,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,OAAO,eAAe,IAAI,IAAI,KAAK,EAAE,CAAC;IACxC,CAAC;CACiF,CAAC;AAErF,MAAM,oBAAoB,GAAG,CAAC,MAA8B,EAAE,EAAE;IAC9D,mDAAmD;IACnD,MAAM,WAAW,GAA2B,EAAE,CAAC;IAE/C,8DAA8D;IAC9D,MAAM,OAAO,GAA8D,EAAE,CAAC;IAE9E,MAAM,UAAU,GAAG,CACjB,GAAW,EACX,UAAe,EACf,OAAgC,EAChC,EAAE;QACF,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC;QAC3D,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC;QAC9D,qEAAqE;QACrE,0CAA0C;QAC1C,OAAO,UAAU,CAAC,MAAM,CAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACzD,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACf,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAS,CAAC,CAAC;IAChB,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CACb,GAAW,EACX,KAA8B,EAC9B,OAAiE,EACjE,EAAE;;QACF,MAAM,IAAI,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,IAAI,mCAAI,MAAM,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,EAAE,CAAC;QACzC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE;YACrB,MAAM,WAAW,GAAG,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;YAClE,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;YAC3D,WAAW,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC;SACjC;QACD,qDAAqD;QACrD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IAEF,oBAAoB;IACpB,MAAM,EACJ,cAAc,EAAE,UAAU,EAC1B,wBAAwB,EAAE,cAAc,EACxC,mBAAmB,EAAE,QAAQ,EAC7B,eAAe,EAAE,SAAS,EAC1B,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,wBAAwB,EAAE,cAAc,EACxC,YAAY,EAAE,MAAM,EACpB,mBAAmB,EAAE,SAAS,EAC9B,aAAa,EAAE,OAAO,EACtB,cAAc,EAAE,QAAQ,EACxB,UAAU,EAAE,YAAY,GACzB,GAAG,UAAU,CAAC,aAAa,EAAE;QAC5B,gBAAgB;QAChB,0BAA0B;QAC1B,qBAAqB;QACrB,iBAAiB;QACjB,wBAAwB;QACxB,wBAAwB;QACxB,wBAAwB;QACxB,0BAA0B;QAC1B,cAAc;QACd,qBAAqB;QACrB,eAAe;QACf,gBAAgB;QAChB,YAAY;KACb,CAAC,CAAC;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,qCAAyB,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpF,kBAAkB;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,EAAE,gBAAgB,EAAE,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAE7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,EAAE,EAAE;QACjE,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;QAEjC,wEAAwE;QACxE,MAAM,uBAAuB,GAAG,qCAAyB,CAAC,WAAW,CAAC,CAAC;QAEvE,oCAAoC;QACpC,+DAA+D;QAC/D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,WAAW,OAAO,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhF,iGAAiG;QACjG,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,WAAW,WAAW,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrF,IAAI,EAAE,MAAM;SACb,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG;YAClB,wCAAwC,UAAU,EAAE;YACpD,eAAe,MAAM,MAAM,MAAM,EAAE;YACnC,iBAAiB,SAAS,gBAAgB,SAAS,YAAY;SAChE,CAAC;QAEF,+BAA+B;QAC/B,4HAA4H;QAC5H,MAAM,MAAM,GAAG,MAAM,CACnB,GAAG,WAAW,KAAK,EACnB,GAAG,OAAO,IAAI,uBAAuB,KAAK,QAAQ,uBAAuB,EACzE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,gCAAgC;QAChC,0FAA0F;QAC1F,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,eAAe,QAAQ,KAAK,WAAW,MAAM,MAAM,sCAAsC,YAAY,KAAK,WAAW,MAAM,MAAM,2CAA2C,WAAW,MAAM,EAC7L,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CACxC,CAAC;QAEF,2DAA2D;QAC3D,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,YAAY,GAAG,MAAM,CACzB,GAAG,WAAW,eAAe,EAC7B,GAAG,cAAc,IAAI,MAAM,uBAAuB,EAClD,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,+CAA+C;QAC/C,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,WAAW,GAAG,MAAM,CACxB,GAAG,WAAW,UAAU,EACxB,GAAG,QAAQ,IAAI,MAAM,uBAAuB,EAC5C,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,wBAAwB;QACxB,oEAAoE;QACpE,MAAM,QAAQ,GAAG,MAAM,CACrB,GAAG,WAAW,OAAO,EACrB,GAAG,SAAS,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EAClF;YACE,QAAQ,EAAE,WAAW;SACtB,CACF,CAAC;QAEF,+GAA+G;QAC/G,MAAM,iBAAiB,GAAG,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,CAAC;QAChH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,WAAW,cAAc,EAAE,iBAAiB,EAAE;YAC9E,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,+GAA+G;QAC/G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,iBAAiB,GAAG,MAAM,CAC9B,GAAG,WAAW,gBAAgB,EAC9B,GAAG,cAAc,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACvF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0BAA0B;QAE1B,kEAAkE;QAClE,mHAAmH;QACnH,kHAAkH;QAClH,MAAM,kBAAkB,GAAG,MAAM,CAAC,GAAG,WAAW,iBAAiB,EAAE,iBAAiB,EAAE;YACpF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,CAAC,GAAG,WAAW,gBAAgB,EAAE,iBAAiB,EAAE;YAClF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,qKAAqK;QACrK,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,WAAW,WAAW,4BAA4B,MAAM,SAAS,EACjE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC5C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,CACjD,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,EAAE;QACzB,MAAM,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QACzC,MAAM,gBAAgB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,GAAG,CAAC,GAAG,CAAC,GAAG,UAAU,OAAO,KAAK,gBAAgB,YAAY,GAAG,GAAG,CAAC;QACpE,OAAO,GAAG,CAAC;IACb,CAAC,EACD,EAAE,CACH,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AAChD,CAAC,CAAC;AAEF;;;;;GAKG;AACI,MAAM,aAAa,GAAG,CAAO,OAAe,EAAE,YAAwC,EAAE,EAAE;IAC/F,MAAM,MAAM,GACV,CAAC,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ;QAC/C,CAAC,CAAC,MAAM,IAAA,mBAAU,EAAC,YAAY,CAAC;QAChC,CAAC,CAAC,YAAY,CAAC;IACnB,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IAEvB,uEAAuE;IACvE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,MAAM,oBAAoB,CAAC,MAAO,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,WAAW,GACf,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEzF,MAAM,MAAM,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,kBAAG,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,kBAAG,CAAC,SAAS,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAEnD,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,EAAE,CAAC,CAAC;AACrD,CAAC,CAAA,CAAC;AAjBW,QAAA,aAAa,iBAiBxB","sourcesContent":["/* eslint-disable @typescript-eslint/no-unused-vars */\nimport fsp from 'fs/promises';\nimport path from 'path';\n\nimport type { CrawleeOneConfig, CrawleeOneConfigSchema } from '../../types/config';\nimport { crawlingContextNameByType } from '../../constants';\nimport { loadConfig, validateConfig } from './config';\n// NOTE: We intentionally import these to know when their names change\nimport type { AllActorInputs } from '../../lib/input';\nimport type { CrawleeOneActorInst, CrawleeOneActorRouterCtx } from '../../lib/actor/types';\nimport type {\n CrawleeOneRoute,\n CrawleeOneRouteHandler,\n CrawleeOneRouteMatcher,\n CrawleeOneRouteMatcherFn,\n CrawleeOneRouteWrapper,\n} from '../../lib/router/types';\nimport type { MaybePromise } from '../../utils/types';\nimport type { CrawleeOneArgs, crawleeOne } from '../../api';\n\nconst makeUnion = (items: string[]) => items.map((s) => `\"${s}\"`).join(` | `);\nconst makeEnum = (items: string[]) =>\n '{\\n' + items.map((s) => ` '${s}' = '${s}'`).join(`,\\n`) + '\\n}';\n\nconst formatters = {\n type: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name}${typeArgsStr} = ${value};`;\n },\n typeFunc: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name} = ${typeArgsStr}${value};`;\n },\n func: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export const ${name} = ${typeArgsStr}${value};`;\n },\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n enum: (name: string, value: string, typeArgs?: string[]) => {\n return `export enum ${name} ${value}`;\n },\n} satisfies Record<string, (name: string, value: string, args?: string[]) => string>;\n\nconst parseTypesFromSchema = (schema: CrawleeOneConfigSchema) => {\n /** Remember which types we've already generated */\n const definitions: Record<string, string> = {};\n\n /** Remember what values need to be imported and from where */\n const imports: Record<string, Set<{ name: string; typeOnly?: boolean }>> = {};\n\n const addImports = <T extends string>(\n pkg: string,\n newEntries: T[],\n options?: { typeOnly?: boolean }\n ) => {\n const { typeOnly } = options ?? {};\n const entries = (imports[pkg] = imports[pkg] || new Set());\n newEntries.forEach((name) => entries.add({ name, typeOnly }));\n // Return the entries as variables, so we can define them in a single\n // place but still reference them in code.\n return newEntries.reduce<{ [Key in T]: Key }>((agg, key) => {\n agg[key] = key;\n return agg;\n }, {} as any);\n };\n\n const define = (\n key: string,\n value: string | (() => string),\n options?: { typeArgs?: string[]; kind?: keyof typeof formatters }\n ) => {\n const kind = options?.kind ?? 'type';\n const typeArgs = options?.typeArgs ?? [];\n if (!definitions[key]) {\n const resolvedVal = typeof value === 'function' ? value() : value;\n const formatter = formatters[kind];\n const valFormatted = formatter(key, resolvedVal, typeArgs);\n definitions[key] = valFormatted;\n }\n // Return the key as variable, so we can reference it\n return key;\n };\n\n // 1. Define imports\n const {\n AllActorInputs: actorInput,\n CrawleeOneActorRouterCtx: actorRouterCtx,\n CrawleeOneActorInst: actorCtx,\n CrawleeOneRoute: routeType,\n CrawleeOneRouteHandler: routeHandler,\n CrawleeOneRouteWrapper: routeWrapper,\n CrawleeOneRouteMatcher: routeMatcher,\n CrawleeOneRouteMatcherFn: routeMatcherFn,\n CrawleeOneIO: ioType,\n CrawleeOneTelemetry: telemType,\n CrawleeOneCtx: ctxType,\n CrawleeOneArgs: argsType,\n crawleeOne: crawleeOneFn,\n } = addImports('crawlee-one', [\n 'AllActorInputs',\n 'CrawleeOneActorRouterCtx',\n 'CrawleeOneActorInst',\n 'CrawleeOneRoute',\n 'CrawleeOneRouteHandler',\n 'CrawleeOneRouteWrapper',\n 'CrawleeOneRouteMatcher',\n 'CrawleeOneRouteMatcherFn',\n 'CrawleeOneIO',\n 'CrawleeOneTelemetry',\n 'CrawleeOneCtx',\n 'CrawleeOneArgs',\n 'crawleeOne',\n ]);\n addImports('crawlee', Object.values(crawlingContextNameByType), { typeOnly: true });\n\n // 2. Define utils\n const maybeP = define('MaybePromise', 'T | Promise<T>', { typeArgs: ['T'] });\n\n Object.entries(schema.crawlers).forEach(([crawlerName, crawler]) => {\n const crawlerType = crawler.type;\n\n // 2. Get `CrawlingContext`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n const crawlingContextTypeName = crawlingContextNameByType[crawlerType];\n\n // 3. Generate type for route labels\n // type `CrawlerName`Label = \"detailPage\" | \"otherLabel\" | ...;\n const labelKey = define(`${crawlerName}Label`, () => makeUnion(crawler.routes));\n\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n const labelEnumKey = define(`${crawlerName}LabelEnum`, () => makeEnum(crawler.routes), {\n kind: 'enum',\n });\n\n const ctxTypeArgs = [\n `TInput extends Record<string, any> = ${actorInput}`,\n `TIO extends ${ioType} = ${ioType}`,\n `Telem extends ${telemType}<any, any> = ${telemType}<any, any>`,\n ];\n\n // 4. Create CrawleeOne context\n // type `CrawlerName`Ctx = <TIO, Telem>CrawleeOneCtx<CheerioCrawlingContext, `CrawlerName`Label, AllActorInputs, TIO, Telem>\n const ctxKey = define(\n `${crawlerName}Ctx`,\n `${ctxType}<${crawlingContextTypeName}, ${labelKey}, TInput, TIO, Telem>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 5. Create CrawleeOne instance\n // const customCrawler = <TIO, Telem>(args: CrawleeOneArgs<TType, T>) => crawleeOne(args);\n const crawlerKey = define(\n `${crawlerName}Crawler`,\n `(args: Omit<${argsType}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>, 'type'>) => ${crawleeOneFn}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>({ ...args, type: \"${crawlerType}\"});`,\n { kind: 'func', typeArgs: ctxTypeArgs }\n );\n\n // 6. Get actor router context (`CrawleeOneActorRouterCtx`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const routerCtxKey = define(\n `${crawlerName}RouterContext`,\n `${actorRouterCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 7. Get actor context (`CrawleeOneActorInst`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const actorCtxKey = define(\n `${crawlerName}ActorCtx`,\n `${actorCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 8. Create Route types\n // E.g. `type `crawlerName`Route = CrawleeOneRout<`CrawlerName`Ctx>`\n const routeKey = define(\n `${crawlerName}Route`,\n `${routeType}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n {\n typeArgs: ctxTypeArgs,\n }\n );\n\n // E.g. `type `crawlerName`RouteHandler = CrawleeOneRouteHandler<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeHandlerValue = `${routeHandler}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`;\n const routeHandlerKey = define(`${crawlerName}RouteHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // E.g. `type `crawlerName`RouteWrapper = CrawleeOneRouteWrapper<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeWrapperKey = define(\n `${crawlerName}RouteWrapper`,\n `${routeWrapper}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherKey = define(\n `${crawlerName}RouteMatcher`,\n `${routeMatcher}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherFnKey = define(\n `${crawlerName}RouteMatcherFn`,\n `${routeMatcherFn}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 9. Create Crawler hooks\n\n // NOTE: Type for before/after handler is the same as for handlers\n // E.g. `type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n // E.g. `type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n const onBeforeHandlerKey = define(`${crawlerName}OnBeforeHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n const onAfterHandlerKey = define(`${crawlerName}OnAfterHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // type `CrawlerName`OnReady = <TIO, Telem>(actor: CrawleeOneActorInst<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;\n const onReadyKey = define(\n `${crawlerName}OnReady`,\n `(actor: ${actorCtxKey}<TInput, TIO, Telem>) => ${maybeP}<void>;`,\n { kind: 'typeFunc', typeArgs: ctxTypeArgs }\n );\n });\n\n const finalImports = Object.entries(imports).reduce<Record<string, string>>(\n (agg, [pkg, entriesSet]) => {\n const entries = [...entriesSet.values()];\n const formattedEntries = entries.map((e) => e.name).join(', ');\n const typeStr = entries.every((e) => e.typeOnly) ? 'type ' : '';\n agg[pkg] = `import ${typeStr}{ ${formattedEntries} } from \"${pkg}\"`;\n return agg;\n },\n {}\n );\n\n return { imports: finalImports, definitions };\n};\n\n/**\n * Generate types for CrawleeOne given a config.\n *\n * Config can be passed directly, or as the path to the config file.\n * If the config is omitted, it is automatically searched for using CosmicConfig.\n */\nexport const generateTypes = async (outfile: string, configOrPath?: CrawleeOneConfig | string) => {\n const config =\n !configOrPath || typeof configOrPath === 'string'\n ? await loadConfig(configOrPath)\n : configOrPath;\n validateConfig(config);\n\n /* eslint-disable-next-line @typescript-eslint/no-non-null-assertion */\n const { imports, definitions } = await parseTypesFromSchema(config!.schema);\n const fileContent =\n Object.values(imports).join('\\n') + '\\n\\n\\n' + Object.values(definitions).join('\\n\\n');\n\n const outdir = path.dirname(outfile);\n await fsp.mkdir(outdir, { recursive: true });\n await fsp.writeFile(outfile, fileContent, 'utf-8');\n\n console.log(`Done generating types to ${outfile}`);\n};\n"]}
|
|
1
|
+
{"version":3,"file":"codegen.js","sourceRoot":"","sources":["../../../../src/cli/commands/codegen.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,sDAAsD;AACtD,2DAA8B;AAC9B,gDAAwB;AAGxB,+CAA4D;AAC5D,qCAAsD;AActD,MAAM,SAAS,GAAG,CAAC,KAAe,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;AAC9E,MAAM,QAAQ,GAAG,CAAC,KAAe,EAAE,EAAE,CACnC,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;AAEpE,MAAM,UAAU,GAAG;IACjB,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,GAAG,WAAW,MAAM,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,QAAQ,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QAC7D,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,eAAe,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IACzD,CAAC;IACD,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,MAAM,WAAW,GAAG,CAAA,QAAQ,aAAR,QAAQ,uBAAR,QAAQ,CAAE,MAAM,EAAC,CAAC,CAAC,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;QACvE,OAAO,gBAAgB,IAAI,MAAM,WAAW,GAAG,KAAK,GAAG,CAAC;IAC1D,CAAC;IACD,iGAAiG;IACjG,IAAI,EAAE,CAAC,IAAY,EAAE,KAAa,EAAE,QAAmB,EAAE,EAAE;QACzD,OAAO,eAAe,IAAI,IAAI,KAAK,EAAE,CAAC;IACxC,CAAC;CACiF,CAAC;AAErF,MAAM,oBAAoB,GAAG,CAAC,MAA8B,EAAE,EAAE;IAC9D,mDAAmD;IACnD,MAAM,WAAW,GAA2B,EAAE,CAAC;IAE/C,8DAA8D;IAC9D,MAAM,OAAO,GAA8D,EAAE,CAAC;IAE9E,MAAM,UAAU,GAAG,CACjB,GAAW,EACX,UAAe,EACf,OAAgC,EAChC,EAAE;QACF,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,aAAP,OAAO,cAAP,OAAO,GAAI,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,IAAI,GAAG,EAAE,CAAC,CAAC;QAC3D,UAAU,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC;QAC9D,qEAAqE;QACrE,0CAA0C;QAC1C,OAAO,UAAU,CAAC,MAAM,CAAsB,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE;YACzD,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACf,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAS,CAAC,CAAC;IAChB,CAAC,CAAC;IAEF,MAAM,MAAM,GAAG,CACb,GAAW,EACX,KAA8B,EAC9B,OAAiE,EACjE,EAAE;;QACF,MAAM,IAAI,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,IAAI,mCAAI,MAAM,CAAC;QACrC,MAAM,QAAQ,GAAG,MAAA,OAAO,aAAP,OAAO,uBAAP,OAAO,CAAE,QAAQ,mCAAI,EAAE,CAAC;QACzC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACtB,MAAM,WAAW,GAAG,OAAO,KAAK,KAAK,UAAU,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;YAClE,MAAM,SAAS,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;YACnC,MAAM,YAAY,GAAG,SAAS,CAAC,GAAG,EAAE,WAAW,EAAE,QAAQ,CAAC,CAAC;YAC3D,WAAW,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC;QAClC,CAAC;QACD,qDAAqD;QACrD,OAAO,GAAG,CAAC;IACb,CAAC,CAAC;IAEF,oBAAoB;IACpB,MAAM,EACJ,cAAc,EAAE,UAAU,EAC1B,wBAAwB,EAAE,cAAc,EACxC,mBAAmB,EAAE,QAAQ,EAC7B,eAAe,EAAE,SAAS,EAC1B,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,sBAAsB,EAAE,YAAY,EACpC,wBAAwB,EAAE,cAAc,EACxC,YAAY,EAAE,MAAM,EACpB,mBAAmB,EAAE,SAAS,EAC9B,aAAa,EAAE,OAAO,EACtB,cAAc,EAAE,QAAQ,EACxB,UAAU,EAAE,YAAY,GACzB,GAAG,UAAU,CAAC,aAAa,EAAE;QAC5B,gBAAgB;QAChB,0BAA0B;QAC1B,qBAAqB;QACrB,iBAAiB;QACjB,wBAAwB;QACxB,wBAAwB;QACxB,wBAAwB;QACxB,0BAA0B;QAC1B,cAAc;QACd,qBAAqB;QACrB,eAAe;QACf,gBAAgB;QAChB,YAAY;KACb,CAAC,CAAC;IACH,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,qCAAyB,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IAEpF,kBAAkB;IAClB,MAAM,MAAM,GAAG,MAAM,CAAC,cAAc,EAAE,gBAAgB,EAAE,EAAE,QAAQ,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAE7E,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,EAAE,EAAE;QACjE,MAAM,WAAW,GAAG,OAAO,CAAC,IAAI,CAAC;QAEjC,wEAAwE;QACxE,MAAM,uBAAuB,GAAG,qCAAyB,CAAC,WAAW,CAAC,CAAC;QAEvE,oCAAoC;QACpC,+DAA+D;QAC/D,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,WAAW,OAAO,EAAE,GAAG,EAAE,CAAC,SAAS,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC;QAEhF,iGAAiG;QACjG,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,WAAW,WAAW,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;YACrF,IAAI,EAAE,MAAM;SACb,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG;YAClB,wCAAwC,UAAU,EAAE;YACpD,eAAe,MAAM,MAAM,MAAM,EAAE;YACnC,iBAAiB,SAAS,gBAAgB,SAAS,YAAY;SAChE,CAAC;QAEF,+BAA+B;QAC/B,4HAA4H;QAC5H,MAAM,MAAM,GAAG,MAAM,CACnB,GAAG,WAAW,KAAK,EACnB,GAAG,OAAO,IAAI,uBAAuB,KAAK,QAAQ,uBAAuB,EACzE,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,gCAAgC;QAChC,0FAA0F;QAC1F,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,eAAe,QAAQ,KAAK,WAAW,MAAM,MAAM,sCAAsC,YAAY,KAAK,WAAW,MAAM,MAAM,2CAA2C,WAAW,MAAM,EAC7L,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,WAAW,EAAE,CACxC,CAAC;QAEF,2DAA2D;QAC3D,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,YAAY,GAAG,MAAM,CACzB,GAAG,WAAW,eAAe,EAC7B,GAAG,cAAc,IAAI,MAAM,uBAAuB,EAClD,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,+CAA+C;QAC/C,iFAAiF;QACjF,oCAAoC;QACpC,MAAM,WAAW,GAAG,MAAM,CACxB,GAAG,WAAW,UAAU,EACxB,GAAG,QAAQ,IAAI,MAAM,uBAAuB,EAC5C,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,wBAAwB;QACxB,oEAAoE;QACpE,MAAM,QAAQ,GAAG,MAAM,CACrB,GAAG,WAAW,OAAO,EACrB,GAAG,SAAS,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EAClF;YACE,QAAQ,EAAE,WAAW;SACtB,CACF,CAAC;QAEF,+GAA+G;QAC/G,MAAM,iBAAiB,GAAG,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,CAAC;QAChH,MAAM,eAAe,GAAG,MAAM,CAAC,GAAG,WAAW,cAAc,EAAE,iBAAiB,EAAE;YAC9E,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,+GAA+G;QAC/G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,eAAe,GAAG,MAAM,CAC5B,GAAG,WAAW,cAAc,EAC5B,GAAG,YAAY,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACrF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0GAA0G;QAC1G,MAAM,iBAAiB,GAAG,MAAM,CAC9B,GAAG,WAAW,gBAAgB,EAC9B,GAAG,cAAc,IAAI,MAAM,yBAAyB,YAAY,uBAAuB,EACvF,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC1B,CAAC;QAEF,0BAA0B;QAE1B,kEAAkE;QAClE,mHAAmH;QACnH,kHAAkH;QAClH,MAAM,kBAAkB,GAAG,MAAM,CAAC,GAAG,WAAW,iBAAiB,EAAE,iBAAiB,EAAE;YACpF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QACH,MAAM,iBAAiB,GAAG,MAAM,CAAC,GAAG,WAAW,gBAAgB,EAAE,iBAAiB,EAAE;YAClF,QAAQ,EAAE,WAAW;SACtB,CAAC,CAAC;QAEH,qKAAqK;QACrK,MAAM,UAAU,GAAG,MAAM,CACvB,GAAG,WAAW,SAAS,EACvB,WAAW,WAAW,4BAA4B,MAAM,SAAS,EACjE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,WAAW,EAAE,CAC5C,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,CACjD,CAAC,GAAG,EAAE,CAAC,GAAG,EAAE,UAAU,CAAC,EAAE,EAAE;QACzB,MAAM,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;QACzC,MAAM,gBAAgB,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/D,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC;QAChE,GAAG,CAAC,GAAG,CAAC,GAAG,UAAU,OAAO,KAAK,gBAAgB,YAAY,GAAG,GAAG,CAAC;QACpE,OAAO,GAAG,CAAC;IACb,CAAC,EACD,EAAE,CACH,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,CAAC;AAChD,CAAC,CAAC;AAEF;;;;;GAKG;AACI,MAAM,aAAa,GAAG,CAAO,OAAe,EAAE,YAAwC,EAAE,EAAE;IAC/F,MAAM,MAAM,GACV,CAAC,YAAY,IAAI,OAAO,YAAY,KAAK,QAAQ;QAC/C,CAAC,CAAC,MAAM,IAAA,mBAAU,EAAC,YAAY,CAAC;QAChC,CAAC,CAAC,YAAY,CAAC;IACnB,IAAA,uBAAc,EAAC,MAAM,CAAC,CAAC;IAEvB,uEAAuE;IACvE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,MAAM,oBAAoB,CAAC,MAAO,CAAC,MAAM,CAAC,CAAC;IAC5E,MAAM,WAAW,GACf,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEzF,MAAM,MAAM,GAAG,cAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,kBAAG,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7C,MAAM,kBAAG,CAAC,SAAS,CAAC,OAAO,EAAE,WAAW,EAAE,OAAO,CAAC,CAAC;IAEnD,OAAO,CAAC,GAAG,CAAC,4BAA4B,OAAO,EAAE,CAAC,CAAC;AACrD,CAAC,CAAA,CAAC;AAjBW,QAAA,aAAa,iBAiBxB","sourcesContent":["/* eslint-disable @typescript-eslint/no-unused-vars */\nimport fsp from 'fs/promises';\nimport path from 'path';\n\nimport type { CrawleeOneConfig, CrawleeOneConfigSchema } from '../../types/config';\nimport { crawlingContextNameByType } from '../../constants';\nimport { loadConfig, validateConfig } from './config';\n// NOTE: We intentionally import these to know when their names change\nimport type { AllActorInputs } from '../../lib/input';\nimport type { CrawleeOneActorInst, CrawleeOneActorRouterCtx } from '../../lib/actor/types';\nimport type {\n CrawleeOneRoute,\n CrawleeOneRouteHandler,\n CrawleeOneRouteMatcher,\n CrawleeOneRouteMatcherFn,\n CrawleeOneRouteWrapper,\n} from '../../lib/router/types';\nimport type { MaybePromise } from '../../utils/types';\nimport type { CrawleeOneArgs, crawleeOne } from '../../api';\n\nconst makeUnion = (items: string[]) => items.map((s) => `\"${s}\"`).join(` | `);\nconst makeEnum = (items: string[]) =>\n '{\\n' + items.map((s) => ` '${s}' = '${s}'`).join(`,\\n`) + '\\n}';\n\nconst formatters = {\n type: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name}${typeArgsStr} = ${value};`;\n },\n typeFunc: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export type ${name} = ${typeArgsStr}${value};`;\n },\n func: (name: string, value: string, typeArgs?: string[]) => {\n const typeArgsStr = typeArgs?.length ? `<${typeArgs.join(', ')}>` : '';\n return `export const ${name} = ${typeArgsStr}${value};`;\n },\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n enum: (name: string, value: string, typeArgs?: string[]) => {\n return `export enum ${name} ${value}`;\n },\n} satisfies Record<string, (name: string, value: string, args?: string[]) => string>;\n\nconst parseTypesFromSchema = (schema: CrawleeOneConfigSchema) => {\n /** Remember which types we've already generated */\n const definitions: Record<string, string> = {};\n\n /** Remember what values need to be imported and from where */\n const imports: Record<string, Set<{ name: string; typeOnly?: boolean }>> = {};\n\n const addImports = <T extends string>(\n pkg: string,\n newEntries: T[],\n options?: { typeOnly?: boolean }\n ) => {\n const { typeOnly } = options ?? {};\n const entries = (imports[pkg] = imports[pkg] || new Set());\n newEntries.forEach((name) => entries.add({ name, typeOnly }));\n // Return the entries as variables, so we can define them in a single\n // place but still reference them in code.\n return newEntries.reduce<{ [Key in T]: Key }>((agg, key) => {\n agg[key] = key;\n return agg;\n }, {} as any);\n };\n\n const define = (\n key: string,\n value: string | (() => string),\n options?: { typeArgs?: string[]; kind?: keyof typeof formatters }\n ) => {\n const kind = options?.kind ?? 'type';\n const typeArgs = options?.typeArgs ?? [];\n if (!definitions[key]) {\n const resolvedVal = typeof value === 'function' ? value() : value;\n const formatter = formatters[kind];\n const valFormatted = formatter(key, resolvedVal, typeArgs);\n definitions[key] = valFormatted;\n }\n // Return the key as variable, so we can reference it\n return key;\n };\n\n // 1. Define imports\n const {\n AllActorInputs: actorInput,\n CrawleeOneActorRouterCtx: actorRouterCtx,\n CrawleeOneActorInst: actorCtx,\n CrawleeOneRoute: routeType,\n CrawleeOneRouteHandler: routeHandler,\n CrawleeOneRouteWrapper: routeWrapper,\n CrawleeOneRouteMatcher: routeMatcher,\n CrawleeOneRouteMatcherFn: routeMatcherFn,\n CrawleeOneIO: ioType,\n CrawleeOneTelemetry: telemType,\n CrawleeOneCtx: ctxType,\n CrawleeOneArgs: argsType,\n crawleeOne: crawleeOneFn,\n } = addImports('crawlee-one', [\n 'AllActorInputs',\n 'CrawleeOneActorRouterCtx',\n 'CrawleeOneActorInst',\n 'CrawleeOneRoute',\n 'CrawleeOneRouteHandler',\n 'CrawleeOneRouteWrapper',\n 'CrawleeOneRouteMatcher',\n 'CrawleeOneRouteMatcherFn',\n 'CrawleeOneIO',\n 'CrawleeOneTelemetry',\n 'CrawleeOneCtx',\n 'CrawleeOneArgs',\n 'crawleeOne',\n ]);\n addImports('crawlee', Object.values(crawlingContextNameByType), { typeOnly: true });\n\n // 2. Define utils\n const maybeP = define('MaybePromise', 'T | Promise<T>', { typeArgs: ['T'] });\n\n Object.entries(schema.crawlers).forEach(([crawlerName, crawler]) => {\n const crawlerType = crawler.type;\n\n // 2. Get `CrawlingContext`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n const crawlingContextTypeName = crawlingContextNameByType[crawlerType];\n\n // 3. Generate type for route labels\n // type `CrawlerName`Label = \"detailPage\" | \"otherLabel\" | ...;\n const labelKey = define(`${crawlerName}Label`, () => makeUnion(crawler.routes));\n\n // enum `CrawlerName`LabelEnum { \"detailPage\" = \"detailPage\", \"otherLabel\" = \"otherLabel\", ... };\n const labelEnumKey = define(`${crawlerName}LabelEnum`, () => makeEnum(crawler.routes), {\n kind: 'enum',\n });\n\n const ctxTypeArgs = [\n `TInput extends Record<string, any> = ${actorInput}`,\n `TIO extends ${ioType} = ${ioType}`,\n `Telem extends ${telemType}<any, any> = ${telemType}<any, any>`,\n ];\n\n // 4. Create CrawleeOne context\n // type `CrawlerName`Ctx = <TIO, Telem>CrawleeOneCtx<CheerioCrawlingContext, `CrawlerName`Label, AllActorInputs, TIO, Telem>\n const ctxKey = define(\n `${crawlerName}Ctx`,\n `${ctxType}<${crawlingContextTypeName}, ${labelKey}, TInput, TIO, Telem>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 5. Create CrawleeOne instance\n // const customCrawler = <TIO, Telem>(args: CrawleeOneArgs<TType, T>) => crawleeOne(args);\n const crawlerKey = define(\n `${crawlerName}Crawler`,\n `(args: Omit<${argsType}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>, 'type'>) => ${crawleeOneFn}<\"${crawlerType}\", ${ctxKey}<TInput, TIO, Telem>>({ ...args, type: \"${crawlerType}\"});`,\n { kind: 'func', typeArgs: ctxTypeArgs }\n );\n\n // 6. Get actor router context (`CrawleeOneActorRouterCtx`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const routerCtxKey = define(\n `${crawlerName}RouterContext`,\n `${actorRouterCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 7. Get actor context (`CrawleeOneActorInst`)\n // NOTE: We use `AllActorInput` for the Actor input, because this type definition\n // will be used by developers.\n const actorCtxKey = define(\n `${crawlerName}ActorCtx`,\n `${actorCtx}<${ctxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 8. Create Route types\n // E.g. `type `crawlerName`Route = CrawleeOneRout<`CrawlerName`Ctx>`\n const routeKey = define(\n `${crawlerName}Route`,\n `${routeType}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n {\n typeArgs: ctxTypeArgs,\n }\n );\n\n // E.g. `type `crawlerName`RouteHandler = CrawleeOneRouteHandler<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeHandlerValue = `${routeHandler}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`;\n const routeHandlerKey = define(`${crawlerName}RouteHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // E.g. `type `crawlerName`RouteWrapper = CrawleeOneRouteWrapper<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeWrapperKey = define(\n `${crawlerName}RouteWrapper`,\n `${routeWrapper}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherKey = define(\n `${crawlerName}RouteMatcher`,\n `${routeMatcher}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // E.g. `type `crawlerName`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Ctx, CrawlerName`ActorRouterCtx>`\n const routeMatcherFnKey = define(\n `${crawlerName}RouteMatcherFn`,\n `${routeMatcherFn}<${ctxKey}<TInput, TIO, Telem>, ${routerCtxKey}<TInput, TIO, Telem>>`,\n { typeArgs: ctxTypeArgs }\n );\n\n // 9. Create Crawler hooks\n\n // NOTE: Type for before/after handler is the same as for handlers\n // E.g. `type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n // E.g. `type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>`\n const onBeforeHandlerKey = define(`${crawlerName}OnBeforeHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n const onAfterHandlerKey = define(`${crawlerName}OnAfterHandler`, routeHandlerValue, {\n typeArgs: ctxTypeArgs,\n });\n\n // type `CrawlerName`OnReady = <TIO, Telem>(actor: CrawleeOneActorInst<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;\n const onReadyKey = define(\n `${crawlerName}OnReady`,\n `(actor: ${actorCtxKey}<TInput, TIO, Telem>) => ${maybeP}<void>;`,\n { kind: 'typeFunc', typeArgs: ctxTypeArgs }\n );\n });\n\n const finalImports = Object.entries(imports).reduce<Record<string, string>>(\n (agg, [pkg, entriesSet]) => {\n const entries = [...entriesSet.values()];\n const formattedEntries = entries.map((e) => e.name).join(', ');\n const typeStr = entries.every((e) => e.typeOnly) ? 'type ' : '';\n agg[pkg] = `import ${typeStr}{ ${formattedEntries} } from \"${pkg}\"`;\n return agg;\n },\n {}\n );\n\n return { imports: finalImports, definitions };\n};\n\n/**\n * Generate types for CrawleeOne given a config.\n *\n * Config can be passed directly, or as the path to the config file.\n * If the config is omitted, it is automatically searched for using CosmicConfig.\n */\nexport const generateTypes = async (outfile: string, configOrPath?: CrawleeOneConfig | string) => {\n const config =\n !configOrPath || typeof configOrPath === 'string'\n ? await loadConfig(configOrPath)\n : configOrPath;\n validateConfig(config);\n\n /* eslint-disable-next-line @typescript-eslint/no-non-null-assertion */\n const { imports, definitions } = await parseTypesFromSchema(config!.schema);\n const fileContent =\n Object.values(imports).join('\\n') + '\\n\\n\\n' + Object.values(definitions).join('\\n\\n');\n\n const outdir = path.dirname(outfile);\n await fsp.mkdir(outdir, { recursive: true });\n await fsp.writeFile(outfile, fileContent, 'utf-8');\n\n console.log(`Done generating types to ${outfile}`);\n};\n"]}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"scrapeListing.js","sourceRoot":"","sources":["../../../../src/lib/actions/scrapeListing.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mCAAuC;AAEvC,6CAA+D;AAC/D,yCAA8C;AA8G9C;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,CAA8B,EACxD,OAAO,EACP,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,GAAG,GACsC,EAA2B,EAAE;IACtE,IAAI,YAAY,GAAwB,OAAO,CAAC;IAEhD,MAAM,6BAA6B,GAAG,GAAS,EAAE;QAC/C,MAAM,aAAa,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAC5F,OAAO,IAAA,sBAAa,EAAC,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAA,CAAC;IAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,MAAM,SAAS,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QACpF,OAAO,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,GAAS,EAAE;QAC9B,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,OAAO,0BAA0B,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC,CAAA,CAAC;IAEF,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,6EAA6E;QAC7E,gDAAgD;QAChD,wCAAwC;QACxC,mDAAmD;QACnD,wCAAwC;QACxC,wCAAwC;QACxC,wCAAwC;QACxC,iCAAiC;QACjC,wCAAwC;QACxC,EAAE;QACF,uEAAuE;QACvE,8DAA8D;QAC9D,sCAAsC;QACtC,wCAAwC;QACxC,EAAE;QACF,sEAAsE;QACtE,iBAAiB;QACjB,0CAA0C;QAC1C,0CAA0C;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QACtF,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,GAAG,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAErE,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,IAAI,0BAA0B,KAAK,CAAC,CAAC;YACnC,MAAM,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAEvE,MAAM,iBAAiB,GAAG,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACnE,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,0BAA0B,GAAG,CAAC,CAAC,CAAC;QAE1E,GAAG,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC1C,MAAM,iBAAiB,CAAC,SAAS,EAAE,CAAC;QACpC,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE;YACnC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC1B,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;SAC1B;IACH,CAAC,CAAA,CAAC;IAEF,+CAA+C;IAC/C,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,MAAM,UAAU,EAAE,CAAC;QAEnB,8CAA8C;QAC9C,YAAY,GAAG,EAAE,CAAC;QAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE;YAC5B,MAAM,eAAe,GAAG,iBAAiB;gBACvC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC;YACT,IAAI,CAAC,eAAe,EAAE;gBACpB,GAAG,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,sBAAsB,CAAC,CAAC;gBACpE,MAAM;aACP;YAED,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE;gBACpB,GAAG,CAAC,IAAI,CAAC,oBAAoB,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;gBAC7C,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;aAC1B;iBAAM;gBACL,GAAG,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,IAAI,qDAAqD,CAAC,CAAC;aACvF;YAED,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;SAC3B;QAED,GAAG,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACjC,MAAM,CAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,OAAO,CAAC,CAAA,CAAC;IACnC,CAAC,CAAA,CAAC;IAEF,yBAAyB;IACzB,MAAM,UAAU,GAAG,GAAS,EAAE;QAC5B,GAAG,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACnC,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,OAAO,CAAC,CAAA,CAAC;QAChC,YAAY,GAAG,OAAO,CAAC;QACvB,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1C,CAAC,CAAA,CAAC;IAEF,OAAO;QACL,SAAS;QACT,SAAS;QACT,YAAY;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC,CAAC;AAEF,yFAAyF;AAClF,MAAM,oBAAoB,GAAG,CAClC,OAAgD,EAChD,EAAE;IACF,MAAM,EACJ,OAAO,EACP,SAAS,EACT,gBAAgB,GAAG,KAAK,EACxB,GAAG,EACH,MAAM,EACN,UAAU,EACV,iBAAiB,EAEjB,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,kBAAkB,GAAG,CAAC,EACtB,kBAAkB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACnD,eAAe,EACf,cAAc,EAEd,cAAc,EACd,qBAAqB,GAAG,CAAC,EACzB,qBAAqB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACtD,oBAAoB,EAEpB,cAAc,EACd,YAAY,GAAG,GAAG,GACnB,GAAG,OAAO,CAAC;IAEZ,4DAA4D;IAC5D,MAAM,KAAK,GAAc,EAAE,CAAC;IAE5B,MAAM,IAAA,sBAAc,EAAC,SAAS,EAAE,CAAO,QAAQ,EAAE,KAAK,EAAE,EAAE;QACxD,IAAI,gBAAgB,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QAE1C,MAAM,KAAK,GAAG,GAAG,QAAQ,KAAK,KAAK,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAE/D,IAAI,eAAe,GAAG,KAAK,CAAC;QAC5B,MAAM,KAAK,GAAG,GAAG,EAAE,GAAG,eAAe,GAAG,IAAI,CAAA,CAAC,CAAC,CAAC,CAAC,kBAAkB;QAElE,0CAA0C;QAC1C,IAAI,SAAS,GAAmC,IAAI,CAAC;QACrD,MAAM,SAAS,GAAG,GAA4C,EAAE;;YAAC,OAAA,CAAC;gBAChE,OAAO;gBACP,GAAG;gBACH,QAAQ;gBACR,OAAO;gBACP,eAAe,EAAE,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,SAAS,mCAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;gBACnD,KAAK;aACN,CAAC,CAAA;SAAA,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACrC,IAAA,iBAAW,EAAC,QAAkB,CAAC,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACpC,MAAM,CAAA,UAAU,aAAV,UAAU,uBAAV,UAAU,CAAG,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAA,CAAC;QAC1C,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;QAE7C,SAAS,GAAG,mBAAmB,CAAC;YAC9B,OAAO,EAAE,SAAS,EAAE;YACpB,OAAO;YACP,iBAAiB;YACjB,eAAe;YACf,cAAc;YACd,GAAG;SACJ,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,2CAA2C,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACjF,MAAM,CAAA,iBAAiB,aAAjB,iBAAiB,uBAAjB,iBAAiB,CAAG,SAAS,EAAE,CAAC,CAAA,CAAC;QACvC,GAAG,CAAC,KAAK,CAAC,gDAAgD,KAAK,GAAG,CAAC,CAAC,CAAC,kBAAkB;QAEvF,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAElE,IAAI,wBAAwB,GAAG,IAAI,CAAC;QACpC,OAAO,wBAAwB,IAAI,CAAC,eAAe,EAAE;YACnD,cAAc;YACd,0CAA0C;YAC1C,GAAG,CAAC,IAAI,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;YAChD,MAAM,IAAA,kBAAU,EACd,GAAS,EAAE;gBACT,IAAI,CAAC,SAAS;oBAAE,MAAM,KAAK,CAAC,+DAA+D,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAEvH,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC;gBAClD,IAAI,CAAC,cAAc,IAAI,CAAC,cAAc,EAAE;oBACtC,GAAG,CAAC,IAAI,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;oBACjD,OAAO;iBACR;gBAED,GAAG,CAAC,KAAK,CAAC,2BAA2B,KAAK,EAAE,CAAC,CAAC;gBAC9C,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,GAAG,CAAC,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC,CAAA,EACD;gBACE,UAAU,EAAE,kBAAkB;gBAC9B,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,kBAAkB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;aAC/E,CACF,CAAC;YAEF,IAAI,iBAAiB,GAAG,IAAI,CAAC;YAC7B,OAAO,iBAAiB,IAAI,CAAC,eAAe,EAAE;gBAC5C,kBAAkB;gBAClB,IAAI,UAAU,GAAG,WAAW,CAAC;gBAC7B,IAAI,MAAM,EAAE;oBACV,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;oBAC7C,UAAU,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACvC,GAAG,CAAC,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;iBACnD;gBACD,MAAM,SAAS,GAAG,GAAG,KAAK,KAAK,UAAU,GAAG,CAAC;gBAE7C,qBAAqB;gBACrB,GAAG,CAAC,IAAI,CAAC,8BAA8B,SAAS,EAAE,CAAC,CAAC;gBACpD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,kBAAU,EACjC,CAAO,UAAU,EAAE,EAAE,kDAAC,OAAA,cAAc,CAAC,SAAS,EAAE,EAAE,UAAU,CAAC,CAAA,GAAA,EAC7D;oBACE,UAAU,EAAE,qBAAqB;oBACjC,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,qBAAqB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;iBAClF,CACF,CAAC;gBACF,GAAG,CAAC,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;gBAE1D,MAAM,SAAS,GAAG,MAAM,aAAN,MAAM,cAAN,MAAM,GAAI,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;gBACzB,GAAG,CAAC,IAAI,CAAC,SAAS,SAAS,CAAC,MAAM,kBAAkB,SAAS,EAAE,CAAC,CAAC;gBAEjE,6CAA6C;gBAC7C,IAAI,gBAAgB,IAAI,eAAe,EAAE;oBACvC,iBAAiB,GAAG,KAAK,CAAC;oBAC1B,IAAI,gBAAgB;wBAAE,GAAG,CAAC,IAAI,CAAC,6DAA6D,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;yBACvH,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBACjE,SAAS;iBACV;gBAED,GAAG,CAAC,KAAK,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBACxF,MAAM,CAAA,oBAAoB,aAApB,oBAAoB,uBAApB,oBAAoB,CAAG,SAAS,EAAE,EAAE,SAAS,CAAC,CAAA,CAAC;gBACrD,GAAG,CAAC,KAAK,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAE7F,IAAI,cAAc,IAAI,CAAC,eAAe,EAAE;oBACtC,wFAAwF;oBACxF,IAAI;wBACF,GAAG,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;wBAC1D,MAAM,cAAc,CAAC,SAAS,EAAE,EAAE,SAAS,CAAC,CAAC;wBAC7C,GAAG,CAAC,KAAK,CAAC,yCAAyC,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;qBACpF;oBAAC,OAAO,CAAC,EAAE;wBACV,GAAG,CAAC,IAAI,CAAC,2CAA2C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;wBACpF,GAAG,CAAC,KAAK,CAAE,CAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACnC,iBAAiB,GAAG,KAAK,CAAC;qBAC3B;iBACF;qBAAM;oBACL,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBAC5D,iBAAiB,GAAG,KAAK,CAAC;iBAC3B;gBAED,8CAA8C;gBAC9C,MAAM,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC,CAAC;aAC3D;YAED,sEAAsE;YACtE,GAAG,CAAC,KAAK,CAAC,8DAA8D,KAAK,EAAE,CAAC,CAAC;YACjF,wBAAwB,GAAG,cAAc,IAAI,CAAC,MAAM,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC;YAC9E,GAAG,CAAC,KAAK,CAAC,mEAAmE,KAAK,EAAE,CAAC,CAAC;YAEtF,IAAI,wBAAwB,EAAE;gBAC5B,IAAI,CAAC,eAAe;oBAAE,GAAG,CAAC,IAAI,CAAC,oEAAoE,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;;oBAC1H,GAAG,CAAC,IAAI,CAAC,+FAA+F,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;aAC1I;;gBAAM,GAAG,CAAC,IAAI,CAAC,uDAAuD,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;SACpG;QACD,GAAG,CAAC,IAAI,CAAC,gBAAgB,KAAK,EAAE,CAAC,CAAC;IACpC,CAAC,CAAA,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAxKW,QAAA,oBAAoB,wBAwK/B","sourcesContent":["import { findLastIndex } from 'lodash';\n\nimport { serialAsyncMap, retryAsync } from '../../utils/async';\nimport { validateUrl } from '../../utils/url';\nimport type { MaybePromise } from '../../utils/types';\n\n// TODO - Clean this up and merge it into PageLib\n\nexport interface ListingLogger {\n debug: (msg: string, data?: any) => void;\n info: (msg: string, data?: any) => void;\n warning: (msg: string, data?: any) => void;\n error: (msg: string, data?: any) => void;\n}\n\nexport interface ListingPageFilter {\n name: string;\n disabled?: boolean;\n initState: () => MaybePromise<boolean>;\n resetState: () => MaybePromise<void>;\n nextState: () => MaybePromise<void>;\n hasNextState: () => MaybePromise<boolean>;\n hasState: () => MaybePromise<boolean>;\n loadState: () => MaybePromise<void>;\n}\n\nexport interface ListingFiltersSetupOptions<Ctx extends object, UrlType> {\n context: ListingPageScraperContext<Ctx, UrlType>;\n filters?: ListingPageFilter[];\n shouldApplyFilter?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n filter: ListingPageFilter,\n filters: ListingPageFilter[]\n ) => MaybePromise<boolean>;\n onResetFilters?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n onFiltersLoaded?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n log: ListingLogger;\n}\n\ntype ListingFilterController = Pick<ListingPageFilter, 'loadState' | 'nextState' | 'hasNextState' | 'hasState'>; // prettier-ignore\n\nexport interface ListingPageScraperContext<Ctx extends object, UrlType> {\n context: Ctx;\n log: ListingLogger;\n startUrl: UrlType;\n filters: ListingPageFilter[];\n /** Use this if you need to load filters again (eg after reloading page manually) */\n loadFilterState: () => MaybePromise<void>;\n /** Call this function from any callback to stop scraping */\n abort: () => void;\n}\n\n// prettier-ignore\nexport interface ListingPageScraperOptions<Ctx extends object, UrlType> extends Omit<ListingFiltersSetupOptions<Ctx, UrlType>, 'context'> {\n context: Ctx;\n startUrls: UrlType[];\n listingCountOnly?: boolean;\n /** Get ID of the current page in the pagination, so it can be logged */\n pageId?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<string>;\n log: ListingLogger;\n\n onNavigate?: (context: ListingPageScraperContext<Ctx, UrlType>, url: UrlType) => MaybePromise<void>;\n /**\n * Hook triggered after navigating to the url using Page.goto().\n *\n * One use of this hook is to conditionally disable/enable filters based on the page content.\n **/\n onAfterNavigation?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n\n /** How many attempts are retried after filters failed to load. Defaults to 3 */\n loadFiltersRetries?: number;\n /**\n * Hook triggered after a failed attempt at loading listings page filters.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onLoadFiltersError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n\n /** Main logic to extract entries from a page */\n extractEntries: (context: ListingPageScraperContext<Ctx, UrlType>, retryIndex: number) => MaybePromise<UrlType[]>;\n /** How many attempts are retried after failed to scrape entries from a listing. Defaults to 3 */\n extractEntriesRetries?: number;\n /**\n * Hook triggered after a failed attempt at scraping entries from a listing.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onExtractEntriesError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n onExtractEntriesDone?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n\n /**\n * If goToNextPage hook is defined, it will be called after each page. To indicate that there's no more\n * pages left, throw an error.\n **/\n onGoToNextPage?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n /** How long to wait after we've navigated to the next page and before we start extracting? */\n nextPageWait?: number;\n}\n\n/**\n * Given configuration for listing page filters, set up functions to\n * navigate through the different states of filters, to allow to paginate\n * through all states.\n */\nconst setupListingFilters = <Ctx extends object, UrlType>({\n context,\n filters = [],\n shouldApplyFilter,\n onResetFilters,\n onFiltersLoaded,\n log,\n}: ListingFiltersSetupOptions<Ctx, UrlType>): ListingFilterController => {\n let filtersStack: ListingPageFilter[] = filters;\n\n const getNextFilterStateChangeIndex = async () => {\n const hasNextStates = await serialAsyncMap(filtersStack, (filter) => filter.hasNextState());\n return findLastIndex(hasNextStates, (x) => x);\n };\n\n const hasState = async () => {\n const hasStates = await serialAsyncMap(filtersStack, (filter) => filter.hasState());\n return hasStates.some(Boolean);\n };\n\n const hasNextState = async () => {\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n return nextFilterStateChangeIndex > -1;\n };\n\n const nextState = async () => {\n // Imagine we have 4 filters, each has 3 states (eg 3 options to select from)\n // We start with all filters in the first state:\n // State 1: F1(1), F2(1), F3(1), F4(1)\n // As we progress, we increment it akin to numbers:\n // State 2: F1(1), F2(1), F3(1), F4(2)\n // State 3: F1(1), F2(1), F3(1), F4(3)\n // State 4: F1(1), F2(1), F3(2), F4(1)\n // All the way to the last state:\n // State n: F1(3), F2(3), F3(3), F4(3)\n //\n // When we want move to a next state, we identify the RIGHT-most filter\n // whose state can be incremented (in this case we select F2):\n // YES YES NO NO\n // State x: F1(1), F2(2), F3(3), F4(3)\n //\n // When we increment a filter state, all the other filter to the RIGHT\n // will be reset:\n // State x: F1(1), F2(2), F3(3), F4(3)\n // State x+1: F1(1), F2(3), F3(1), F4(1)\n\n const initStates = await serialAsyncMap(filtersStack, (filter) => filter.initState());\n if (initStates.some(Boolean)) return log.info('Initialised filters');\n\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n if (nextFilterStateChangeIndex === -1)\n throw Error('Cannot select next filter state - reached end of list');\n\n const filterToNextState = filtersStack[nextFilterStateChangeIndex];\n const filtersToReset = filtersStack.slice(nextFilterStateChangeIndex + 1);\n\n log.info('Setting filters to next state');\n await filterToNextState.nextState();\n for (const filter of filtersToReset) {\n await filter.resetState();\n await filter.nextState();\n }\n };\n\n /** Load current filter state in the webpage */\n const loadState = async () => {\n await resetState();\n\n // Load filters one by one, and only if needed\n filtersStack = [];\n for (const filter of filters) {\n const shouldUseFilter = shouldApplyFilter\n ? await shouldApplyFilter(context, filter, filters)\n : true;\n if (!shouldUseFilter) {\n log.info(`Not applying filter \"${filter.name}\" or further filters`);\n break;\n }\n\n if (!filter.disabled) {\n log.info(`Applying filter \"${filter.name}\"`);\n await filter.loadState();\n } else {\n log.info(`Filter \"${filter.name}\" recognised but not applied because it is disabled`);\n }\n\n filtersStack.push(filter);\n }\n\n log.info(`Done loading filters`);\n await onFiltersLoaded?.(context);\n };\n\n /** Reset filter state */\n const resetState = async () => {\n log.info(`Resetting filter state`);\n await onResetFilters?.(context);\n filtersStack = filters;\n log.info(`Resetting filter state done`);\n };\n\n return {\n loadState,\n nextState,\n hasNextState,\n hasState,\n };\n};\n\n/** Get entries from a listing page (eg URLs to profiles that should be scraped later) */\nexport const scrapeListingEntries = async <Ctx extends object, UrlType>(\n options: ListingPageScraperOptions<Ctx, UrlType>\n) => {\n const {\n context,\n startUrls,\n listingCountOnly = false,\n log,\n pageId,\n onNavigate,\n onAfterNavigation,\n\n filters = [],\n shouldApplyFilter,\n loadFiltersRetries = 3,\n onLoadFiltersError = (_, err) => console.error(err),\n onFiltersLoaded,\n onResetFilters,\n\n extractEntries,\n extractEntriesRetries = 3,\n onExtractEntriesError = (_, err) => console.error(err),\n onExtractEntriesDone,\n\n onGoToNextPage,\n nextPageWait = 500,\n } = options;\n\n /** Collection of ALL urls across all pages and startUrls */\n const links: UrlType[] = [];\n\n await serialAsyncMap(startUrls, async (startUrl, index) => {\n if (listingCountOnly && index > 0) return;\n\n const logId = `${startUrl} (${index + 1}/${startUrls.length})`;\n\n let userAskedToStop = false;\n const abort = () => { userAskedToStop = true }; // prettier-ignore\n\n // Prepare context shared across all hooks\n let filterObj: ListingFilterController | null = null;\n const genCtxArg = (): ListingPageScraperContext<Ctx, UrlType> => ({\n context,\n log,\n startUrl,\n filters,\n loadFilterState: filterObj?.loadState ?? (() => {}),\n abort,\n });\n\n log.debug(`Validating URL ${logId}`);\n validateUrl(startUrl as string);\n log.info(`Navigating URL ${logId}`);\n await onNavigate?.(genCtxArg(), startUrl);\n log.debug(`Done navigating to URL ${logId}`);\n\n filterObj = setupListingFilters({\n context: genCtxArg(),\n filters,\n shouldApplyFilter,\n onFiltersLoaded,\n onResetFilters,\n log,\n });\n\n log.debug(`Calling onAfterNavigation callback. URL ${logId}`); // prettier-ignore\n await onAfterNavigation?.(genCtxArg());\n log.debug(`Done calling onAfterNavigation callback. URL ${logId})`); // prettier-ignore\n\n const isUsingFilters = filters.some((filter) => !filter.disabled);\n\n let hasFilterStatesToProcess = true;\n while (hasFilterStatesToProcess && !userAskedToStop) {\n // Filter loop\n // Load filters before we start paginating\n log.info(`Setting up filters for URL ${logId}`);\n await retryAsync(\n async () => {\n if (!filterObj) throw Error(`Filter controller is missing. This should never happen. URL ${logId}`); // prettier-ignore\n\n const filterHasState = await filterObj.hasState();\n if (!isUsingFilters || !filterHasState) {\n log.info(`Not loading filters for URL ${logId}`);\n return;\n }\n\n log.debug(`Loading filters for URL ${logId}`);\n await filterObj.nextState();\n await filterObj.loadState();\n log.debug(`Done loading filters for URL ${logId}`);\n },\n {\n maxRetries: loadFiltersRetries,\n onError: (err, retryIndex) => onLoadFiltersError(genCtxArg(), err, retryIndex),\n }\n );\n\n let nextPageAvailable = true;\n while (nextPageAvailable && !userAskedToStop) {\n // Pagination loop\n let currPageId = 'next page';\n if (pageId) {\n log.debug(`Loading pageId for URL ${logId}`);\n currPageId = await pageId(genCtxArg());\n log.debug(`Done loading pageId for URL ${logId}`);\n }\n const pageLogId = `${logId} (${currPageId})`;\n\n // Extract page links\n log.info(`Extracting links from page ${pageLogId}`);\n const { result } = await retryAsync(\n async (retryIndex) => extractEntries(genCtxArg(), retryIndex),\n {\n maxRetries: extractEntriesRetries,\n onError: (err, retryIndex) => onExtractEntriesError(genCtxArg(), err, retryIndex),\n }\n );\n log.debug(`Done extracting links from page ${pageLogId}`);\n\n const pageLinks = result ?? [];\n links.push(...pageLinks);\n log.info(`Found ${pageLinks.length} links on page ${pageLogId}`);\n\n // Leave after printing the count or on abort\n if (listingCountOnly || userAskedToStop) {\n nextPageAvailable = false;\n if (listingCountOnly) log.info(`Debugging mode. Entries are not scraped. Leaving now. URL ${pageLogId}`); // prettier-ignore\n else if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n continue;\n }\n\n log.debug(`Calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n await onExtractEntriesDone?.(genCtxArg(), pageLinks);\n log.debug(`Done calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n\n if (onGoToNextPage && !userAskedToStop) {\n // If goToNextPage hook is defined, this will be called after each page, until it errors\n try {\n log.info(`Navigating to next page from URL ${pageLogId}`);\n await onGoToNextPage(genCtxArg(), pageLinks);\n log.debug(`Done navigating to next page from URL ${pageLogId}`); // prettier-ignore\n } catch (e) {\n log.info(`Failed navigating to next page from URL ${pageLogId}`); // prettier-ignore\n log.error((e as Error).toString());\n nextPageAvailable = false;\n }\n } else {\n if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n nextPageAvailable = false;\n }\n\n // Wait before we start scraping the next page\n await new Promise((res) => setTimeout(res, nextPageWait));\n }\n\n // Break out if we're not using filters or we've gone through them all\n log.debug(`Checking if there are more filter states available for URL ${logId}`);\n hasFilterStatesToProcess = isUsingFilters && (await filterObj.hasNextState());\n log.debug(`Done checking if there are more filter states available for URL ${logId}`);\n\n if (hasFilterStatesToProcess) {\n if (!userAskedToStop) log.info(`Will repeat scraping this URL with different filter setting. URL ${logId}`); // prettier-ignore\n else log.info(`There are unprocessed filter setting remaining for this URL, but stopping due to abort. URL ${logId}`); // prettier-ignore\n } else log.info(`No filter setting remain for scraping this URL. URL ${logId}`); // prettier-ignore\n }\n log.info(`Finished URL ${logId}`);\n });\n return links;\n};\n"]}
|
|
1
|
+
{"version":3,"file":"scrapeListing.js","sourceRoot":"","sources":["../../../../src/lib/actions/scrapeListing.ts"],"names":[],"mappings":";;;;;;;;;;;;AAAA,mCAAuC;AAEvC,6CAA+D;AAC/D,yCAA8C;AA8G9C;;;;GAIG;AACH,MAAM,mBAAmB,GAAG,CAA8B,EACxD,OAAO,EACP,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,cAAc,EACd,eAAe,EACf,GAAG,GACsC,EAA2B,EAAE;IACtE,IAAI,YAAY,GAAwB,OAAO,CAAC;IAEhD,MAAM,6BAA6B,GAAG,GAAS,EAAE;QAC/C,MAAM,aAAa,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAC5F,OAAO,IAAA,sBAAa,EAAC,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChD,CAAC,CAAA,CAAC;IAEF,MAAM,QAAQ,GAAG,GAAS,EAAE;QAC1B,MAAM,SAAS,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;QACpF,OAAO,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACjC,CAAC,CAAA,CAAC;IAEF,MAAM,YAAY,GAAG,GAAS,EAAE;QAC9B,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,OAAO,0BAA0B,GAAG,CAAC,CAAC,CAAC;IACzC,CAAC,CAAA,CAAC;IAEF,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,6EAA6E;QAC7E,gDAAgD;QAChD,wCAAwC;QACxC,mDAAmD;QACnD,wCAAwC;QACxC,wCAAwC;QACxC,wCAAwC;QACxC,iCAAiC;QACjC,wCAAwC;QACxC,EAAE;QACF,uEAAuE;QACvE,8DAA8D;QAC9D,sCAAsC;QACtC,wCAAwC;QACxC,EAAE;QACF,sEAAsE;QACtE,iBAAiB;QACjB,0CAA0C;QAC1C,0CAA0C;QAE1C,MAAM,UAAU,GAAG,MAAM,IAAA,sBAAc,EAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QACtF,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,GAAG,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;QAErE,MAAM,0BAA0B,GAAG,MAAM,6BAA6B,EAAE,CAAC;QACzE,IAAI,0BAA0B,KAAK,CAAC,CAAC;YACnC,MAAM,KAAK,CAAC,uDAAuD,CAAC,CAAC;QAEvE,MAAM,iBAAiB,GAAG,YAAY,CAAC,0BAA0B,CAAC,CAAC;QACnE,MAAM,cAAc,GAAG,YAAY,CAAC,KAAK,CAAC,0BAA0B,GAAG,CAAC,CAAC,CAAC;QAE1E,GAAG,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QAC1C,MAAM,iBAAiB,CAAC,SAAS,EAAE,CAAC;QACpC,KAAK,MAAM,MAAM,IAAI,cAAc,EAAE,CAAC;YACpC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC;YAC1B,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;QAC3B,CAAC;IACH,CAAC,CAAA,CAAC;IAEF,+CAA+C;IAC/C,MAAM,SAAS,GAAG,GAAS,EAAE;QAC3B,MAAM,UAAU,EAAE,CAAC;QAEnB,8CAA8C;QAC9C,YAAY,GAAG,EAAE,CAAC;QAClB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,eAAe,GAAG,iBAAiB;gBACvC,CAAC,CAAC,MAAM,iBAAiB,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC;gBACnD,CAAC,CAAC,IAAI,CAAC;YACT,IAAI,CAAC,eAAe,EAAE,CAAC;gBACrB,GAAG,CAAC,IAAI,CAAC,wBAAwB,MAAM,CAAC,IAAI,sBAAsB,CAAC,CAAC;gBACpE,MAAM;YACR,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;gBACrB,GAAG,CAAC,IAAI,CAAC,oBAAoB,MAAM,CAAC,IAAI,GAAG,CAAC,CAAC;gBAC7C,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,IAAI,CAAC,WAAW,MAAM,CAAC,IAAI,qDAAqD,CAAC,CAAC;YACxF,CAAC;YAED,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAC5B,CAAC;QAED,GAAG,CAAC,IAAI,CAAC,sBAAsB,CAAC,CAAC;QACjC,MAAM,CAAA,eAAe,aAAf,eAAe,uBAAf,eAAe,CAAG,OAAO,CAAC,CAAA,CAAC;IACnC,CAAC,CAAA,CAAC;IAEF,yBAAyB;IACzB,MAAM,UAAU,GAAG,GAAS,EAAE;QAC5B,GAAG,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QACnC,MAAM,CAAA,cAAc,aAAd,cAAc,uBAAd,cAAc,CAAG,OAAO,CAAC,CAAA,CAAC;QAChC,YAAY,GAAG,OAAO,CAAC;QACvB,GAAG,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IAC1C,CAAC,CAAA,CAAC;IAEF,OAAO;QACL,SAAS;QACT,SAAS;QACT,YAAY;QACZ,QAAQ;KACT,CAAC;AACJ,CAAC,CAAC;AAEF,yFAAyF;AAClF,MAAM,oBAAoB,GAAG,CAClC,OAAgD,EAChD,EAAE;IACF,MAAM,EACJ,OAAO,EACP,SAAS,EACT,gBAAgB,GAAG,KAAK,EACxB,GAAG,EACH,MAAM,EACN,UAAU,EACV,iBAAiB,EAEjB,OAAO,GAAG,EAAE,EACZ,iBAAiB,EACjB,kBAAkB,GAAG,CAAC,EACtB,kBAAkB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACnD,eAAe,EACf,cAAc,EAEd,cAAc,EACd,qBAAqB,GAAG,CAAC,EACzB,qBAAqB,GAAG,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,EACtD,oBAAoB,EAEpB,cAAc,EACd,YAAY,GAAG,GAAG,GACnB,GAAG,OAAO,CAAC;IAEZ,4DAA4D;IAC5D,MAAM,KAAK,GAAc,EAAE,CAAC;IAE5B,MAAM,IAAA,sBAAc,EAAC,SAAS,EAAE,CAAO,QAAQ,EAAE,KAAK,EAAE,EAAE;QACxD,IAAI,gBAAgB,IAAI,KAAK,GAAG,CAAC;YAAE,OAAO;QAE1C,MAAM,KAAK,GAAG,GAAG,QAAQ,KAAK,KAAK,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAE/D,IAAI,eAAe,GAAG,KAAK,CAAC;QAC5B,MAAM,KAAK,GAAG,GAAG,EAAE,GAAG,eAAe,GAAG,IAAI,CAAA,CAAC,CAAC,CAAC,CAAC,kBAAkB;QAElE,0CAA0C;QAC1C,IAAI,SAAS,GAAmC,IAAI,CAAC;QACrD,MAAM,SAAS,GAAG,GAA4C,EAAE;;YAAC,OAAA,CAAC;gBAChE,OAAO;gBACP,GAAG;gBACH,QAAQ;gBACR,OAAO;gBACP,eAAe,EAAE,MAAA,SAAS,aAAT,SAAS,uBAAT,SAAS,CAAE,SAAS,mCAAI,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC;gBACnD,KAAK;aACN,CAAC,CAAA;SAAA,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACrC,IAAA,iBAAW,EAAC,QAAkB,CAAC,CAAC;QAChC,GAAG,CAAC,IAAI,CAAC,kBAAkB,KAAK,EAAE,CAAC,CAAC;QACpC,MAAM,CAAA,UAAU,aAAV,UAAU,uBAAV,UAAU,CAAG,SAAS,EAAE,EAAE,QAAQ,CAAC,CAAA,CAAC;QAC1C,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;QAE7C,SAAS,GAAG,mBAAmB,CAAC;YAC9B,OAAO,EAAE,SAAS,EAAE;YACpB,OAAO;YACP,iBAAiB;YACjB,eAAe;YACf,cAAc;YACd,GAAG;SACJ,CAAC,CAAC;QAEH,GAAG,CAAC,KAAK,CAAC,2CAA2C,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACjF,MAAM,CAAA,iBAAiB,aAAjB,iBAAiB,uBAAjB,iBAAiB,CAAG,SAAS,EAAE,CAAC,CAAA,CAAC;QACvC,GAAG,CAAC,KAAK,CAAC,gDAAgD,KAAK,GAAG,CAAC,CAAC,CAAC,kBAAkB;QAEvF,MAAM,cAAc,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAElE,IAAI,wBAAwB,GAAG,IAAI,CAAC;QACpC,OAAO,wBAAwB,IAAI,CAAC,eAAe,EAAE,CAAC;YACpD,cAAc;YACd,0CAA0C;YAC1C,GAAG,CAAC,IAAI,CAAC,8BAA8B,KAAK,EAAE,CAAC,CAAC;YAChD,MAAM,IAAA,kBAAU,EACd,GAAS,EAAE;gBACT,IAAI,CAAC,SAAS;oBAAE,MAAM,KAAK,CAAC,+DAA+D,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAEvH,MAAM,cAAc,GAAG,MAAM,SAAS,CAAC,QAAQ,EAAE,CAAC;gBAClD,IAAI,CAAC,cAAc,IAAI,CAAC,cAAc,EAAE,CAAC;oBACvC,GAAG,CAAC,IAAI,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;oBACjD,OAAO;gBACT,CAAC;gBAED,GAAG,CAAC,KAAK,CAAC,2BAA2B,KAAK,EAAE,CAAC,CAAC;gBAC9C,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,MAAM,SAAS,CAAC,SAAS,EAAE,CAAC;gBAC5B,GAAG,CAAC,KAAK,CAAC,gCAAgC,KAAK,EAAE,CAAC,CAAC;YACrD,CAAC,CAAA,EACD;gBACE,UAAU,EAAE,kBAAkB;gBAC9B,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,kBAAkB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;aAC/E,CACF,CAAC;YAEF,IAAI,iBAAiB,GAAG,IAAI,CAAC;YAC7B,OAAO,iBAAiB,IAAI,CAAC,eAAe,EAAE,CAAC;gBAC7C,kBAAkB;gBAClB,IAAI,UAAU,GAAG,WAAW,CAAC;gBAC7B,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,KAAK,CAAC,0BAA0B,KAAK,EAAE,CAAC,CAAC;oBAC7C,UAAU,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;oBACvC,GAAG,CAAC,KAAK,CAAC,+BAA+B,KAAK,EAAE,CAAC,CAAC;gBACpD,CAAC;gBACD,MAAM,SAAS,GAAG,GAAG,KAAK,KAAK,UAAU,GAAG,CAAC;gBAE7C,qBAAqB;gBACrB,GAAG,CAAC,IAAI,CAAC,8BAA8B,SAAS,EAAE,CAAC,CAAC;gBACpD,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,IAAA,kBAAU,EACjC,CAAO,UAAU,EAAE,EAAE,kDAAC,OAAA,cAAc,CAAC,SAAS,EAAE,EAAE,UAAU,CAAC,CAAA,GAAA,EAC7D;oBACE,UAAU,EAAE,qBAAqB;oBACjC,OAAO,EAAE,CAAC,GAAG,EAAE,UAAU,EAAE,EAAE,CAAC,qBAAqB,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,UAAU,CAAC;iBAClF,CACF,CAAC;gBACF,GAAG,CAAC,KAAK,CAAC,mCAAmC,SAAS,EAAE,CAAC,CAAC;gBAE1D,MAAM,SAAS,GAAG,MAAM,aAAN,MAAM,cAAN,MAAM,GAAI,EAAE,CAAC;gBAC/B,KAAK,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;gBACzB,GAAG,CAAC,IAAI,CAAC,SAAS,SAAS,CAAC,MAAM,kBAAkB,SAAS,EAAE,CAAC,CAAC;gBAEjE,6CAA6C;gBAC7C,IAAI,gBAAgB,IAAI,eAAe,EAAE,CAAC;oBACxC,iBAAiB,GAAG,KAAK,CAAC;oBAC1B,IAAI,gBAAgB;wBAAE,GAAG,CAAC,IAAI,CAAC,6DAA6D,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;yBACvH,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBACjE,SAAS;gBACX,CAAC;gBAED,GAAG,CAAC,KAAK,CAAC,8CAA8C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBACxF,MAAM,CAAA,oBAAoB,aAApB,oBAAoB,uBAApB,oBAAoB,CAAG,SAAS,EAAE,EAAE,SAAS,CAAC,CAAA,CAAC;gBACrD,GAAG,CAAC,KAAK,CAAC,mDAAmD,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;gBAE7F,IAAI,cAAc,IAAI,CAAC,eAAe,EAAE,CAAC;oBACvC,wFAAwF;oBACxF,IAAI,CAAC;wBACH,GAAG,CAAC,IAAI,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;wBAC1D,MAAM,cAAc,CAAC,SAAS,EAAE,EAAE,SAAS,CAAC,CAAC;wBAC7C,GAAG,CAAC,KAAK,CAAC,yCAAyC,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;oBACrF,CAAC;oBAAC,OAAO,CAAC,EAAE,CAAC;wBACX,GAAG,CAAC,IAAI,CAAC,2CAA2C,SAAS,EAAE,CAAC,CAAC,CAAC,kBAAkB;wBACpF,GAAG,CAAC,KAAK,CAAE,CAAW,CAAC,QAAQ,EAAE,CAAC,CAAC;wBACnC,iBAAiB,GAAG,KAAK,CAAC;oBAC5B,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,IAAI,eAAe;wBAAE,GAAG,CAAC,IAAI,CAAC,iBAAiB,SAAS,EAAE,CAAC,CAAC;oBAC5D,iBAAiB,GAAG,KAAK,CAAC;gBAC5B,CAAC;gBAED,8CAA8C;gBAC9C,MAAM,IAAI,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,YAAY,CAAC,CAAC,CAAC;YAC5D,CAAC;YAED,sEAAsE;YACtE,GAAG,CAAC,KAAK,CAAC,8DAA8D,KAAK,EAAE,CAAC,CAAC;YACjF,wBAAwB,GAAG,cAAc,IAAI,CAAC,MAAM,SAAS,CAAC,YAAY,EAAE,CAAC,CAAC;YAC9E,GAAG,CAAC,KAAK,CAAC,mEAAmE,KAAK,EAAE,CAAC,CAAC;YAEtF,IAAI,wBAAwB,EAAE,CAAC;gBAC7B,IAAI,CAAC,eAAe;oBAAE,GAAG,CAAC,IAAI,CAAC,oEAAoE,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;;oBAC1H,GAAG,CAAC,IAAI,CAAC,+FAA+F,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;YAC3I,CAAC;;gBAAM,GAAG,CAAC,IAAI,CAAC,uDAAuD,KAAK,EAAE,CAAC,CAAC,CAAC,kBAAkB;QACrG,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,gBAAgB,KAAK,EAAE,CAAC,CAAC;IACpC,CAAC,CAAA,CAAC,CAAC;IACH,OAAO,KAAK,CAAC;AACf,CAAC,CAAA,CAAC;AAxKW,QAAA,oBAAoB,wBAwK/B","sourcesContent":["import { findLastIndex } from 'lodash';\n\nimport { serialAsyncMap, retryAsync } from '../../utils/async';\nimport { validateUrl } from '../../utils/url';\nimport type { MaybePromise } from '../../utils/types';\n\n// TODO - Clean this up and merge it into PageLib\n\nexport interface ListingLogger {\n debug: (msg: string, data?: any) => void;\n info: (msg: string, data?: any) => void;\n warning: (msg: string, data?: any) => void;\n error: (msg: string, data?: any) => void;\n}\n\nexport interface ListingPageFilter {\n name: string;\n disabled?: boolean;\n initState: () => MaybePromise<boolean>;\n resetState: () => MaybePromise<void>;\n nextState: () => MaybePromise<void>;\n hasNextState: () => MaybePromise<boolean>;\n hasState: () => MaybePromise<boolean>;\n loadState: () => MaybePromise<void>;\n}\n\nexport interface ListingFiltersSetupOptions<Ctx extends object, UrlType> {\n context: ListingPageScraperContext<Ctx, UrlType>;\n filters?: ListingPageFilter[];\n shouldApplyFilter?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n filter: ListingPageFilter,\n filters: ListingPageFilter[]\n ) => MaybePromise<boolean>;\n onResetFilters?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n onFiltersLoaded?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n log: ListingLogger;\n}\n\ntype ListingFilterController = Pick<ListingPageFilter, 'loadState' | 'nextState' | 'hasNextState' | 'hasState'>; // prettier-ignore\n\nexport interface ListingPageScraperContext<Ctx extends object, UrlType> {\n context: Ctx;\n log: ListingLogger;\n startUrl: UrlType;\n filters: ListingPageFilter[];\n /** Use this if you need to load filters again (eg after reloading page manually) */\n loadFilterState: () => MaybePromise<void>;\n /** Call this function from any callback to stop scraping */\n abort: () => void;\n}\n\n// prettier-ignore\nexport interface ListingPageScraperOptions<Ctx extends object, UrlType> extends Omit<ListingFiltersSetupOptions<Ctx, UrlType>, 'context'> {\n context: Ctx;\n startUrls: UrlType[];\n listingCountOnly?: boolean;\n /** Get ID of the current page in the pagination, so it can be logged */\n pageId?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<string>;\n log: ListingLogger;\n\n onNavigate?: (context: ListingPageScraperContext<Ctx, UrlType>, url: UrlType) => MaybePromise<void>;\n /**\n * Hook triggered after navigating to the url using Page.goto().\n *\n * One use of this hook is to conditionally disable/enable filters based on the page content.\n **/\n onAfterNavigation?: (context: ListingPageScraperContext<Ctx, UrlType>) => MaybePromise<void>;\n\n /** How many attempts are retried after filters failed to load. Defaults to 3 */\n loadFiltersRetries?: number;\n /**\n * Hook triggered after a failed attempt at loading listings page filters.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onLoadFiltersError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n\n /** Main logic to extract entries from a page */\n extractEntries: (context: ListingPageScraperContext<Ctx, UrlType>, retryIndex: number) => MaybePromise<UrlType[]>;\n /** How many attempts are retried after failed to scrape entries from a listing. Defaults to 3 */\n extractEntriesRetries?: number;\n /**\n * Hook triggered after a failed attempt at scraping entries from a listing.\n *\n * One use of this hook is to reload the page on failed attemp in case something didn't load correctly.\n **/\n onExtractEntriesError?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n error: any,\n retryIndex: number\n ) => MaybePromise<void>;\n onExtractEntriesDone?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n\n /**\n * If goToNextPage hook is defined, it will be called after each page. To indicate that there's no more\n * pages left, throw an error.\n **/\n onGoToNextPage?: (\n context: ListingPageScraperContext<Ctx, UrlType>,\n entries: UrlType[] | null\n ) => MaybePromise<void>;\n /** How long to wait after we've navigated to the next page and before we start extracting? */\n nextPageWait?: number;\n}\n\n/**\n * Given configuration for listing page filters, set up functions to\n * navigate through the different states of filters, to allow to paginate\n * through all states.\n */\nconst setupListingFilters = <Ctx extends object, UrlType>({\n context,\n filters = [],\n shouldApplyFilter,\n onResetFilters,\n onFiltersLoaded,\n log,\n}: ListingFiltersSetupOptions<Ctx, UrlType>): ListingFilterController => {\n let filtersStack: ListingPageFilter[] = filters;\n\n const getNextFilterStateChangeIndex = async () => {\n const hasNextStates = await serialAsyncMap(filtersStack, (filter) => filter.hasNextState());\n return findLastIndex(hasNextStates, (x) => x);\n };\n\n const hasState = async () => {\n const hasStates = await serialAsyncMap(filtersStack, (filter) => filter.hasState());\n return hasStates.some(Boolean);\n };\n\n const hasNextState = async () => {\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n return nextFilterStateChangeIndex > -1;\n };\n\n const nextState = async () => {\n // Imagine we have 4 filters, each has 3 states (eg 3 options to select from)\n // We start with all filters in the first state:\n // State 1: F1(1), F2(1), F3(1), F4(1)\n // As we progress, we increment it akin to numbers:\n // State 2: F1(1), F2(1), F3(1), F4(2)\n // State 3: F1(1), F2(1), F3(1), F4(3)\n // State 4: F1(1), F2(1), F3(2), F4(1)\n // All the way to the last state:\n // State n: F1(3), F2(3), F3(3), F4(3)\n //\n // When we want move to a next state, we identify the RIGHT-most filter\n // whose state can be incremented (in this case we select F2):\n // YES YES NO NO\n // State x: F1(1), F2(2), F3(3), F4(3)\n //\n // When we increment a filter state, all the other filter to the RIGHT\n // will be reset:\n // State x: F1(1), F2(2), F3(3), F4(3)\n // State x+1: F1(1), F2(3), F3(1), F4(1)\n\n const initStates = await serialAsyncMap(filtersStack, (filter) => filter.initState());\n if (initStates.some(Boolean)) return log.info('Initialised filters');\n\n const nextFilterStateChangeIndex = await getNextFilterStateChangeIndex();\n if (nextFilterStateChangeIndex === -1)\n throw Error('Cannot select next filter state - reached end of list');\n\n const filterToNextState = filtersStack[nextFilterStateChangeIndex];\n const filtersToReset = filtersStack.slice(nextFilterStateChangeIndex + 1);\n\n log.info('Setting filters to next state');\n await filterToNextState.nextState();\n for (const filter of filtersToReset) {\n await filter.resetState();\n await filter.nextState();\n }\n };\n\n /** Load current filter state in the webpage */\n const loadState = async () => {\n await resetState();\n\n // Load filters one by one, and only if needed\n filtersStack = [];\n for (const filter of filters) {\n const shouldUseFilter = shouldApplyFilter\n ? await shouldApplyFilter(context, filter, filters)\n : true;\n if (!shouldUseFilter) {\n log.info(`Not applying filter \"${filter.name}\" or further filters`);\n break;\n }\n\n if (!filter.disabled) {\n log.info(`Applying filter \"${filter.name}\"`);\n await filter.loadState();\n } else {\n log.info(`Filter \"${filter.name}\" recognised but not applied because it is disabled`);\n }\n\n filtersStack.push(filter);\n }\n\n log.info(`Done loading filters`);\n await onFiltersLoaded?.(context);\n };\n\n /** Reset filter state */\n const resetState = async () => {\n log.info(`Resetting filter state`);\n await onResetFilters?.(context);\n filtersStack = filters;\n log.info(`Resetting filter state done`);\n };\n\n return {\n loadState,\n nextState,\n hasNextState,\n hasState,\n };\n};\n\n/** Get entries from a listing page (eg URLs to profiles that should be scraped later) */\nexport const scrapeListingEntries = async <Ctx extends object, UrlType>(\n options: ListingPageScraperOptions<Ctx, UrlType>\n) => {\n const {\n context,\n startUrls,\n listingCountOnly = false,\n log,\n pageId,\n onNavigate,\n onAfterNavigation,\n\n filters = [],\n shouldApplyFilter,\n loadFiltersRetries = 3,\n onLoadFiltersError = (_, err) => console.error(err),\n onFiltersLoaded,\n onResetFilters,\n\n extractEntries,\n extractEntriesRetries = 3,\n onExtractEntriesError = (_, err) => console.error(err),\n onExtractEntriesDone,\n\n onGoToNextPage,\n nextPageWait = 500,\n } = options;\n\n /** Collection of ALL urls across all pages and startUrls */\n const links: UrlType[] = [];\n\n await serialAsyncMap(startUrls, async (startUrl, index) => {\n if (listingCountOnly && index > 0) return;\n\n const logId = `${startUrl} (${index + 1}/${startUrls.length})`;\n\n let userAskedToStop = false;\n const abort = () => { userAskedToStop = true }; // prettier-ignore\n\n // Prepare context shared across all hooks\n let filterObj: ListingFilterController | null = null;\n const genCtxArg = (): ListingPageScraperContext<Ctx, UrlType> => ({\n context,\n log,\n startUrl,\n filters,\n loadFilterState: filterObj?.loadState ?? (() => {}),\n abort,\n });\n\n log.debug(`Validating URL ${logId}`);\n validateUrl(startUrl as string);\n log.info(`Navigating URL ${logId}`);\n await onNavigate?.(genCtxArg(), startUrl);\n log.debug(`Done navigating to URL ${logId}`);\n\n filterObj = setupListingFilters({\n context: genCtxArg(),\n filters,\n shouldApplyFilter,\n onFiltersLoaded,\n onResetFilters,\n log,\n });\n\n log.debug(`Calling onAfterNavigation callback. URL ${logId}`); // prettier-ignore\n await onAfterNavigation?.(genCtxArg());\n log.debug(`Done calling onAfterNavigation callback. URL ${logId})`); // prettier-ignore\n\n const isUsingFilters = filters.some((filter) => !filter.disabled);\n\n let hasFilterStatesToProcess = true;\n while (hasFilterStatesToProcess && !userAskedToStop) {\n // Filter loop\n // Load filters before we start paginating\n log.info(`Setting up filters for URL ${logId}`);\n await retryAsync(\n async () => {\n if (!filterObj) throw Error(`Filter controller is missing. This should never happen. URL ${logId}`); // prettier-ignore\n\n const filterHasState = await filterObj.hasState();\n if (!isUsingFilters || !filterHasState) {\n log.info(`Not loading filters for URL ${logId}`);\n return;\n }\n\n log.debug(`Loading filters for URL ${logId}`);\n await filterObj.nextState();\n await filterObj.loadState();\n log.debug(`Done loading filters for URL ${logId}`);\n },\n {\n maxRetries: loadFiltersRetries,\n onError: (err, retryIndex) => onLoadFiltersError(genCtxArg(), err, retryIndex),\n }\n );\n\n let nextPageAvailable = true;\n while (nextPageAvailable && !userAskedToStop) {\n // Pagination loop\n let currPageId = 'next page';\n if (pageId) {\n log.debug(`Loading pageId for URL ${logId}`);\n currPageId = await pageId(genCtxArg());\n log.debug(`Done loading pageId for URL ${logId}`);\n }\n const pageLogId = `${logId} (${currPageId})`;\n\n // Extract page links\n log.info(`Extracting links from page ${pageLogId}`);\n const { result } = await retryAsync(\n async (retryIndex) => extractEntries(genCtxArg(), retryIndex),\n {\n maxRetries: extractEntriesRetries,\n onError: (err, retryIndex) => onExtractEntriesError(genCtxArg(), err, retryIndex),\n }\n );\n log.debug(`Done extracting links from page ${pageLogId}`);\n\n const pageLinks = result ?? [];\n links.push(...pageLinks);\n log.info(`Found ${pageLinks.length} links on page ${pageLogId}`);\n\n // Leave after printing the count or on abort\n if (listingCountOnly || userAskedToStop) {\n nextPageAvailable = false;\n if (listingCountOnly) log.info(`Debugging mode. Entries are not scraped. Leaving now. URL ${pageLogId}`); // prettier-ignore\n else if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n continue;\n }\n\n log.debug(`Calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n await onExtractEntriesDone?.(genCtxArg(), pageLinks);\n log.debug(`Done calling onExtractEntriesDone callback. URL ${pageLogId}`); // prettier-ignore\n\n if (onGoToNextPage && !userAskedToStop) {\n // If goToNextPage hook is defined, this will be called after each page, until it errors\n try {\n log.info(`Navigating to next page from URL ${pageLogId}`);\n await onGoToNextPage(genCtxArg(), pageLinks);\n log.debug(`Done navigating to next page from URL ${pageLogId}`); // prettier-ignore\n } catch (e) {\n log.info(`Failed navigating to next page from URL ${pageLogId}`); // prettier-ignore\n log.error((e as Error).toString());\n nextPageAvailable = false;\n }\n } else {\n if (userAskedToStop) log.info(`Aborting. URL ${pageLogId}`);\n nextPageAvailable = false;\n }\n\n // Wait before we start scraping the next page\n await new Promise((res) => setTimeout(res, nextPageWait));\n }\n\n // Break out if we're not using filters or we've gone through them all\n log.debug(`Checking if there are more filter states available for URL ${logId}`);\n hasFilterStatesToProcess = isUsingFilters && (await filterObj.hasNextState());\n log.debug(`Done checking if there are more filter states available for URL ${logId}`);\n\n if (hasFilterStatesToProcess) {\n if (!userAskedToStop) log.info(`Will repeat scraping this URL with different filter setting. URL ${logId}`); // prettier-ignore\n else log.info(`There are unprocessed filter setting remaining for this URL, but stopping due to abort. URL ${logId}`); // prettier-ignore\n } else log.info(`No filter setting remain for scraping this URL. URL ${logId}`); // prettier-ignore\n }\n log.info(`Finished URL ${logId}`);\n });\n return links;\n};\n"]}
|
|
@@ -67,19 +67,19 @@ export interface RunCrawleeOneOptions<TType extends CrawlerType, T extends Crawl
|
|
|
67
67
|
* 9) Apify context (e.g. calling `Actor.getInput`) can be replaced with custom
|
|
68
68
|
* implementation using the `io` option.
|
|
69
69
|
*/
|
|
70
|
-
export declare const runCrawleeOne: <TType extends "basic" | "http" | "
|
|
70
|
+
export declare const runCrawleeOne: <TType extends "basic" | "http" | "jsdom" | "cheerio" | "playwright" | "puppeteer", T extends CrawleeOneCtx<CrawlerMeta<TType>["context"], string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>>(args: RunCrawleeOneOptions<TType, T>) => Promise<void>;
|
|
71
71
|
/** Given the actor input, create common crawler options. */
|
|
72
|
-
export declare const createHttpCrawlerOptions: <T extends CrawleeOneCtx<import("crawlee").CrawlingContext<import("crawlee").
|
|
72
|
+
export declare const createHttpCrawlerOptions: <T extends CrawleeOneCtx<import("crawlee").CrawlingContext<import("crawlee").JSDOMCrawler | import("crawlee").CheerioCrawler | import("crawlee").PlaywrightCrawler | import("crawlee").PuppeteerCrawler | import("crawlee").BasicCrawler<import("crawlee").BasicCrawlingContext<import("crawlee").Dictionary>> | import("crawlee").HttpCrawler<import("crawlee").InternalHttpCrawlingContext<any, any, import("crawlee").HttpCrawler<any>>>, import("crawlee").Dictionary>, string, Record<string, any>, CrawleeOneIO<object, object, object>, import("../..").CrawleeOneTelemetry<any, any>>, TOpts extends BasicCrawlerOptions<T["context"]>>({ input, defaults, overrides, }: {
|
|
73
73
|
/** Actor input */
|
|
74
|
-
input: T[
|
|
74
|
+
input: T['input'] | null;
|
|
75
75
|
/**
|
|
76
76
|
* Default config options set by us. These may be overriden
|
|
77
77
|
* by values from actor input (set by user).
|
|
78
78
|
*/
|
|
79
|
-
defaults?: TOpts
|
|
79
|
+
defaults?: TOpts;
|
|
80
80
|
/**
|
|
81
81
|
* These config options will overwrite both the default and user
|
|
82
82
|
* options. This is useful for hard-setting values e.g. in tests.
|
|
83
83
|
*/
|
|
84
|
-
overrides?: TOpts
|
|
84
|
+
overrides?: TOpts;
|
|
85
85
|
}) => Partial<TOpts> & import("lodash").Dictionary<TOpts["requestHandler"] | TOpts["handleRequestFunction"] | TOpts["requestList"] | TOpts["requestQueue"] | TOpts["requestHandlerTimeoutSecs"] | TOpts["handleRequestTimeoutSecs"] | TOpts["errorHandler"] | TOpts["failedRequestHandler"] | TOpts["handleFailedRequestFunction"] | TOpts["maxRequestRetries"] | TOpts["maxRequestsPerCrawl"] | TOpts["autoscaledPoolOptions"] | TOpts["minConcurrency"] | TOpts["maxConcurrency"] | TOpts["maxRequestsPerMinute"] | TOpts["keepAlive"] | TOpts["useSessionPool"] | TOpts["sessionPoolOptions"] | TOpts["loggingInterval"] | TOpts["log"]>;
|
|
@@ -158,7 +158,7 @@ const createCrawleeOne = (config) => __awaiter(void 0, void 0, void 0, function*
|
|
|
158
158
|
if (config.validateInput)
|
|
159
159
|
yield config.validateInput(input);
|
|
160
160
|
const { logLevel } = (input !== null && input !== void 0 ? input : {});
|
|
161
|
-
const log = new crawlee_1.Log({ level: logLevel ? log_1.logLevelToCrawlee[logLevel] :
|
|
161
|
+
const log = new crawlee_1.Log({ level: logLevel ? log_1.logLevelToCrawlee[logLevel] : crawlee_1.LogLevel.INFO });
|
|
162
162
|
// This is context that is available to options that use initialization function
|
|
163
163
|
const getConfig = () => (Object.assign(Object.assign({}, config), { input, state, io }));
|
|
164
164
|
// Set up proxy
|
|
@@ -295,7 +295,7 @@ const createScopedCrawlerRun = (getActor) => {
|
|
|
295
295
|
// Clear cache if it was set from the input
|
|
296
296
|
if (outputCacheStoreId && outputCacheActionOnResult === 'overwrite') {
|
|
297
297
|
const store = yield actor.io.openKeyValueStore(outputCacheStoreId);
|
|
298
|
-
yield store.
|
|
298
|
+
yield store.clear();
|
|
299
299
|
}
|
|
300
300
|
yield ((_b = genHookFn(actor, outputTransformBefore, 'outputTransformBefore')) === null || _b === void 0 ? void 0 : _b()); // prettier-ignore
|
|
301
301
|
yield ((_c = genHookFn(actor, outputFilterBefore, 'outputFilterBefore')) === null || _c === void 0 ? void 0 : _c()); // prettier-ignore
|