crawlee-one 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,6 +17,45 @@
17
17
  // handler: detailPageHandler
18
18
  // }
19
19
  // ```
20
+ // !!!!!!!!!!
21
+ // UPDATES - START
22
+ // !!!!!!!!!!
23
+ // THE CONFIG SHOULD BE FIRST DEFINED AS PLAIN OBJECT, SO NO JS/TS,
24
+ // SO WE CAN IMPORT IT FOR TYPE GENERATION.
25
+ // SO IT SHOULD AT FIRST LOOK LIKE THIS (COULD BE JS/TS/JSON/YAML/...):
26
+ // ```
27
+ // {
28
+ // version: 1,
29
+ // schema: {
30
+ // crawlers: {
31
+ // mainCrawler: {
32
+ // type: 'playwright'
33
+ // routes: ['listingPage', 'detailPage', 'home']
34
+ // }
35
+ // },
36
+ // }
37
+ // };
38
+ // ```
39
+ // WE IMPORT THAT USING THE COSMICCONFIG (with https://github.com/codex-/cosmiconfig-typescript-loader)
40
+ // See https://github.com/cosmiconfig/cosmiconfig#usage-for-tooling-developers
41
+ //
42
+ // AT THIS POINT DON'T FORGET TO HAVE A VALIDATION SCHEMA TO COMPARE THE LOADED
43
+ // CONFIG AGAINST. THROW ERROR IF INVALID.
44
+ //
45
+ // I SHOULD GENERATE TYPES FOR:
46
+ // - The types I mentioned below
47
+ // - Each Crawler based on their type (e.g. CheerioCrawleeOneCrawler<Labels, Inputs, ...>) (But also named
48
+ // variants like `mainCrawlerCrawler`)
49
+ // - CRAWLER_NAME_ENUM
50
+ // - All Crawlers obj = { `CrawlerName`: CheerioCrawleeOneCrawler, ... }
51
+ // - Each Route based on their type (e.g. CheerioCrawleeOneRoute<Labels, Inputs, ...>) (But also named
52
+ // variants like `detailPageRoute`)
53
+ // - CRAWLER_ROUTE_ENUM - e.g. `CrawlerName`RouteLabel = ...
54
+ // - All Crawler Routes objs = { `detailPage`: detailPageRoute, ... }
55
+ // - The whole object of { crawlers: { ...}, routes: { ... } }
56
+ // !!!!!!!!!!
57
+ // UPDATES - END
58
+ // !!!!!!!!!!
20
59
  // As JS:
21
60
  // ```js
22
61
  // import { detailPageHandler } from './handlers';
@@ -39,8 +78,9 @@
39
78
  // input?: Partial<AllActorInputs>
40
79
  ///////// Hooks /////////
41
80
  // hooks?: {
42
- // validateInput?: (input) => MaybePromise<void>
43
- // onActorReady?: (actor) => MaybePromise<void>
81
+ // validateInput?: (input | null) => MaybePromise<void>
82
+ // onActorReady?: (actor) => MaybePromise<void> // NOTE: Move onActorReady FN type to own public type
83
+ // (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>) => MaybePromise<void>;
44
84
  // onBeforeHandler?: (ctx) => MaybePromise<void>
45
85
  // onAfterHandler?: (ctx) => MaybePromise<void>
46
86
  // }
@@ -84,7 +124,7 @@
84
124
  // ```
85
125
  // HOW TO GENERATE TYPE FOR HANDLERS?
86
126
  // ```ts
87
- // type detailPageHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>
127
+ // type detailPageRouteHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>
88
128
  // ```
89
129
  //
90
130
  // 1. For each route:
@@ -110,36 +150,50 @@
110
150
  // ```ts
111
151
  // type `Label`Handler = CrawleeOneRouteHandler<`type`CrawlingContext, `CrawlerName`RouterContext>
112
152
  // ```
113
- //
114
153
  // HOW TO GENERATE TYPE FOR MATCHERS?
115
154
  // ```ts
116
- // type `Label`Matcher = CrawleeOneRouteMatcher<Labels, RouterCtx, CrawlerCtx>
155
+ // type `Label`RouteMatcher = CrawleeOneRouteMatcher<Labels, RouterCtx, CrawlerCtx>
117
156
  // ```
118
157
  //
119
158
  // 1. For each route:
120
- // 1.1 Get `CrawlingContext`
121
- // 1.1.1 Take `mainCrawler`, and find corresponding crawler. If no `mainCrawler`, there should
122
- // be only 1 crawler, and take that (if more crawler, there should've been an error).
123
- // 1.1.2 Find the `crawler.type`
124
- // 1.1.3 Take corresponding type based on `crawler.type`, e.g. 'cheerio' => `CheerioCrawlingContext`;
125
- // 1.2 Get actor router context, e.g.:
159
+ // 1.1 Get `CrawlingContext` (See HANDLERS 1.1)
160
+ // 1.2 Get actor router context (See HANDLERS 1.2)
161
+ // 1.3 Create `CrawlerName`Label type (See HANDLERS 1.2.2)
162
+ // 1.4 Put it together:
126
163
  // ```ts
127
- // type `CrawlerName`RouterContext = CrawleeOneActorRouterCtx<`type`CrawlingContext, `CrawlerName`RouteLabel, AllActorInput>;
128
- // // NOTE: We use `AllActorInput` because since it's in the code, then we can handle ALL inputs
164
+ // type `Label`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Label, `CrawlerName`RouterContext, `type`CrawlingContext>
129
165
  // ```
130
- // 1.2.1 Use same CrawlingContext as in step 1.1 (e.g. `CheerioCrawlingContext`).
131
- // 1.2.2 Create `CrawlerName`Label type, e.g.
132
- // ```ts
133
- // type `CrawlerName`RouteLabel = "detailPage" | "otherLabel" | ...;
134
- // ```
135
- // 1.2.2.1 Take key (crawler name), and filter for all routes where `route.crawler == key`
136
- // 1.2.2.2 Take the keys of these routes
137
- // 1.2.2.3 Generate `type ${key}Label = ${keys.map((s) => '"' + s + '"').join(' | ')}`
166
+ // HOW TO GENERATE TYPE FOR ON_BEFORE AND ON_AFTER?
167
+ // ```ts
168
+ // type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>
169
+ // type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>
170
+ // ```
171
+ //
172
+ // It's the same as for HANDLERS!
173
+ // HOW TO GENERATE TYPE FOR ON_ACTOR_READY?
174
+ // ```ts
175
+ // type `CrawlerName`OnActorReady = (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>) => MaybePromise<void>;
176
+ // ```
177
+ //
178
+ // 1. For each crawler:
179
+ // 1.1 Get `CrawlingContext` (See HANDLERS 1.1)
180
+ // 1.2 Create `CrawlerName`Label type (See HANDLERS 1.2.2)
138
181
  // 1.3 Put it together:
139
182
  // ```ts
140
- // type `Label`Handler = CrawleeOneRouteHandler<`type`CrawlingContext, `CrawlerName`RouterContext>
183
+ // type `CrawlerName`OnActorReady = <TIO, Telem>(actor: CrawleeOneActorCtx<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;
141
184
  // ```
185
+ // HOW TO FIND THE FILE BASED ON WHICH TO GENERATE?
186
+ //
187
+ // 1. User has to specify:
188
+ // - Path to file that exports
189
+ // - Whether it's TS or JS (or can be inferred based on extension)
142
190
  //
191
+ // ```
192
+ // presenter@Juros-MacBook-Pro apify-actor-utils % npx ts-node --project tsconfig.base.json -e 'import config from "./src/lib/router/router"; console.log(config); // NOTE: I HAD TO TRIM OFF THE EXTENSION'
193
+ // { hello: 'world' }
194
+ // presenter@Juros-MacBook-Pro apify-actor-utils % pwd
195
+ // /Users/presenter/repos/apify-actor-utils
196
+ // ```
143
197
  // NOTES:
144
198
  // - Enum with available route labels would be extracted from this definition.
145
199
  // - If there is only 1 crawler defined, all routes use that. If there are more crawlers,
@@ -1 +1 @@
1
- {"version":3,"file":"composer.js","sourceRoot":"","sources":["../../src/composer.ts"],"names":[],"mappings":";AAAA,cAAc;AACd,iCAAiC;AAEjC,IAAI;AAEJ,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,qBAAqB;AACrB,0EAA0E;AAC1E,uEAAuE;AACvE,+BAA+B;AAC/B,IAAI;AACJ,MAAM;AAEN,SAAS;AACT,QAAQ;AACR,kDAAkD;AAElD,uBAAuB;AACvB,gBAAgB;AAChB,oBAAoB;AACpB,2BAA2B;AAC3B,EAAE;AACF,2CAA2C;AAC3C,kFAAkF;AAClF,uBAAuB;AACvB,+BAA+B;AAC/B,kCAAkC;AAClC,2GAA2G;AAC3G,kEAAkE;AAClE,0FAA0F;AAC1F,gDAAgD;AAChD,6BAA6B;AAC7B,gDAAgD;AAChD,wCAAwC;AACxC,yBAAyB;AACzB,kBAAkB;AAClB,wDAAwD;AACxD,uDAAuD;AACvD,wDAAwD;AACxD,uDAAuD;AACvD,UAAU;AACV,qCAAqC;AACrC,gBAAgB;AAChB,oBAAoB;AACpB,aAAa;AACb,iBAAiB;AACjB,QAAQ;AACR,OAAO;AACP,EAAE;AACF,cAAc;AACd,oBAAoB;AACpB,gCAAgC;AAChC,EAAE;AACF,4EAA4E;AAC5E,WAAW;AACX,sDAAsD;AACtD,0DAA0D;AAC1D,mGAAmG;AACnG,oEAAoE;AACpE,WAAW;AACX,WAAW;AACX,iBAAiB;AACjB,sEAAsE;AACtE,WAAW;AACX,WAAW;AACX,iBAAiB;AACjB,uEAAuE;AACvE,wDAAwD;AACxD,WAAW;AACX,EAAE;AACF,oCAAoC;AACpC,kCAAkC;AAClC,6BAA6B;AAC7B,iCAAiC;AACjC,WAAW;AACX,QAAQ;AACR,MAAM;AACN,KAAK;AACL,MAAM;AAEN,qCAAqC;AACrC,QAAQ;AACR,iGAAiG;AACjG,MAAM;AACN,EAAE;AACF,qBAAqB;AACrB,8BAA8B;AAC9B,kGAAkG;AAClG,+FAA+F;AAC/F,oCAAoC;AACpC,yGAAyG;AACzG,wCAAwC;AACxC,aAAa;AACb,kIAAkI;AAClI,qGAAqG;AACrG,WAAW;AACX,qFAAqF;AACrF,iDAAiD;AACjD,eAAe;AACf,2EAA2E;AAC3E,aAAa;AACb,gGAAgG;AAChG,8CAA8C;AAC9C,4FAA4F;AAC5F,yBAAyB;AACzB,aAAa;AACb,uGAAuG;AACvG,WAAW;AACX,EAAE;AAEF,qCAAqC;AACrC,QAAQ;AACR,8EAA8E;AAC9E,MAAM;AACN,EAAE;AACF,qBAAqB;AACrB,8BAA8B;AAC9B,kGAAkG;AAClG,+FAA+F;AAC/F,oCAAoC;AACpC,yGAAyG;AACzG,wCAAwC;AACxC,aAAa;AACb,kIAAkI;AAClI,qGAAqG;AACrG,WAAW;AACX,qFAAqF;AACrF,iDAAiD;AACjD,eAAe;AACf,2EAA2E;AAC3E,aAAa;AACb,gGAAgG;AAChG,8CAA8C;AAC9C,4FAA4F;AAC5F,yBAAyB;AACzB,aAAa;AACb,uGAAuG;AACvG,WAAW;AACX,EAAE;AAEF,SAAS;AACT,8EAA8E;AAC9E,wFAAwF;AACxF,oDAAoD;AAEpD,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,QAAQ;AACR,IAAI;AAEJ,kCAAkC;AAClC,kBAAkB;AAClB,uBAAuB;AACvB,6BA
A6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,4BAA4B;AAC5B,QAAQ;AACR,IAAI;AAEJ,wBAAwB;AACxB,uBAAuB;AACvB,QAAQ;AACR,IAAI;AAEJ,4BAA4B;AAC5B,qBAAqB;AACrB,QAAQ;AACR,IAAI;AAEJ,qBAAqB;AACrB,sCAAsC;AACtC,0EAA0E;AAC1E,uEAAuE;AACvE,gCAAgC;AAChC,IAAI;AAEJ,kBAAkB;AAClB,4BAA4B;AAC5B,qFAAqF;AACrF,yBAAyB;AACzB,qEAAqE;AACrE,kDAAkD;AAClD,sDAAsD;AACtD,+FAA+F;AAC/F,gEAAgE;AAChE,OAAO;AACP,IAAI;AACJ,MAAM","sourcesContent":["// @ts-nocheck\n// interface ComposerCrawlerDef {\n\n// }\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// }\n\n// route detailPage {\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler\n// }\n// ```\n\n// As JS:\n// ```js\n// import { detailPageHandler } from './handlers';\n\n// const scraperDef = {\n// crawlers: {\n// mainCrawler {\n// type: 'playwright'\n//\n///////// Override crawler config /////////\n// //////// { ...crawlerConfigDefaults, ...io.getInput(), ...crawlerConfig }\n// crawlerConfig?\n// crawlerConfigDefaults?\n///////// Override input /////////\n// ///// If mergeInput = true, will merge inputDefaults, input, and io.getInput() similarly to config\n// //////// { ...inputDefaults, ...io.getInput(), ...input }\n// ///// If mergeInput = false, io.getInput() will be ignored if `input` is provided\n// //////// { ...inputDefaults, ...input }\n// mergeInput?: boolean\n// inputDefaults?: Partial<AllActorInputs>\n// input?: Partial<AllActorInputs>\n///////// Hooks /////////\n// hooks?: {\n// validateInput?: (input) => MaybePromise<void>\n// onActorReady?: (actor) => MaybePromise<void>\n// onBeforeHandler?: (ctx) => MaybePromise<void>\n// onAfterHandler?: (ctx) => MaybePromise<void>\n// }\n///////// Override services /////////\n// proxy?,\n// telemetry?,\n// io?,\n// router?,\n// }\n// },\n//\n// routes: {\n// detailPage: {\n// crawler?: 
'mainCrawler'\n//\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i,\n// OR\n// match: async (url, ctx, route, handlers) => {\n// const dom = cheerioPortadom(ctx.$.root(), url);\n// const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;\n// return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;\n// },\n// OR\n// match: [\n// /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// ],\n// OR\n// match: [\n// /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i,\n// async (url, ctx, route, handlers) => { ... },\n// ],\n//\n// handler: detailPageHandler,\n// handler: async (ctx) => {\n// ctx.actor.pushData\n// ctx.actor.pushRequests\n// },\n// }\n// }\n// };\n// ```\n\n// HOW TO GENERATE TYPE FOR HANDLERS?\n// ```ts\n// type detailPageHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>\n// ```\n//\n// 1. For each route:\n// 1.1 Get `CrawlingContext`\n// 1.1.1 Take `mainCrawler`, and find corresponding crawler. If no `mainCrawler`, there should\n// be only 1 crawler, and take that (if more crawler, there should've been an error).\n// 1.1.2 Find the `crawler.type`\n// 1.1.3 Take corresponding type based on `crawler.type`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n// 1.2 Get actor router context, e.g.:\n// ```ts\n// type `CrawlerName`RouterContext = CrawleeOneActorRouterCtx<`type`CrawlingContext, `CrawlerName`RouteLabel, AllActorInput>;\n// // NOTE: We use `AllActorInput` because since it's in the code, then we can handle ALL inputs\n// ```\n// 1.2.1 Use same CrawlingContext as in step 1.1 (e.g. 
`CheerioCrawlingContext`).\n// 1.2.2 Create `CrawlerName`Label type, e.g.\n// ```ts\n// type `CrawlerName`RouteLabel = \"detailPage\" | \"otherLabel\" | ...;\n// ```\n// 1.2.2.1 Take key (crawler name), and filter for all routes where `route.crawler == key`\n// 1.2.2.2 Take the keys of these routes\n// 1.2.2.3 Generate `type ${key}Label = ${keys.map((s) => '\"' + s + '\"').join(' | ')}`\n// 1.3 Put it together:\n// ```ts\n// type `Label`Handler = CrawleeOneRouteHandler<`type`CrawlingContext, `CrawlerName`RouterContext>\n// ```\n//\n\n// HOW TO GENERATE TYPE FOR MATCHERS?\n// ```ts\n// type `Label`Matcher = CrawleeOneRouteMatcher<Labels, RouterCtx, CrawlerCtx>\n// ```\n//\n// 1. For each route:\n// 1.1 Get `CrawlingContext`\n// 1.1.1 Take `mainCrawler`, and find corresponding crawler. If no `mainCrawler`, there should\n// be only 1 crawler, and take that (if more crawler, there should've been an error).\n// 1.1.2 Find the `crawler.type`\n// 1.1.3 Take corresponding type based on `crawler.type`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n// 1.2 Get actor router context, e.g.:\n// ```ts\n// type `CrawlerName`RouterContext = CrawleeOneActorRouterCtx<`type`CrawlingContext, `CrawlerName`RouteLabel, AllActorInput>;\n// // NOTE: We use `AllActorInput` because since it's in the code, then we can handle ALL inputs\n// ```\n// 1.2.1 Use same CrawlingContext as in step 1.1 (e.g. 
`CheerioCrawlingContext`).\n// 1.2.2 Create `CrawlerName`Label type, e.g.\n// ```ts\n// type `CrawlerName`RouteLabel = \"detailPage\" | \"otherLabel\" | ...;\n// ```\n// 1.2.2.1 Take key (crawler name), and filter for all routes where `route.crawler == key`\n// 1.2.2.2 Take the keys of these routes\n// 1.2.2.3 Generate `type ${key}Label = ${keys.map((s) => '\"' + s + '\"').join(' | ')}`\n// 1.3 Put it together:\n// ```ts\n// type `Label`Handler = CrawleeOneRouteHandler<`type`CrawlingContext, `CrawlerName`RouterContext>\n// ```\n//\n\n// NOTES:\n// - Enum with available route labels would be extracted from this definition.\n// - If there is only 1 crawler defined, all routes use that. If there is more crawlers,\n// they should define which crawler it relates to.\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// ...\n// }\n\n// crawler productDetailsCrawler {\n// type: cheerio\n// datasetId: '45678'\n// requestQueueId: 'abcdef'\n// options: {\n// ...\n// }\n// }\n\n// requestQueue extraQueue {\n// ...\n// }\n\n// dataset mainDataset {\n// datasetId: '45678'\n// ...\n// }\n\n// keyValueStore mainStore {\n// datasetId: 'xyz'\n// ...\n// }\n\n// route detailPage {\n// crawler: 'productDetailsCrawler',\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler,\n// }\n\n// route listing {\n// crawler: 'mainCrawler',\n// // Note: route object name is the 'label' by default, but label can be overriden\n// label: 'DETAIL_PAGE'\n// // NOTE: Otherwise `match` is a function that returns true/false\n// match: async (url, ctx, route, handlers) => {\n// const dom = cheerioPortadom(ctx.$.root(), url);\n// const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;\n// return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;\n// },\n// }\n// 
```\n"]}
1
+ {"version":3,"file":"composer.js","sourceRoot":"","sources":["../../src/composer.ts"],"names":[],"mappings":";AAAA,cAAc;AACd,iCAAiC;AAEjC,IAAI;AAEJ,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,qBAAqB;AACrB,0EAA0E;AAC1E,uEAAuE;AACvE,+BAA+B;AAC/B,IAAI;AACJ,MAAM;AAEN,aAAa;AACb,kBAAkB;AAClB,aAAa;AAEb,mEAAmE;AACnE,2CAA2C;AAC3C,uEAAuE;AACvE,MAAM;AACN,IAAI;AACJ,gBAAgB;AAChB,cAAc;AACd,kBAAkB;AAClB,sBAAsB;AACtB,6BAA6B;AAC7B,uDAAuD;AACvD,UAAU;AACV,SAAS;AACT,MAAM;AACN,KAAK;AACL,MAAM;AACN,uGAAuG;AACvG,8EAA8E;AAC9E,EAAE;AACF,8EAA8E;AAC9E,0CAA0C;AAC1C,EAAE;AACF,+BAA+B;AAC/B,gCAAgC;AAChC,0GAA0G;AAC1G,wCAAwC;AACxC,sBAAsB;AACtB,wEAAwE;AACxE,sGAAsG;AACtG,qCAAqC;AACrC,4DAA4D;AAC5D,qEAAqE;AACrE,8DAA8D;AAE9D,aAAa;AACb,gBAAgB;AAChB,aAAa;AAEb,SAAS;AACT,QAAQ;AACR,kDAAkD;AAElD,uBAAuB;AACvB,gBAAgB;AAChB,oBAAoB;AACpB,2BAA2B;AAC3B,EAAE;AACF,2CAA2C;AAC3C,kFAAkF;AAClF,uBAAuB;AACvB,+BAA+B;AAC/B,kCAAkC;AAClC,2GAA2G;AAC3G,kEAAkE;AAClE,0FAA0F;AAC1F,gDAAgD;AAChD,6BAA6B;AAC7B,gDAAgD;AAChD,wCAAwC;AACxC,yBAAyB;AACzB,kBAAkB;AAClB,+DAA+D;AAC/D,6GAA6G;AAC7G,4GAA4G;AAC5G,wDAAwD;AACxD,uDAAuD;AACvD,UAAU;AACV,qCAAqC;AACrC,gBAAgB;AAChB,oBAAoB;AACpB,aAAa;AACb,iBAAiB;AACjB,QAAQ;AACR,OAAO;AACP,EAAE;AACF,cAAc;AACd,oBAAoB;AACpB,gCAAgC;AAChC,EAAE;AACF,4EAA4E;AAC5E,WAAW;AACX,sDAAsD;AACtD,0DAA0D;AAC1D,mGAAmG;AACnG,oEAAoE;AACpE,WAAW;AACX,WAAW;AACX,iBAAiB;AACjB,sEAAsE;AACtE,WAAW;AACX,WAAW;AACX,iBAAiB;AACjB,uEAAuE;AACvE,wDAAwD;AACxD,WAAW;AACX,EAAE;AACF,oCAAoC;AACpC,kCAAkC;AAClC,6BAA6B;AAC7B,iCAAiC;AACjC,WAAW;AACX,QAAQ;AACR,MAAM;AACN,KAAK;AACL,MAAM;AAEN,qCAAqC;AACrC,QAAQ;AACR,sGAAsG;AACtG,MAAM;AACN,EAAE;AACF,qBAAqB;AACrB,8BAA8B;AAC9B,kGAAkG;AAClG,+FAA+F;AAC/F,oCAAoC;AACpC,yGAAyG;AACzG,wCAAwC;AACxC,aAAa;AACb,kIAAkI;AAClI,qGAAqG;AACrG,WAAW;AACX,qFAAqF;AACrF,iDAAiD;AACjD,eAAe;AACf,2EAA2E;AAC3E,aAAa;AACb,gGAAgG;AAChG,8CAA8C;AAC9C,4FAA4F;AAC5F,yBAAyB;AACzB,aAAa;AACb,uGAAuG;AACvG,WAAW;AAEX,qCAAqC;AACrC,QAAQ;AACR,mFAAmF;AACnF,MAAM;AACN,EAAE;AACF,qBAAqB;AACrB,iDAAiD;AACjD,
oDAAoD;AACpD,4DAA4D;AAC5D,yBAAyB;AACzB,aAAa;AACb,2HAA2H;AAC3H,WAAW;AAEX,mDAAmD;AACnD,QAAQ;AACR,4GAA4G;AAC5G,2GAA2G;AAC3G,MAAM;AACN,EAAE;AACF,iCAAiC;AAEjC,2CAA2C;AAC3C,QAAQ;AACR,sHAAsH;AACtH,MAAM;AACN,EAAE;AACF,uBAAuB;AACvB,iDAAiD;AACjD,4DAA4D;AAC5D,yBAAyB;AACzB,aAAa;AACb,8KAA8K;AAC9K,WAAW;AAEX,mDAAmD;AACnD,EAAE;AACF,0BAA0B;AAC1B,+BAA+B;AAC/B,mEAAmE;AACnE,EAAE;AACF,MAAM;AACN,4MAA4M;AAC5M,qBAAqB;AACrB,sDAAsD;AACtD,2CAA2C;AAC3C,MAAM;AAEN,SAAS;AACT,8EAA8E;AAC9E,wFAAwF;AACxF,oDAAoD;AAEpD,MAAM;AACN,wBAAwB;AACxB,qBAAqB;AACrB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,QAAQ;AACR,IAAI;AAEJ,kCAAkC;AAClC,kBAAkB;AAClB,uBAAuB;AACvB,6BAA6B;AAC7B,eAAe;AACf,UAAU;AACV,MAAM;AACN,IAAI;AAEJ,4BAA4B;AAC5B,QAAQ;AACR,IAAI;AAEJ,wBAAwB;AACxB,uBAAuB;AACvB,QAAQ;AACR,IAAI;AAEJ,4BAA4B;AAC5B,qBAAqB;AACrB,QAAQ;AACR,IAAI;AAEJ,qBAAqB;AACrB,sCAAsC;AACtC,0EAA0E;AAC1E,uEAAuE;AACvE,gCAAgC;AAChC,IAAI;AAEJ,kBAAkB;AAClB,4BAA4B;AAC5B,qFAAqF;AACrF,yBAAyB;AACzB,qEAAqE;AACrE,kDAAkD;AAClD,sDAAsD;AACtD,+FAA+F;AAC/F,gEAAgE;AAChE,OAAO;AACP,IAAI;AACJ,MAAM","sourcesContent":["// @ts-nocheck\n// interface ComposerCrawlerDef {\n\n// }\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// }\n\n// route detailPage {\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler\n// }\n// ```\n\n// !!!!!!!!!!\n// UPDATES - START\n// !!!!!!!!!!\n\n// THE CONFIG SHOULD BE FIRST DEFINED AS PLAIN OBJECT, SO NO JS/TS,\n// SO WE CAN IMPORT IT FOR TYPE GENERATION.\n// SO IT SHOULD AT FIRST LOOK LIKE THIS (COULD BE JS/TS/JSON/YAML/...):\n// ```\n// {\n// version: 1,\n// schema: {\n// crawlers: {\n// mainCrawler {\n// type: 'playwright'\n// routes: ['listingPage', detailPage', 'home']\n// }\n// },\n// }\n// };\n// ```\n// WE IMPORT THAT USING THE COSMICCONFIG (with 
https://github.com/codex-/cosmiconfig-typescript-loader)\n// See https://github.com/cosmiconfig/cosmiconfig#usage-for-tooling-developers\n//\n// AT THIS POINT DONT FORGET TO HAVE A VALIDATION SCHEMA TO COMPARE THE LAODED\n// CONFIG AGAINST. THROW ERROR IF INVALID.\n//\n// I SHOULD GENERATE TYPES FOR:\n// - The types I mentioned below\n// - Each Crawler based on their type (e.g. CheerioCrawleeOneCrawler<Labels, Inputs, ...>) (But also named\n// variants like `mainCrawlerCrawler`)\n// - CRAWLER_NAME_ENUM\n// - All Crawlers obj = { `CrawlerName`: CheerioCrawleeOneCrawler, ... }\n// - Each Route based on their type (e.g. CheerioCrawleeOneRoute<Labels, Inputs, ...>) (But also named\n// variants like `detailPageRoute`)\n// - CRAWLER_ROUTE_ENUM - e.g. `CrawlerName`RouteLabel = ...\n// - All Crawler Routes objs = { `detailPage`: detailPageRoute, ... }\n// - The whole object of { crawlers: { ...}, routes: { ... } }\n\n// !!!!!!!!!!\n// UPDATES - END\n// !!!!!!!!!!\n\n// As JS:\n// ```js\n// import { detailPageHandler } from './handlers';\n\n// const scraperDef = {\n// crawlers: {\n// mainCrawler {\n// type: 'playwright'\n//\n///////// Override crawler config /////////\n// //////// { ...crawlerConfigDefaults, ...io.getInput(), ...crawlerConfig }\n// crawlerConfig?\n// crawlerConfigDefaults?\n///////// Override input /////////\n// ///// If mergeInput = true, will merge inputDefaults, input, and io.getInput() similarly to config\n// //////// { ...inputDefaults, ...io.getInput(), ...input }\n// ///// If mergeInput = false, io.getInput() will be ignored if `input` is provided\n// //////// { ...inputDefaults, ...input }\n// mergeInput?: boolean\n// inputDefaults?: Partial<AllActorInputs>\n// input?: Partial<AllActorInputs>\n///////// Hooks /////////\n// hooks?: {\n// validateInput?: (input | null) => MaybePromise<void>\n// onActorReady?: (actor) => MaybePromise<void> // NOTE: Move onACtorReady FN type to own public type\n// (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, 
Ctx>) => MaybePromise<void>;\n// onBeforeHandler?: (ctx) => MaybePromise<void>\n// onAfterHandler?: (ctx) => MaybePromise<void>\n// }\n///////// Override services /////////\n// proxy?,\n// telemetry?,\n// io?,\n// router?,\n// }\n// },\n//\n// routes: {\n// detailPage: {\n// crawler?: 'mainCrawler'\n//\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i,\n// OR\n// match: async (url, ctx, route, handlers) => {\n// const dom = cheerioPortadom(ctx.$.root(), url);\n// const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;\n// return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;\n// },\n// OR\n// match: [\n// /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// ],\n// OR\n// match: [\n// /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i,\n// async (url, ctx, route, handlers) => { ... },\n// ],\n//\n// handler: detailPageHandler,\n// handler: async (ctx) => {\n// ctx.actor.pushData\n// ctx.actor.pushRequests\n// },\n// }\n// }\n// };\n// ```\n\n// HOW TO GENERATE TYPE FOR HANDLERS?\n// ```ts\n// type detailPageRouteHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>\n// ```\n//\n// 1. For each route:\n// 1.1 Get `CrawlingContext`\n// 1.1.1 Take `mainCrawler`, and find corresponding crawler. If no `mainCrawler`, there should\n// be only 1 crawler, and take that (if more crawler, there should've been an error).\n// 1.1.2 Find the `crawler.type`\n// 1.1.3 Take corresponding type based on `crawler.type`, e.g. 'cheerio' => `CheerioCrawlingContext`;\n// 1.2 Get actor router context, e.g.:\n// ```ts\n// type `CrawlerName`RouterContext = CrawleeOneActorRouterCtx<`type`CrawlingContext, `CrawlerName`RouteLabel, AllActorInput>;\n// // NOTE: We use `AllActorInput` because since it's in the code, then we can handle ALL inputs\n// ```\n// 1.2.1 Use same CrawlingContext as in step 1.1 (e.g. 
`CheerioCrawlingContext`).\n// 1.2.2 Create `CrawlerName`Label type, e.g.\n// ```ts\n// type `CrawlerName`RouteLabel = \"detailPage\" | \"otherLabel\" | ...;\n// ```\n// 1.2.2.1 Take key (crawler name), and filter for all routes where `route.crawler == key`\n// 1.2.2.2 Take the keys of these routes\n// 1.2.2.3 Generate `type ${key}Label = ${keys.map((s) => '\"' + s + '\"').join(' | ')}`\n// 1.3 Put it together:\n// ```ts\n// type `Label`Handler = CrawleeOneRouteHandler<`type`CrawlingContext, `CrawlerName`RouterContext>\n// ```\n\n// HOW TO GENERATE TYPE FOR MATCHERS?\n// ```ts\n// type `Label`RouteMatcher = CrawleeOneRouteMatcher<Labels, RouterCtx, CrawlerCtx>\n// ```\n//\n// 1. For each route:\n// 1.1 Get `CrawlingContext` (See HANDLERS 1.1)\n// 1.2 Get actor router context (See HANDLERS 1.2)\n// 1.3 Create `CrawlerName`Label type (See HANDLERS 1.2.2)\n// 1.4 Put it together:\n// ```ts\n// type `Label`Matcher = CrawleeOneRouteMatcher<`CrawlerName`Label, `CrawlerName`RouterContext, `type`CrawlingContext>\n// ```\n\n// HOW TO GENERATE TYPE FOR ON_BEFORE AND ON_AFTER?\n// ```ts\n// type `CrawlerName`OnBeforeHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>\n// type `CrawlerName`OnAfterHandler = CrawleeOneRouteHandler<CheerioCrawlingContext, ProfesiaRouterContext>\n// ```\n//\n// It's the same as for HANDLERS!\n\n// HOW TO GENERATE TYPE FOR ON_ACTOR_READY?\n// ```ts\n// type `CrawlerName`OnActorReady = (actor: CrawleeOneActorCtx<Labels, Input, TIO, Telem, Ctx>) => MaybePromise<void>;\n// ```\n//\n// 1. For each crawler:\n// 1.1 Get `CrawlingContext` (See HANDLERS 1.1)\n// 1.2 Create `CrawlerName`Label type (See HANDLERS 1.2.2)\n// 1.3 Put it together:\n// ```ts\n// type `CrawlerName`OnActorReady = <TIO, Telem>(actor: CrawleeOneActorCtx<`CrawlerName`Label, AllActorInputs, TIO, Telem, `type`CrawlingContext>) => MaybePromise<void>;\n// ```\n\n// HOW TO FIND THE FILE BASED ON WHICH TO GENERATE?\n//\n// 1. 
User has to specify:\n// - Path to file that exports\n// - Whether it's TS or JS (or can be inferred based on extension)\n//\n// ```\n// presenter@Juros-MacBook-Pro apify-actor-utils % npx ts-node --project tsconfig.base.json -e 'import config from \"./src/lib/router/router\"; console.log(config); // NOTE: I HAD TO TRIM OFF THE EXTENSION'\n// { hello: 'world' }\n// presenter@Juros-MacBook-Pro apify-actor-utils % pwd\n// /Users/presenter/repos/apify-actor-utils\n// ```\n\n// NOTES:\n// - Enum with available route labels would be extracted from this definition.\n// - If there is only 1 crawler defined, all routes use that. If there is more crawlers,\n// they should define which crawler it relates to.\n\n// ```\n// crawler mainCrawler {\n// type: playwright\n// datasetId: '45678'\n// errorDatasetId: '098765'\n// options: {\n// ...\n// }\n// ...\n// }\n\n// crawler productDetailsCrawler {\n// type: cheerio\n// datasetId: '45678'\n// requestQueueId: 'abcdef'\n// options: {\n// ...\n// }\n// }\n\n// requestQueue extraQueue {\n// ...\n// }\n\n// dataset mainDataset {\n// datasetId: '45678'\n// ...\n// }\n\n// keyValueStore mainStore {\n// datasetId: 'xyz'\n// ...\n// }\n\n// route detailPage {\n// crawler: 'productDetailsCrawler',\n// // NOTE: If `match` is a regex, the regex is compared against the URL\n// match: /[\\W]profesia\\.sk\\/praca\\/zoznam-[a-z0-9-]+\\/?(?:[?#~]|$)/i\n// handler: detailPageHandler,\n// }\n\n// route listing {\n// crawler: 'mainCrawler',\n// // Note: route object name is the 'label' by default, but label can be overriden\n// label: 'DETAIL_PAGE'\n// // NOTE: Otherwise `match` is a function that returns true/false\n// match: async (url, ctx, route, handlers) => {\n// const dom = cheerioPortadom(ctx.$.root(), url);\n// const isNotCustomDesign = await dom.findMany('body.listing:not(.custom-design)').length;\n// return isUrlOfCompanyProfile(url) && !!isNotCustomDesign;\n// },\n// }\n// ```\n"]}
File without changes
@@ -0,0 +1,38 @@
1
+ "use strict";
2
+ // import type { CrawlerType } from "../types";
3
+ // export interface CrawleeOneConfig {
4
+ // /** Version of the CrawleeOne config. */
5
+ // version: 1;
6
+ // /** Schema defining the crawlers in this project. This schema is used for code generation. */
7
+ // schema: {
8
+ // /** Object holding crawler configurations. Each crawler is identified by its key.
9
+ // *
10
+ // * E.g.
11
+ // *
12
+ // * ```js
13
+ // * {
14
+ // * myCrawler: {
15
+ // * type: 'cheerio',
16
+ // * routes: [...],
17
+ // * }
18
+ // * }
19
+ // * ```
20
+ // */
21
+ // crawlers: {
22
+ // mainCrawler {
23
+ // type: 'playwright'
24
+ // routes: ['listingPage', 'detailPage', 'home']
25
+ // }
26
+ // },
27
+ // }
28
+ // };
29
+ // export interface CrawleeOneConfigCrawlerDef {
30
+ // /** Crawler type - Each type is linked to a different Crawlee crawler class.
31
+ // * Different classes may use different technologies / stack for scraping.
32
+ // *
33
+ // * E.g. type `cheerio` will use `CheerioCrawler` class.
34
+ // */
35
+ // type: CrawlerType;
36
+ // routes: ['listingPage', 'detailPage', 'home']
37
+ // }
38
+ //# sourceMappingURL=types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/config/types.ts"],"names":[],"mappings":";AAAA,+CAA+C;AAE/C,sCAAsC;AACtC,6CAA6C;AAC7C,gBAAgB;AAChB,kGAAkG;AAClG,cAAc;AACd,yFAAyF;AACzF,UAAU;AACV,cAAc;AACd,UAAU;AACV,eAAe;AACf,WAAW;AACX,wBAAwB;AACxB,8BAA8B;AAC9B,4BAA4B;AAC5B,aAAa;AACb,WAAW;AACX,aAAa;AACb,UAAU;AACV,kBAAkB;AAClB,sBAAsB;AACtB,6BAA6B;AAC7B,uDAAuD;AACvD,UAAU;AACV,SAAS;AACT,MAAM;AACN,KAAK;AAEL,gDAAgD;AAChD,iFAAiF;AACjF,8EAA8E;AAC9E,QAAQ;AACR,4DAA4D;AAC5D,QAAQ;AACR,uBAAuB;AACvB,iDAAiD;AACjD,IAAI","sourcesContent":["// import type { CrawlerType } from \"../types\";\n\n// export interface CrawleeOneConfig {\n// /** Version of the CrawleeOne config. */\n// version: 1;\n// /** Schema defining the crawlers in this project. This schema is used for code generation. */\n// schema: {\n// /** Object holding crawler configurations. Each crawler is idefntified by its key.\n// * \n// * E.g.\n// * \n// * ```js\n// * {\n// * myCrawler: {\n// * type: 'cheerio',\n// * routes: [...],\n// * }\n// * }\n// * ```\n// */\n// crawlers: {\n// mainCrawler {\n// type: 'playwright'\n// routes: ['listingPage', detailPage', 'home']\n// }\n// },\n// }\n// };\n\n// export interface CrawleeOneConfigCrawlerDef {\n// /** Crawler type - Each type is linked to a different Crawlee crawler class.\n// * Different classes may use different technologies / stack for scraping.\n// * \n// * E.g. type `cheerio` will use `CheerioCrawler` class.\n// */\n// type: CrawlerType;\n// routes: ['listingPage', detailPage', 'home']\n// }\n"]}
@@ -6,9 +6,6 @@ export * from './lib/io/dataset';
6
6
  export * from './lib/io/requestQueue';
7
7
  export * from './lib/io/pushData';
8
8
  export * from './lib/io/pushRequests';
9
- export * from './lib/actions/dom';
10
- export * from './lib/actions/domUtils';
11
- export * from './lib/actions/page';
12
9
  export * from './lib/actions/scrapeListing';
13
10
  export * from './lib/error/errorHandler';
14
11
  export * from './lib/migrate/localMigrator';
package/dist/cjs/index.js CHANGED
@@ -22,9 +22,6 @@ __exportStar(require("./lib/io/dataset"), exports);
22
22
  __exportStar(require("./lib/io/requestQueue"), exports);
23
23
  __exportStar(require("./lib/io/pushData"), exports);
24
24
  __exportStar(require("./lib/io/pushRequests"), exports);
25
- __exportStar(require("./lib/actions/dom"), exports);
26
- __exportStar(require("./lib/actions/domUtils"), exports);
27
- __exportStar(require("./lib/actions/page"), exports);
28
25
  __exportStar(require("./lib/actions/scrapeListing"), exports);
29
26
  __exportStar(require("./lib/error/errorHandler"), exports);
30
27
  __exportStar(require("./lib/migrate/localMigrator"), exports);
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,oDAAkC;AAClC,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,oDAAkC;AAClC,yDAAuC;AACvC,qDAAmC;AACnC,8DAA4C;AAC5C,2DAAyC;AACzC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,4DAA0C;AAC1C,2DAAyC;AACzC,sDAAoC;AACpC,qDAAmC;AACnC,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC;AACzC,wDAAsC;AACtC,yDAAuC","sourcesContent":["export * from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/dom';\nexport * from './lib/actions/domUtils';\nexport * from './lib/actions/page';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/apify/readme';\nexport * from './lib/readme/apify/types';\nexport * from './lib/router/router';\nexport * from './lib/router/types';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\nexport * from './lib/telemetry/types';\nexport * from './lib/telemetry/sentry';\n"]}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,oDAAkC;AAClC,oDAAkC;AAClC,kDAAgC;AAChC,+CAA6B;AAC7B,mDAAiC;AACjC,wDAAsC;AACtC,oDAAkC;AAClC,wDAAsC;AACtC,8DAA4C;AAC5C,2DAAyC;AACzC,8DAA4C;AAC5C,2DAAyC;AACzC,sDAAoC;AACpC,4DAA0C;AAC1C,2DAAyC;AACzC,sDAAoC;AACpC,qDAAmC;AACnC,4CAA0B;AAC1B,mDAAiC;AACjC,6DAA2C;AAE3C,2DAAyC;AACzC,2DAAyC;AACzC,wDAAsC;AACtC,yDAAuC","sourcesContent":["export * from './lib/actor/actor';\nexport * from './lib/actor/types';\nexport * from './lib/actorSpec';\nexport * from './lib/config';\nexport * from './lib/io/dataset';\nexport * from './lib/io/requestQueue';\nexport * from './lib/io/pushData';\nexport * from './lib/io/pushRequests';\nexport * from './lib/actions/scrapeListing';\nexport * from './lib/error/errorHandler';\nexport * from './lib/migrate/localMigrator';\nexport * from './lib/migrate/localState';\nexport * from './lib/migrate/types';\nexport * from './lib/readme/apify/readme';\nexport * from './lib/readme/apify/types';\nexport * from './lib/router/router';\nexport * from './lib/router/types';\nexport * from './lib/log';\nexport * from './lib/test/actor';\nexport * from './lib/test/mockApifyClient';\nexport type { CrawlerUrl, CrawlerType } from './types';\nexport * from './lib/integrations/apify';\nexport * from './lib/integrations/types';\nexport * from './lib/telemetry/types';\nexport * from './lib/telemetry/sentry';\n"]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawlee-one",
3
- "version": "1.1.2",
3
+ "version": "1.1.3",
4
4
  "private": false,
5
5
  "description": "Crawlee One is a framework built on top of Crawlee and Apify for writing robust and highly configurable web scrapers",
6
6
  "author": "Juro Oravec <juraj.oravec.josefson@gmail.com>",