@crawlee/puppeteer 4.0.0-beta.27 → 4.0.0-beta.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { BrowserCrawlerOptions, BrowserCrawlingContext, BrowserHook, GetUserDataFromRequest, RouterRoutes } from '@crawlee/browser';
|
|
2
|
-
import { BrowserCrawler
|
|
2
|
+
import { BrowserCrawler } from '@crawlee/browser';
|
|
3
3
|
import type { PuppeteerController, PuppeteerPlugin } from '@crawlee/browser-pool';
|
|
4
4
|
import type { Dictionary } from '@crawlee/types';
|
|
5
5
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
@@ -120,7 +120,6 @@ export interface PuppeteerCrawlerOptions<ContextExtension = Dictionary<never>, E
|
|
|
120
120
|
export declare class PuppeteerCrawler<ContextExtension = Dictionary<never>, ExtendedContext extends PuppeteerCrawlingContext = PuppeteerCrawlingContext & ContextExtension> extends BrowserCrawler<Page, HTTPResponse, PuppeteerController, {
|
|
121
121
|
browserPlugins: [PuppeteerPlugin];
|
|
122
122
|
}, LaunchOptions, PuppeteerCrawlingContext, ContextExtension, ExtendedContext> {
|
|
123
|
-
readonly config: Configuration;
|
|
124
123
|
protected static optionsShape: {
|
|
125
124
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
126
125
|
browserPoolOptions: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
@@ -182,6 +181,12 @@ export declare class PuppeteerCrawler<ContextExtension = Dictionary<never>, Exte
|
|
|
182
181
|
onSkippedRequest: import("ow").Predicate<Function> & import("ow").BasePredicate<Function | undefined>;
|
|
183
182
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
184
183
|
httpClient: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
184
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
185
|
+
configuration: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
186
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
187
|
+
storageClient: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
188
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
189
|
+
eventManager: import("ow").ObjectPredicate<object> & import("ow").BasePredicate<object | undefined>;
|
|
185
190
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
186
191
|
minConcurrency: import("ow").NumberPredicate & import("ow").BasePredicate<number | undefined>;
|
|
187
192
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
@@ -202,7 +207,7 @@ export declare class PuppeteerCrawler<ContextExtension = Dictionary<never>, Exte
|
|
|
202
207
|
/**
|
|
203
208
|
* All `PuppeteerCrawler` parameters are passed via an options object.
|
|
204
209
|
*/
|
|
205
|
-
constructor(options?: PuppeteerCrawlerOptions<ContextExtension, ExtendedContext
|
|
210
|
+
constructor(options?: PuppeteerCrawlerOptions<ContextExtension, ExtendedContext>);
|
|
206
211
|
private enhanceContext;
|
|
207
212
|
protected _navigationHandler(crawlingContext: PuppeteerCrawlingContext, gotoOptions: DirectNavigationOptions): Promise<HTTPResponse | null>;
|
|
208
213
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/puppeteer-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,qBAAqB,EACrB,sBAAsB,EACtB,WAAW,EACX,sBAAsB,EACtB,YAAY,EACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,cAAc,
|
|
1
|
+
{"version":3,"file":"puppeteer-crawler.d.ts","sourceRoot":"","sources":["../../src/internals/puppeteer-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,qBAAqB,EACrB,sBAAsB,EACtB,WAAW,EACX,sBAAsB,EACtB,YAAY,EACf,MAAM,kBAAkB,CAAC;AAC1B,OAAO,EAAE,cAAc,EAAwB,MAAM,kBAAkB,CAAC;AACxE,OAAO,KAAK,EAAsB,mBAAmB,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAEtG,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGnE,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,yBAAyB,CAAC;AAGtE,OAAO,KAAK,EAER,uBAAuB,EAGvB,qBAAqB,EAExB,MAAM,4BAA4B,CAAC;AAGpC,MAAM,WAAW,wBAAwB,CAAC,QAAQ,SAAS,UAAU,GAAG,UAAU,CAC9E,SAAQ,sBAAsB,CAAC,IAAI,EAAE,YAAY,EAAE,mBAAmB,EAAE,QAAQ,CAAC,EAC7E,qBAAqB;CAAG;AAChC,MAAM,WAAW,aAAc,SAAQ,WAAW,CAAC,wBAAwB,EAAE,oBAAoB,CAAC;CAAG;AACrG,MAAM,MAAM,oBAAoB,GAAG,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAE/D,MAAM,WAAW,uBAAuB,CACpC,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,wBAAwB,GAAG,wBAAwB,GAAG,gBAAgB,CAChG,SAAQ,qBAAqB,CACvB,IAAI,EACJ,YAAY,EACZ,mBAAmB,EACnB,wBAAwB,EACxB,gBAAgB,EAChB,eAAe,EACf;IAAE,cAAc,EAAE,CAAC,eAAe,CAAC,CAAA;CAAE,CACxC;IACD;;OAEG;IACH,aAAa,CAAC,EAAE,sBAAsB,CAAC;IAEvC;;;;;;;;;;;;;;;;OAgBG;IACH,kBAAkB,CAAC,EAAE,aAAa,EAAE,CAAC;IAErC;;;;;;;;;;;;;;OAcG;IACH,mBAAmB,CAAC,EAAE,aAAa,EAAE,CAAC;CACzC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8DG;AACH,qBAAa,gBAAgB,CACzB,gBAAgB,GAAG,UAAU,CAAC,KAAK,CAAC,EACpC,eAAe,SAAS,wBAAwB,GAAG,wBAAwB,GAAG,gBAAgB,CAChG,SAAQ,cAAc,CACpB,IAAI,EACJ,YAAY,EACZ,mBAAmB,EACnB;IAAE,cAAc,EAAE,CAAC,eAAe,CAAC,CAAA;CAAE,EACrC,aAAa,EACb,wBAAwB,EACxB,gBAAgB,EAChB,eAAe,CAClB;IACG,iBAA0B,YAAY;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAGpC;IAEF;;OAEG;gBACS,OAAO,GAAE,uBAAuB,CAAC,gBAAgB,EAAE,eAAe,CAAM;YAgDtE,cAAc;cAiDH,kBAAkB,CACvC,eAAe,EAAE,wBAAwB,EACzC,WAAW,EAAE,uBAAuB;CAI3C;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,qBAAqB,CACjC,OAAO,SAAS,wBAAwB,GAAG,wBAAwB,EACnE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,q
DAEzC"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { BrowserCrawler,
|
|
1
|
+
import { BrowserCrawler, RequestState, Router } from '@crawlee/browser';
|
|
2
|
+
import { serviceLocator } from '@crawlee/core';
|
|
2
3
|
import ow from 'ow';
|
|
3
4
|
import { PuppeteerLauncher } from './puppeteer-launcher.js';
|
|
4
5
|
import { gotoExtended, puppeteerUtils } from './utils/puppeteer_utils.js';
|
|
@@ -66,7 +67,6 @@ import { gotoExtended, puppeteerUtils } from './utils/puppeteer_utils.js';
|
|
|
66
67
|
* @category Crawlers
|
|
67
68
|
*/
|
|
68
69
|
export class PuppeteerCrawler extends BrowserCrawler {
|
|
69
|
-
config;
|
|
70
70
|
static optionsShape = {
|
|
71
71
|
...BrowserCrawler.optionsShape,
|
|
72
72
|
browserPoolOptions: ow.optional.object,
|
|
@@ -74,7 +74,7 @@ export class PuppeteerCrawler extends BrowserCrawler {
|
|
|
74
74
|
/**
|
|
75
75
|
* All `PuppeteerCrawler` parameters are passed via an options object.
|
|
76
76
|
*/
|
|
77
|
-
constructor(options = {}
|
|
77
|
+
constructor(options = {}) {
|
|
78
78
|
ow(options, 'PuppeteerCrawlerOptions', ow.object.exactShape(PuppeteerCrawler.optionsShape));
|
|
79
79
|
const { launchContext = {}, headless, proxyConfiguration, ...browserCrawlerOptions } = options;
|
|
80
80
|
const browserPoolOptions = {
|
|
@@ -93,7 +93,7 @@ export class PuppeteerCrawler extends BrowserCrawler {
|
|
|
93
93
|
launchContext.launchOptions ??= {};
|
|
94
94
|
launchContext.launchOptions.headless = headless;
|
|
95
95
|
}
|
|
96
|
-
const puppeteerLauncher = new PuppeteerLauncher(launchContext,
|
|
96
|
+
const puppeteerLauncher = new PuppeteerLauncher(launchContext, options.configuration);
|
|
97
97
|
browserPoolOptions.browserPlugins = [puppeteerLauncher.createBrowserPlugin()];
|
|
98
98
|
super({
|
|
99
99
|
...browserCrawlerOptions,
|
|
@@ -101,8 +101,7 @@ export class PuppeteerCrawler extends BrowserCrawler {
|
|
|
101
101
|
proxyConfiguration,
|
|
102
102
|
browserPoolOptions,
|
|
103
103
|
contextPipelineBuilder: () => this.buildContextPipeline().compose({ action: this.enhanceContext.bind(this) }),
|
|
104
|
-
}
|
|
105
|
-
this.config = config;
|
|
104
|
+
});
|
|
106
105
|
}
|
|
107
106
|
async enhanceContext(context) {
|
|
108
107
|
const waitForSelector = async (selector, timeoutMs = 5_000) => {
|
|
@@ -135,7 +134,7 @@ export class PuppeteerCrawler extends BrowserCrawler {
|
|
|
135
134
|
addInterceptRequestHandler: async (handler) => puppeteerUtils.addInterceptRequestHandler(context.page, handler),
|
|
136
135
|
removeInterceptRequestHandler: async (handler) => puppeteerUtils.removeInterceptRequestHandler(context.page, handler),
|
|
137
136
|
infiniteScroll: async (options) => puppeteerUtils.infiniteScroll(context.page, options),
|
|
138
|
-
saveSnapshot: async (options) => puppeteerUtils.saveSnapshot(context.page, { ...options, config:
|
|
137
|
+
saveSnapshot: async (options) => puppeteerUtils.saveSnapshot(context.page, { ...options, config: serviceLocator.getConfiguration() }),
|
|
139
138
|
closeCookieModals: async () => puppeteerUtils.closeCookieModals(context.page),
|
|
140
139
|
};
|
|
141
140
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"puppeteer-crawler.js","sourceRoot":"","sources":["../../src/internals/puppeteer-crawler.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,cAAc,EAAE,
|
|
1
|
+
{"version":3,"file":"puppeteer-crawler.js","sourceRoot":"","sources":["../../src/internals/puppeteer-crawler.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAExE,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAE/C,OAAO,EAAE,MAAM,IAAI,CAAC;AAKpB,OAAO,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAU5D,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,4BAA4B,CAAC;AA8D1E;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8DG;AACH,MAAM,OAAO,gBAGX,SAAQ,cAST;IACa,MAAM,CAAU,YAAY,GAAG;QACrC,GAAG,cAAc,CAAC,YAAY;QAC9B,kBAAkB,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM;KACzC,CAAC;IAEF;;OAEG;IACH,YAAY,UAAsE,EAAE;QAChF,EAAE,CAAC,OAAO,EAAE,yBAAyB,EAAE,EAAE,CAAC,MAAM,CAAC,UAAU,CAAC,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC;QAE5F,MAAM,EAAE,aAAa,GAAG,EAAE,EAAE,QAAQ,EAAE,kBAAkB,EAAE,GAAG,qBAAqB,EAAE,GAAG,OAAO,CAAC;QAE/F,MAAM,kBAAkB,GAAG;YACvB,GAAG,OAAO,CAAC,kBAAkB;SACV,CAAC;QAExB,IAAI,aAAa,CAAC,QAAQ,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CACX,oFAAoF;gBAChF,gDAAgD,CACvD,CAAC;QACN,CAAC;QAED,2EAA2E;QAC3E,uFAAuF;QACvF,IAAI,kBAAkB,CAAC,cAAc,EAAE,CAAC;YACpC,MAAM,IAAI,KAAK,CAAC,sFAAsF,CAAC,CAAC;QAC5G,CAAC;QAED,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YACnB,aAAa,CAAC,aAAa,KAAK,EAAmB,CAAC;YACpD,aAAa,CAAC,aAAa,CAAC,QAAQ,GAAG,QAAmB,CAAC;QAC/D,CAAC;QAED,MAAM,iBAAiB,GAAG,IAAI,iBAAiB,CAAC,aAAa,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;QAEtF,kBAAkB,CAAC,cAAc,GAAG,CAAC,iBAAiB,CAAC,mBAAmB,EAAE,CAAC,CAAC;QAE9E,KAAK,CAAC;YACF,GAAI,qBAOF;YACF,aAAa;YACb,kBAAkB;YAClB,kBAAkB;YAClB,sBAAsB,EAAE,GAAG,EAAE,CACzB,IAAI,CAAC,oBAAoB,EAAE,CAAC,OAAO,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;SACtF,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,cAAc,CAAC,OAAwE;QACjG,MAAM,eAAe,GAAG,KAAK,EAAE,QAAgB,EAAE,SAAS,GAAG,KAAK,EAAE,EAAE;YAClE,MAAM,OAAO,CAAC,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC,CAAC;QACzE,CAAC,CAAC;QAEF,OAAO;YACH,UAAU,EAAE,KAAK,EAAE,QAAgB,EAAE,OAA2B,EAAE,EAAE,CAChE,cAAc,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,EAAE,QAAQ,EAAE,OAAO,CAAC;YAC9D,YAAY,EAAE,KAAK,IAAI,EAAE;gBACrB,IAAI,OAAO,CAAC,OAAO,CAAC,KAAK,KAAK,YAAY,CAAC,UAAU,E
AAE,CAAC;oBACpD,OAAO,CAAC,GAAG,CAAC,OAAO,CACf,mIAAmI,CACtI,CAAC;oBACF,MAAM,cAAc,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;oBAChD,OAAO;gBACX,CAAC;gBACD,MAAM,cAAc,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,kBAAkB,EAAE,KAAK,EAAE,CAAC,CAAC;YACnF,CAAC;YACD,eAAe;YACf,gBAAgB,EAAE,KAAK,EAAE,QAAiB,EAAE,SAAS,GAAG,KAAK,EAAE,EAAE;gBAC7D,IAAI,QAAQ,EAAE,CAAC;oBACX,MAAM,eAAe,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;gBAC/C,CAAC;gBAED,OAAO,cAAc,CAAC,gBAAgB,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,iBAAiB,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;YACrG,CAAC;YACD,8BAA8B,EAAE,KAAK,EACjC,OAA6E,EAC/E,EAAE,CACA,cAAc,CAAC,8BAA8B,CAAC;gBAC1C,IAAI,EAAE,OAAO,CAAC,IAAI;gBAClB,YAAY,EAAE,IAAI,CAAC,YAAa;gBAChC,GAAG,OAAO;aACb,CAAC;YACN,aAAa,EAAE,KAAK,EAAE,OAA8B,EAAE,EAAE,CACpD,cAAc,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;YACvD,aAAa,EAAE,CAAC,YAAoB,EAAE,GAAgB,EAAE,EAAE,CAAC,cAAc,CAAC,aAAa,CAAC,YAAY,EAAE,GAAG,CAAC;YAC1G,0BAA0B,EAAE,KAAK,EAAE,OAAyB,EAAE,EAAE,CAC5D,cAAc,CAAC,0BAA0B,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;YACpE,6BAA6B,EAAE,KAAK,EAAE,OAAyB,EAAE,EAAE,CAC/D,cAAc,CAAC,6BAA6B,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;YACvE,cAAc,EAAE,KAAK,EAAE,OAA+B,EAAE,EAAE,CACtD,cAAc,CAAC,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;YACxD,YAAY,EAAE,KAAK,EAAE,OAA6B,EAAE,EAAE,CAClD,cAAc,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,GAAG,OAAO,EAAE,MAAM,EAAE,cAAc,CAAC,gBAAgB,EAAE,EAAE,CAAC;YACxG,iBAAiB,EAAE,KAAK,IAAI,EAAE,CAAC,cAAc,CAAC,iBAAiB,CAAC,OAAO,CAAC,IAAI,CAAC;SAChF,CAAC;IACN,CAAC;IAEkB,KAAK,CAAC,kBAAkB,CACvC,eAAyC,EACzC,WAAoC;QAEpC,OAAO,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,eAAe,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;IACpF,CAAC;;AAGL;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,qBAAqB,CAGnC,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/puppeteer",
|
|
3
|
-
"version": "4.0.0-beta.27",
|
|
3
|
+
"version": "4.0.0-beta.28",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -49,10 +49,10 @@
|
|
|
49
49
|
"dependencies": {
|
|
50
50
|
"@apify/datastructures": "^2.0.3",
|
|
51
51
|
"@apify/log": "^2.5.18",
|
|
52
|
-
"@crawlee/browser": "4.0.0-beta.27",
|
|
53
|
-
"@crawlee/browser-pool": "4.0.0-beta.27",
|
|
54
|
-
"@crawlee/types": "4.0.0-beta.27",
|
|
55
|
-
"@crawlee/utils": "4.0.0-beta.27",
|
|
52
|
+
"@crawlee/browser": "4.0.0-beta.28",
|
|
53
|
+
"@crawlee/browser-pool": "4.0.0-beta.28",
|
|
54
|
+
"@crawlee/types": "4.0.0-beta.28",
|
|
55
|
+
"@crawlee/utils": "4.0.0-beta.28",
|
|
56
56
|
"cheerio": "^1.0.0",
|
|
57
57
|
"devtools-protocol": "*",
|
|
58
58
|
"idcac-playwright": "^0.1.3",
|
|
@@ -79,5 +79,5 @@
|
|
|
79
79
|
}
|
|
80
80
|
}
|
|
81
81
|
},
|
|
82
|
-
"gitHead": "
|
|
82
|
+
"gitHead": "34fe46a70d4d6a6ddb49b0814469dbde1e7f0a9c"
|
|
83
83
|
}
|