@letsscrapedata/scraper 0.0.74 → 0.0.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +1 -1
- package/dist/index.d.cts +504 -2
- package/dist/index.d.ts +504 -2
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/readme.md +7 -12
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -80,12 +80,12 @@ const scraperConfig: ScraperConfig = {
|
|
|
80
80
|
{ browserControllerType: "puppeteer", proxyUrl: "" },
|
|
81
81
|
/* launch a chromium browser using playwright, proxy */
|
|
82
82
|
{ browserControllerType: "playwright", proxyUrl: "http://proxyId:port" },
|
|
83
|
-
/* connect to the current browser using
|
|
83
|
+
/* connect to the current browser using patchright */
|
|
84
84
|
{ browserUrl: "http://localhost:9222/" },
|
|
85
85
|
],
|
|
86
86
|
// exitWhenCompleted: true,
|
|
87
|
-
// loadUnfinishedTasks: true,
|
|
88
87
|
// lsdLaunchOptions: { headless: true },
|
|
88
|
+
// loadUnfinishedTasks: true,
|
|
89
89
|
};
|
|
90
90
|
|
|
91
91
|
const newTasks: TemplateTasks[] = [{ tid: 2000008, parasstrs: ["9"] }];
|
|
@@ -98,10 +98,11 @@ await scraper(newTasks, scraperConfig);
|
|
|
98
98
|
Common configurations:
|
|
99
99
|
|
|
100
100
|
- Proxies and browser: browserConfigs, by default launching a browser using browserControllerType/browserType, without proxy
|
|
101
|
-
-
|
|
102
|
-
-
|
|
103
|
-
-
|
|
104
|
-
-
|
|
101
|
+
- Launch options of browser: lsdLaunchOptions, default {headless: false}
|
|
102
|
+
- Whether to load unfinished tasks: loadUnfinishedTasks, default false
|
|
103
|
+
- Whether to exit when completed: exitWhenCompleted, default false
|
|
104
|
+
- File format of scraped data: dataFileFormat, default "jsonl"
|
|
105
|
+
- API Key of captcha solver: captcha.clientKey
|
|
105
106
|
|
|
106
107
|
Complete configurations:
|
|
107
108
|
|
|
@@ -127,11 +128,6 @@ export interface ScraperConfig {
|
|
|
127
128
|
* if not empty, baseDir must be an absolute path, and the directory must exist and have read and write permissions.
|
|
128
129
|
*/
|
|
129
130
|
baseDir?: string;
|
|
130
|
-
/**
|
|
131
|
-
* where are the templates saved
|
|
132
|
-
* @default "", which means to get the templates from LSD server
|
|
133
|
-
*/
|
|
134
|
-
templateDir?: string;
|
|
135
131
|
/**
|
|
136
132
|
* filename in action_setvar_get/get_file must include inputFileDirePart for security.
|
|
137
133
|
* @default "LetsScrapeData"
|
|
@@ -174,7 +170,6 @@ export interface ScraperConfig {
|
|
|
174
170
|
clientKey: string;
|
|
175
171
|
},
|
|
176
172
|
//////////////////////////////////////////////////////////////////////////// template
|
|
177
|
-
templateUrl?: string; // LSD
|
|
178
173
|
/**
|
|
179
174
|
* the default maximum number of concurrent tasks that can execute the same template in a browserContext
|
|
180
175
|
* @default 1
|