@letsscrapedata/scraper 0.0.74 → 0.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@letsscrapedata/scraper",
3
- "version": "0.0.74",
3
+ "version": "0.0.76",
4
4
  "description": "Web scraper that scrapes web pages using LetsScrapeData XML templates",
5
5
  "type": "module",
6
6
  "main": "./dist/index.cjs",
package/readme.md CHANGED
@@ -80,12 +80,12 @@ const scraperConfig: ScraperConfig = {
80
80
  { browserControllerType: "puppeteer", proxyUrl: "" },
81
81
  /* launch a chromium browser using playwright, proxy */
82
82
  { browserControllerType: "playwright", proxyUrl: "http://proxyId:port" },
83
- /* connect to the current browser using patchwright */
83
+ /* connect to the current browser using patchright */
84
84
  { browserUrl: "http://localhost:9222/" },
85
85
  ],
86
86
  // exitWhenCompleted: true,
87
- // loadUnfinishedTasks: true,
88
87
  // lsdLaunchOptions: { headless: true },
88
+ // loadUnfinishedTasks: true,
89
89
  };
90
90
 
91
91
  const newTasks: TemplateTasks[] = [{ tid: 2000008, parasstrs: ["9"] }];
@@ -98,10 +98,11 @@ await scraper(newTasks, scraperConfig);
98
98
  Common configurations:
99
99
 
100
100
  - Proxies and browser: browserConfigs, by default launching a browser using browserControllerType/browserType, without proxy
101
- - Default browser controller to use: browserControllerType, default "playwright"
102
- - Default browser to use: browserType, default "chromium"
103
- - File format of scraped data: dataFileFormat, default "tsv"
104
- - Where are the templates: templateDir, default "" which means to obtain the template from the network
101
+ - Launch options of browser: lsdLaunchOptions, default {headless: false}
102
+ - Whether to load unfinished tasks: loadUnfinishedTasks, default false
103
+ - Whether to exit when completed: exitWhenCompleted, default false
104
+ - File format of scraped data: dataFileFormat, default "jsonl"
105
+ - API Key of captcha solver: captcha.clientKey
105
106
 
106
107
  Complete configurations:
107
108
 
@@ -127,11 +128,6 @@ export interface ScraperConfig {
127
128
  * if not empty, baseDir must be an absolute path, and the directory must exist and have read and write permissions.
128
129
  */
129
130
  baseDir?: string;
130
- /**
131
- * where are the templates saved
132
- * @default "", which means to get the templates from LSD server
133
- */
134
- templateDir?: string;
135
131
  /**
136
132
  * filename in action_setvar_get/get_file must include inputFileDirePart for security.
137
133
  * @default "LetsScrapeData"
@@ -174,7 +170,6 @@ export interface ScraperConfig {
174
170
  clientKey: string;
175
171
  },
176
172
  //////////////////////////////////////////////////////////////////////////// template
177
- templateUrl?: string; // LSD
178
173
  /**
179
174
  * the default maximum number of concurrent tasks that can execute the same template in a browserContext
180
175
  * @default 1