npm - @letsscrapedata/scraper - Versions diffs - 0.0.74 → 0.0.76 - Mend

@letsscrapedata/scraper 0.0.74 → 0.0.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@letsscrapedata/scraper",
-  "version": "0.0.74",
+  "version": "0.0.76",
   "description": "Web scraper that scrapes web pages using LetsScrapeData XML templates",
   "type": "module",
   "main": "./dist/index.cjs",

package/readme.md CHANGED Viewed

@@ -80,12 +80,12 @@ const scraperConfig: ScraperConfig = {
     { browserControllerType: "puppeteer", proxyUrl: "" },
     /* launch a chromium browser using playwright, proxy */
     { browserControllerType: "playwright", proxyUrl: "http://proxyId:port" },
-    /* connect to the current browser using patchwright */
+    /* connect to the current browser using patchright */
     { browserUrl: "http://localhost:9222/" },
   ],
   // exitWhenCompleted: true,
-  // loadUnfinishedTasks: true,
   // lsdLaunchOptions: { headless: true },
+  // loadUnfinishedTasks: true,
 };
 const newTasks: TemplateTasks[] = [{ tid: 2000008, parasstrs: ["9"] }];
@@ -98,10 +98,11 @@ await scraper(newTasks, scraperConfig);
 Common configurations:
 - Proxies and browser: browserConfigs, by default launching a browser using browserControllerType/browserType, without proxy
-- Default browser controller to use: browserControllerType, default "playwright"
-- Default browser to use: browserType, default "chromium"
-- File format of scraped data: dataFileFormat, default "tsv"
-- Where are the templates: templateDir, default "" which means to obtain the template from the network
+- Launch options of browser: lsdLaunchOptions, default {headless: false}
+- Whether to load unfinished tasks: loadUnfinishedTasks, default false
+- Whether to exit when completed: exitWhenCompleted, default false
+- File format of scraped data: dataFileFormat, default "jsonl"
+- API Key of captcha solver: captcha.clientKey
 Complete configurations:
@@ -127,11 +128,6 @@ export interface ScraperConfig {
    * if not empty, baseDir must be an absolute path, and the directory must exist and have read and write permissions.
    */
   baseDir?: string;
-  /**
-   * where are the templates saved
-   * @default "", which means to get the templates from LSD server
-   */
-  templateDir?: string;
   /**
    * filename in action_setvar_get/get_file must include inputFileDirePart for security.
    * @default "LetsScrapeData"
@@ -174,7 +170,6 @@ export interface ScraperConfig {
     clientKey: string;
   },
   ////////////////////////////////////////////////////////////////////////////    template
-  templateUrl?: string; // LSD
   /**
    * the default maximum number of concurrent tasks that can execute the same template in a browserContext
    * @default 1