crawlee 3.12.3-beta.8 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/index.mjs +5 -0
- package/package.json +14 -14
- package/tsconfig.build.tsbuildinfo +1 -1
package/README.md
CHANGED
|
@@ -80,6 +80,28 @@ await crawler.run(['https://crawlee.dev']);
|
|
|
80
80
|
|
|
81
81
|
By default, Crawlee stores data to `./storage` in the current working directory. You can override this directory via Crawlee configuration. For details, see [Configuration guide](https://crawlee.dev/docs/guides/configuration), [Request storage](https://crawlee.dev/docs/guides/request-storage) and [Result storage](https://crawlee.dev/docs/guides/result-storage).
|
|
82
82
|
|
|
83
|
+
### Installing pre-release versions
|
|
84
|
+
|
|
85
|
+
We provide automated beta builds for every merged code change in Crawlee. You can find them in the npm [list of releases](https://www.npmjs.com/package/crawlee?activeTab=versions). If you want to test new features or bug fixes before we release them, feel free to install a beta build like this:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
npm install crawlee@3.12.3-beta.13
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
If you also use the [Apify SDK](https://github.com/apify/apify-sdk-js), you need to specify dependency overrides in your `package.json` file so that you don't end up with multiple versions of Crawlee installed:
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"overrides": {
|
|
96
|
+
"apify": {
|
|
97
|
+
"@crawlee/core": "3.12.3-beta.13",
|
|
98
|
+
"@crawlee/types": "3.12.3-beta.13",
|
|
99
|
+
"@crawlee/utils": "3.12.3-beta.13"
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
83
105
|
## 🛠 Features
|
|
84
106
|
|
|
85
107
|
- Single interface for **HTTP and headless browser** crawling
|
package/index.mjs
CHANGED
|
@@ -126,14 +126,19 @@ export const extractUrls = mod.extractUrls;
|
|
|
126
126
|
export const extractUrlsFromCheerio = mod.extractUrlsFromCheerio;
|
|
127
127
|
export const extractUrlsFromPage = mod.extractUrlsFromPage;
|
|
128
128
|
export const filterRequestsByPatterns = mod.filterRequestsByPatterns;
|
|
129
|
+
export const getCgroupsVersion = mod.getCgroupsVersion;
|
|
129
130
|
export const getCookiesFromResponse = mod.getCookiesFromResponse;
|
|
131
|
+
export const getCurrentCpuTicksV2 = mod.getCurrentCpuTicksV2;
|
|
130
132
|
export const getDefaultCookieExpirationDate = mod.getDefaultCookieExpirationDate;
|
|
131
133
|
export const getMemoryInfo = mod.getMemoryInfo;
|
|
134
|
+
export const getMemoryInfoV2 = mod.getMemoryInfoV2;
|
|
132
135
|
export const getRequestId = mod.getRequestId;
|
|
133
136
|
export const gotScraping = mod.gotScraping;
|
|
134
137
|
export const handleRequestTimeout = mod.handleRequestTimeout;
|
|
135
138
|
export const htmlToText = mod.htmlToText;
|
|
139
|
+
export const isContainerized = mod.isContainerized;
|
|
136
140
|
export const isDocker = mod.isDocker;
|
|
141
|
+
export const isLambda = mod.isLambda;
|
|
137
142
|
export const keys = mod.keys;
|
|
138
143
|
export const launchPlaywright = mod.launchPlaywright;
|
|
139
144
|
export const launchPuppeteer = mod.launchPuppeteer;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "crawlee",
|
|
3
|
-
"version": "3.12.3-beta.8",
|
|
3
|
+
"version": "3.13.0",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=16.0.0"
|
|
@@ -54,18 +54,18 @@
|
|
|
54
54
|
"access": "public"
|
|
55
55
|
},
|
|
56
56
|
"dependencies": {
|
|
57
|
-
"@crawlee/basic": "3.
|
|
58
|
-
"@crawlee/browser": "3.
|
|
59
|
-
"@crawlee/browser-pool": "3.
|
|
60
|
-
"@crawlee/cheerio": "3.
|
|
61
|
-
"@crawlee/cli": "3.
|
|
62
|
-
"@crawlee/core": "3.
|
|
63
|
-
"@crawlee/http": "3.
|
|
64
|
-
"@crawlee/jsdom": "3.
|
|
65
|
-
"@crawlee/linkedom": "3.
|
|
66
|
-
"@crawlee/playwright": "3.
|
|
67
|
-
"@crawlee/puppeteer": "3.
|
|
68
|
-
"@crawlee/utils": "3.
|
|
57
|
+
"@crawlee/basic": "3.13.0",
|
|
58
|
+
"@crawlee/browser": "3.13.0",
|
|
59
|
+
"@crawlee/browser-pool": "3.13.0",
|
|
60
|
+
"@crawlee/cheerio": "3.13.0",
|
|
61
|
+
"@crawlee/cli": "3.13.0",
|
|
62
|
+
"@crawlee/core": "3.13.0",
|
|
63
|
+
"@crawlee/http": "3.13.0",
|
|
64
|
+
"@crawlee/jsdom": "3.13.0",
|
|
65
|
+
"@crawlee/linkedom": "3.13.0",
|
|
66
|
+
"@crawlee/playwright": "3.13.0",
|
|
67
|
+
"@crawlee/puppeteer": "3.13.0",
|
|
68
|
+
"@crawlee/utils": "3.13.0",
|
|
69
69
|
"import-local": "^3.1.0",
|
|
70
70
|
"tslib": "^2.4.0"
|
|
71
71
|
},
|
|
@@ -88,5 +88,5 @@
|
|
|
88
88
|
}
|
|
89
89
|
}
|
|
90
90
|
},
|
|
91
|
-
"gitHead": "
|
|
91
|
+
"gitHead": "6d5b13ae318909a66001cfc4daa1425ca88b3bb3"
|
|
92
92
|
}
|