@crawlee/impit-client 3.12.3-beta.2 → 3.12.3-beta.20
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/index.d.ts +2 -1
- package/index.d.ts.map +1 -1
- package/index.js +41 -21
- package/index.js.map +1 -1
- package/package.json +4 -4
- package/tsconfig.build.tsbuildinfo +1 -1
package/README.md
CHANGED
|
@@ -80,6 +80,28 @@ await crawler.run(['https://crawlee.dev']);
|
|
|
80
80
|
|
|
81
81
|
By default, Crawlee stores data to `./storage` in the current working directory. You can override this directory via Crawlee configuration. For details, see [Configuration guide](https://crawlee.dev/docs/guides/configuration), [Request storage](https://crawlee.dev/docs/guides/request-storage) and [Result storage](https://crawlee.dev/docs/guides/result-storage).
|
|
82
82
|
|
|
83
|
+
### Installing pre-release versions
|
|
84
|
+
|
|
85
|
+
We provide automated beta builds for every merged code change in Crawlee. You can find them in the npm [list of releases](https://www.npmjs.com/package/crawlee?activeTab=versions). If you want to test new features or bug fixes before we release them, feel free to install a beta build like this:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
npm install crawlee@3.12.3-beta.13
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
If you also use the [Apify SDK](https://github.com/apify/apify-sdk-js), you need to specify dependency overrides in your `package.json` file so that you don't end up with multiple versions of Crawlee installed:
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"overrides": {
|
|
96
|
+
"apify": {
|
|
97
|
+
"@crawlee/core": "3.12.3-beta.13",
|
|
98
|
+
"@crawlee/types": "3.12.3-beta.13",
|
|
99
|
+
"@crawlee/utils": "3.12.3-beta.13"
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
83
105
|
## 🛠 Features
|
|
84
106
|
|
|
85
107
|
- Single interface for **HTTP and headless browser** crawling
|
package/index.d.ts
CHANGED
|
@@ -26,11 +26,12 @@ export declare class ImpitHttpClient implements BaseHttpClient {
|
|
|
26
26
|
* @param request `HttpRequest` object
|
|
27
27
|
* @returns `HttpResponse` object
|
|
28
28
|
*/
|
|
29
|
-
private
|
|
29
|
+
private getResponse;
|
|
30
30
|
/**
|
|
31
31
|
* @inheritDoc
|
|
32
32
|
*/
|
|
33
33
|
sendRequest<TResponseType extends keyof ResponseTypes>(request: HttpRequest<TResponseType>): Promise<HttpResponse<TResponseType>>;
|
|
34
|
+
private getStreamWithProgress;
|
|
34
35
|
/**
|
|
35
36
|
* @inheritDoc
|
|
36
37
|
*/
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,qBAAqB,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACrH,OAAO,EAAE,KAAK,YAAY,EAA8C,MAAM,OAAO,CAAC;AAEtF,OAAO,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAOhC;;GAEG;AACH,qBAAa,eAAgB,YAAW,cAAc;IAClD,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAU;gBAErB,OAAO,CAAC,EAAE,IAAI,CAAC,YAAY,EAAE,UAAU,CAAC,GAAG;QAAE,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE;IAOhF;;OAEG;YACW,aAAa;IAuB3B;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;;OAIG;YACW,WAAW;IAoDzB;;OAEG;IACG,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,EACvD,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC;IA+BvC,OAAO,CAAC,qBAAqB;IAqB7B;;OAEG;IACG,MAAM,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,qBAAqB,CAAC;CAmBrE"}
|
package/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ImpitHttpClient = exports.Browser = void 0;
|
|
4
|
+
const web_1 = require("node:stream/web");
|
|
4
5
|
const stream_1 = require("stream");
|
|
5
6
|
const types_1 = require("util/types");
|
|
6
7
|
const impit_1 = require("impit");
|
|
@@ -40,7 +41,7 @@ class ImpitHttpClient {
|
|
|
40
41
|
if (typeof body === 'string' || (0, types_1.isTypedArray)(body)) {
|
|
41
42
|
return body;
|
|
42
43
|
}
|
|
43
|
-
if (body instanceof ReadableStream) {
|
|
44
|
+
if (body instanceof web_1.ReadableStream) {
|
|
44
45
|
const reader = body.getReader();
|
|
45
46
|
const buffer = new Uint8Array();
|
|
46
47
|
while (true) {
|
|
@@ -76,7 +77,7 @@ class ImpitHttpClient {
|
|
|
76
77
|
* @param request `HttpRequest` object
|
|
77
78
|
* @returns `HttpResponse` object
|
|
78
79
|
*/
|
|
79
|
-
async
|
|
80
|
+
async getResponse(request, redirects) {
|
|
80
81
|
if ((redirects?.redirectCount ?? 0) > this.maxRedirects) {
|
|
81
82
|
throw new Error(`Too many redirects, maximum is ${this.maxRedirects}.`);
|
|
82
83
|
}
|
|
@@ -98,7 +99,7 @@ class ImpitHttpClient {
|
|
|
98
99
|
if (!location) {
|
|
99
100
|
throw new Error('Redirect response missing location header.');
|
|
100
101
|
}
|
|
101
|
-
return this.
|
|
102
|
+
return this.getResponse({
|
|
102
103
|
...request,
|
|
103
104
|
url: location,
|
|
104
105
|
}, {
|
|
@@ -106,16 +107,26 @@ class ImpitHttpClient {
|
|
|
106
107
|
redirectUrls: [...(redirects?.redirectUrls ?? []), new URL(location)],
|
|
107
108
|
});
|
|
108
109
|
}
|
|
110
|
+
return {
|
|
111
|
+
response,
|
|
112
|
+
redirectUrls: redirects?.redirectUrls ?? [],
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* @inheritDoc
|
|
117
|
+
*/
|
|
118
|
+
async sendRequest(request) {
|
|
119
|
+
const { response, redirectUrls } = await this.getResponse(request);
|
|
109
120
|
let responseBody;
|
|
110
121
|
switch (request.responseType) {
|
|
111
122
|
case 'text':
|
|
112
|
-
responseBody = response.text();
|
|
123
|
+
responseBody = await response.text();
|
|
113
124
|
break;
|
|
114
125
|
case 'json':
|
|
115
|
-
responseBody = response.json();
|
|
126
|
+
responseBody = await response.json();
|
|
116
127
|
break;
|
|
117
128
|
case 'buffer':
|
|
118
|
-
responseBody = response.bytes();
|
|
129
|
+
responseBody = await response.bytes();
|
|
119
130
|
break;
|
|
120
131
|
default:
|
|
121
132
|
throw new Error('Unsupported response type.');
|
|
@@ -123,38 +134,47 @@ class ImpitHttpClient {
|
|
|
123
134
|
return {
|
|
124
135
|
headers: response.headers,
|
|
125
136
|
statusCode: response.status,
|
|
126
|
-
url,
|
|
137
|
+
url: response.url,
|
|
127
138
|
request,
|
|
128
|
-
redirectUrls
|
|
139
|
+
redirectUrls,
|
|
129
140
|
trailers: {},
|
|
130
141
|
body: responseBody,
|
|
131
142
|
complete: true,
|
|
132
143
|
};
|
|
133
144
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
145
|
+
getStreamWithProgress(response) {
|
|
146
|
+
const responseStream = stream_1.Readable.fromWeb(response.body);
|
|
147
|
+
let transferred = 0;
|
|
148
|
+
const total = Number(response.headers['content-length'] ?? 0);
|
|
149
|
+
responseStream.on('data', (chunk) => {
|
|
150
|
+
transferred += chunk.length;
|
|
151
|
+
});
|
|
152
|
+
const getDownloadProgress = () => {
|
|
153
|
+
return {
|
|
154
|
+
percent: Math.round((transferred / total) * 100),
|
|
155
|
+
transferred,
|
|
156
|
+
total,
|
|
157
|
+
};
|
|
158
|
+
};
|
|
159
|
+
return [responseStream, getDownloadProgress];
|
|
139
160
|
}
|
|
140
161
|
/**
|
|
141
162
|
* @inheritDoc
|
|
142
163
|
*/
|
|
143
164
|
async stream(request) {
|
|
144
|
-
const response = await this.
|
|
145
|
-
const stream =
|
|
146
|
-
stream.push(response.body);
|
|
147
|
-
stream.push(null);
|
|
165
|
+
const { response, redirectUrls } = await this.getResponse(request);
|
|
166
|
+
const [stream, getDownloadProgress] = this.getStreamWithProgress(response);
|
|
148
167
|
return {
|
|
149
168
|
request,
|
|
150
169
|
url: response.url,
|
|
151
|
-
|
|
152
|
-
statusCode: response.statusCode,
|
|
170
|
+
statusCode: response.status,
|
|
153
171
|
stream,
|
|
154
172
|
complete: true,
|
|
155
|
-
downloadProgress
|
|
173
|
+
get downloadProgress() {
|
|
174
|
+
return getDownloadProgress();
|
|
175
|
+
},
|
|
156
176
|
uploadProgress: { percent: 100, transferred: 0 },
|
|
157
|
-
redirectUrls
|
|
177
|
+
redirectUrls,
|
|
158
178
|
headers: response.headers,
|
|
159
179
|
trailers: {},
|
|
160
180
|
};
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,mCAAkC;AAClC,sCAA0C;AAG1C,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,yCAAiD;AACjD,mCAAkC;AAClC,sCAA0C;AAG1C,iCAAsF;AAEtF,+BAAgC;AAAvB,gGAAA,OAAO,OAAA;AAOhB;;GAEG;AACH,MAAa,eAAe;IAKxB,YAAY,OAAoE;QAJxE;;;;;WAA2B;QAC3B;;;;;WAAqB;QACrB;;;;;WAAyB;QAG7B,IAAI,CAAC,YAAY,GAAG,OAAO,IAAI,EAAE,CAAC;QAElC,IAAI,CAAC,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,eAAe,GAAG,OAAO,EAAE,eAAe,IAAI,IAAI,CAAC;IAC5D,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CACvB,IAA4D;QAE5D,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAA,oBAAY,EAAC,IAAI,CAAC,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,IAAI,IAAI,YAAY,oBAAc,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAEhC,OAAO,IAAI,EAAE,CAAC;gBACV,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAE5C,IAAI,IAAI;oBAAE,OAAO,MAAM,CAAC;gBAExB,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC9C,CAAC;IAED;;;;OAIG;IACK,cAAc,CAClB,OAAkE;QAElE,MAAM,MAAM,GAA2B,EAAE,CAAC;QAE1C,KAAK,MAAM,UAAU,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5C,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;YAExC,IAAI,WAAW,KAAK,SAAS;gBAAE,SAAS;YAExC,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,UAAU,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;gBACpC,SAAS;YACb,CAAC;YAED,MAAM,CAAC,UAAU,CAAC,GAAG,WAAW,CAAC;QACrC,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,WAAW,CACrB,OAAmC,EACnC,SAGC;QAED,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,OAAO,OAAO,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC;QAC7E,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACjG,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS
,CAAC;QAE7F,MAAM,KAAK,GAAG,IAAI,aAAK,CAAC;YACpB,GAAG,IAAI,CAAC,YAAY;YACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,eAAe,EAAE,KAAK;SACzB,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE;YACpC,MAAM,EAAE,OAAO,CAAC,MAAoB;YACpC,OAAO;YACP,IAAI,EAAE,IAAc;SACvB,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,eAAe,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC1E,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC;YAE3C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;YAClE,CAAC;YAED,OAAO,IAAI,CAAC,WAAW,CACnB;gBACI,GAAG,OAAO;gBACV,GAAG,EAAE,QAAQ;aAChB,EACD;gBACI,aAAa,EAAE,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,CAAC,GAAG,CAAC;gBAClD,YAAY,EAAE,CAAC,GAAG,CAAC,SAAS,EAAE,YAAY,IAAI,EAAE,CAAC,EAAE,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;aACxE,CACJ,CAAC;QACN,CAAC;QAED,OAAO;YACH,QAAQ;YACR,YAAY,EAAE,SAAS,EAAE,YAAY,IAAI,EAAE;SAC9C,CAAC;IACN,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CACb,OAAmC;QAEnC,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QAEnE,IAAI,YAAY,CAAC;QAEjB,QAAQ,OAAO,CAAC,YAAY,EAAE,CAAC;YAC3B,KAAK,MAAM;gBACP,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrC,MAAM;YACV,KAAK,MAAM;gBACP,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrC,MAAM;YACV,KAAK,QAAQ;gBACT,YAAY,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACtC,MAAM;YACV;gBACI,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QACtD,CAAC;QAED,OAAO;YACH,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,GAAG,EAAE,QAAQ,CAAC,GAAG;YACjB,OAAO;YACP,YAAY;YACZ,QAAQ,EAAE,EAAE;YACZ,IAAI,EAAE,YAAY;YAClB,QAAQ,EAAE,IAAI;SACjB,CAAC;IACN,CAAC;IAEO,qBAAqB,CACzB,QAAuB;QAEvB,MAAM,cAAc,GAAG,iBAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,IAA2B,CAAC,CAAC;QAC9E,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9D,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;YAChC,WAAW,IAAI,KAAK,CAAC,MAAM,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,MAAM,mBAAmB,GAAG,GAAG,EAAE;YAC7B,OAAO;gBACH,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;gBAChD,WAAW;gBACX,KAAK;aACR,CAAC;QACN,CAAC,CAAC;QAEF,
OAAO,CAAC,cAAc,EAAE,mBAAmB,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC7B,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QACnE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,GAAG,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO;YACH,OAAO;YACP,GAAG,EAAE,QAAQ,CAAC,GAAG;YACjB,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,MAAM;YACN,QAAQ,EAAE,IAAI;YACd,IAAI,gBAAgB;gBAChB,OAAO,mBAAmB,EAAE,CAAC;YACjC,CAAC;YACD,cAAc,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,EAAE;YAChD,YAAY;YACZ,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,QAAQ,EAAE,EAAE;SACf,CAAC;IACN,CAAC;CACJ;AAxMD,0CAwMC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/impit-client",
|
|
3
|
-
"version": "3.12.3-beta.
|
|
3
|
+
"version": "3.12.3-beta.20",
|
|
4
4
|
"description": "impit-based HTTP client implementation for Crawlee. Impersonates browser requests to avoid bot detection.",
|
|
5
5
|
"engines": {
|
|
6
|
-
"node": ">=
|
|
6
|
+
"node": ">=20.0.0"
|
|
7
7
|
},
|
|
8
8
|
"main": "./index.js",
|
|
9
9
|
"module": "./index.mjs",
|
|
@@ -59,7 +59,7 @@
|
|
|
59
59
|
"@crawlee/core": "^3.12.2"
|
|
60
60
|
},
|
|
61
61
|
"dependencies": {
|
|
62
|
-
"impit": "^0.1
|
|
62
|
+
"impit": "^0.2.1"
|
|
63
63
|
},
|
|
64
64
|
"packageManager": "yarn@4.6.0",
|
|
65
65
|
"lerna": {
|
|
@@ -69,5 +69,5 @@
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
},
|
|
72
|
-
"gitHead": "
|
|
72
|
+
"gitHead": "bcdea195ded2a44097d4fc789f2cdd58f021958a"
|
|
73
73
|
}
|