@crawlee/impit-client 3.12.3-beta.9 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/index.d.ts +2 -1
- package/index.d.ts.map +1 -1
- package/index.js +42 -21
- package/index.js.map +1 -1
- package/package.json +6 -6
- package/tsconfig.build.tsbuildinfo +1 -1
package/README.md
CHANGED
|
@@ -80,6 +80,28 @@ await crawler.run(['https://crawlee.dev']);
|
|
|
80
80
|
|
|
81
81
|
By default, Crawlee stores data to `./storage` in the current working directory. You can override this directory via Crawlee configuration. For details, see [Configuration guide](https://crawlee.dev/docs/guides/configuration), [Request storage](https://crawlee.dev/docs/guides/request-storage) and [Result storage](https://crawlee.dev/docs/guides/result-storage).
|
|
82
82
|
|
|
83
|
+
### Installing pre-release versions
|
|
84
|
+
|
|
85
|
+
We provide automated beta builds for every merged code change in Crawlee. You can find them in the npm [list of releases](https://www.npmjs.com/package/crawlee?activeTab=versions). If you want to test new features or bug fixes before we release them, feel free to install a beta build like this:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
npm install crawlee@3.12.3-beta.13
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
If you also use the [Apify SDK](https://github.com/apify/apify-sdk-js), you need to specify dependency overrides in your `package.json` file so that you don't end up with multiple versions of Crawlee installed:
|
|
92
|
+
|
|
93
|
+
```json
|
|
94
|
+
{
|
|
95
|
+
"overrides": {
|
|
96
|
+
"apify": {
|
|
97
|
+
"@crawlee/core": "3.12.3-beta.13",
|
|
98
|
+
"@crawlee/types": "3.12.3-beta.13",
|
|
99
|
+
"@crawlee/utils": "3.12.3-beta.13"
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
83
105
|
## 🛠 Features
|
|
84
106
|
|
|
85
107
|
- Single interface for **HTTP and headless browser** crawling
|
package/index.d.ts
CHANGED
|
@@ -26,11 +26,12 @@ export declare class ImpitHttpClient implements BaseHttpClient {
|
|
|
26
26
|
* @param request `HttpRequest` object
|
|
27
27
|
* @returns `HttpResponse` object
|
|
28
28
|
*/
|
|
29
|
-
private
|
|
29
|
+
private getResponse;
|
|
30
30
|
/**
|
|
31
31
|
* @inheritDoc
|
|
32
32
|
*/
|
|
33
33
|
sendRequest<TResponseType extends keyof ResponseTypes>(request: HttpRequest<TResponseType>): Promise<HttpResponse<TResponseType>>;
|
|
34
|
+
private getStreamWithProgress;
|
|
34
35
|
/**
|
|
35
36
|
* @inheritDoc
|
|
36
37
|
*/
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,WAAW,EAAE,YAAY,EAAE,aAAa,EAAE,qBAAqB,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACrH,OAAO,EAAE,KAAK,YAAY,EAA8C,MAAM,OAAO,CAAC;AAEtF,OAAO,EAAE,OAAO,EAAE,MAAM,OAAO,CAAC;AAOhC;;GAEG;AACH,qBAAa,eAAgB,YAAW,cAAc;IAClD,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,YAAY,CAAS;IAC7B,OAAO,CAAC,eAAe,CAAU;gBAErB,OAAO,CAAC,EAAE,IAAI,CAAC,YAAY,EAAE,UAAU,CAAC,GAAG;QAAE,YAAY,CAAC,EAAE,MAAM,CAAA;KAAE;IAOhF;;OAEG;YACW,aAAa;IAwB3B;;;;OAIG;IACH,OAAO,CAAC,cAAc;IAqBtB;;;;OAIG;YACW,WAAW;IAoDzB;;OAEG;IACG,WAAW,CAAC,aAAa,SAAS,MAAM,aAAa,EACvD,OAAO,EAAE,WAAW,CAAC,aAAa,CAAC,GACpC,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,CAAC;IA+BvC,OAAO,CAAC,qBAAqB;IAqB7B;;OAEG;IACG,MAAM,CAAC,OAAO,EAAE,WAAW,GAAG,OAAO,CAAC,qBAAqB,CAAC;CAmBrE"}
|
package/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ImpitHttpClient = exports.Browser = void 0;
|
|
4
|
+
const web_1 = require("node:stream/web");
|
|
4
5
|
const stream_1 = require("stream");
|
|
5
6
|
const types_1 = require("util/types");
|
|
6
7
|
const impit_1 = require("impit");
|
|
@@ -40,9 +41,10 @@ class ImpitHttpClient {
|
|
|
40
41
|
if (typeof body === 'string' || (0, types_1.isTypedArray)(body)) {
|
|
41
42
|
return body;
|
|
42
43
|
}
|
|
43
|
-
if (body instanceof ReadableStream) {
|
|
44
|
+
if (body instanceof web_1.ReadableStream) {
|
|
44
45
|
const reader = body.getReader();
|
|
45
46
|
const buffer = new Uint8Array();
|
|
47
|
+
// eslint-disable-next-line no-constant-condition
|
|
46
48
|
while (true) {
|
|
47
49
|
const { done, value } = await reader.read();
|
|
48
50
|
if (done)
|
|
@@ -76,7 +78,7 @@ class ImpitHttpClient {
|
|
|
76
78
|
* @param request `HttpRequest` object
|
|
77
79
|
* @returns `HttpResponse` object
|
|
78
80
|
*/
|
|
79
|
-
async
|
|
81
|
+
async getResponse(request, redirects) {
|
|
80
82
|
if ((redirects?.redirectCount ?? 0) > this.maxRedirects) {
|
|
81
83
|
throw new Error(`Too many redirects, maximum is ${this.maxRedirects}.`);
|
|
82
84
|
}
|
|
@@ -98,7 +100,7 @@ class ImpitHttpClient {
|
|
|
98
100
|
if (!location) {
|
|
99
101
|
throw new Error('Redirect response missing location header.');
|
|
100
102
|
}
|
|
101
|
-
return this.
|
|
103
|
+
return this.getResponse({
|
|
102
104
|
...request,
|
|
103
105
|
url: location,
|
|
104
106
|
}, {
|
|
@@ -106,16 +108,26 @@ class ImpitHttpClient {
|
|
|
106
108
|
redirectUrls: [...(redirects?.redirectUrls ?? []), new URL(location)],
|
|
107
109
|
});
|
|
108
110
|
}
|
|
111
|
+
return {
|
|
112
|
+
response,
|
|
113
|
+
redirectUrls: redirects?.redirectUrls ?? [],
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
/**
|
|
117
|
+
* @inheritDoc
|
|
118
|
+
*/
|
|
119
|
+
async sendRequest(request) {
|
|
120
|
+
const { response, redirectUrls } = await this.getResponse(request);
|
|
109
121
|
let responseBody;
|
|
110
122
|
switch (request.responseType) {
|
|
111
123
|
case 'text':
|
|
112
|
-
responseBody = response.text();
|
|
124
|
+
responseBody = await response.text();
|
|
113
125
|
break;
|
|
114
126
|
case 'json':
|
|
115
|
-
responseBody = response.json();
|
|
127
|
+
responseBody = await response.json();
|
|
116
128
|
break;
|
|
117
129
|
case 'buffer':
|
|
118
|
-
responseBody = response.bytes();
|
|
130
|
+
responseBody = await response.bytes();
|
|
119
131
|
break;
|
|
120
132
|
default:
|
|
121
133
|
throw new Error('Unsupported response type.');
|
|
@@ -123,38 +135,47 @@ class ImpitHttpClient {
|
|
|
123
135
|
return {
|
|
124
136
|
headers: response.headers,
|
|
125
137
|
statusCode: response.status,
|
|
126
|
-
url,
|
|
138
|
+
url: response.url,
|
|
127
139
|
request,
|
|
128
|
-
redirectUrls
|
|
140
|
+
redirectUrls,
|
|
129
141
|
trailers: {},
|
|
130
142
|
body: responseBody,
|
|
131
143
|
complete: true,
|
|
132
144
|
};
|
|
133
145
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
146
|
+
getStreamWithProgress(response) {
|
|
147
|
+
const responseStream = stream_1.Readable.fromWeb(response.body);
|
|
148
|
+
let transferred = 0;
|
|
149
|
+
const total = Number(response.headers['content-length'] ?? 0);
|
|
150
|
+
responseStream.on('data', (chunk) => {
|
|
151
|
+
transferred += chunk.length;
|
|
152
|
+
});
|
|
153
|
+
const getDownloadProgress = () => {
|
|
154
|
+
return {
|
|
155
|
+
percent: Math.round((transferred / total) * 100),
|
|
156
|
+
transferred,
|
|
157
|
+
total,
|
|
158
|
+
};
|
|
159
|
+
};
|
|
160
|
+
return [responseStream, getDownloadProgress];
|
|
139
161
|
}
|
|
140
162
|
/**
|
|
141
163
|
* @inheritDoc
|
|
142
164
|
*/
|
|
143
165
|
async stream(request) {
|
|
144
|
-
const response = await this.
|
|
145
|
-
const stream =
|
|
146
|
-
stream.push(response.body);
|
|
147
|
-
stream.push(null);
|
|
166
|
+
const { response, redirectUrls } = await this.getResponse(request);
|
|
167
|
+
const [stream, getDownloadProgress] = this.getStreamWithProgress(response);
|
|
148
168
|
return {
|
|
149
169
|
request,
|
|
150
170
|
url: response.url,
|
|
151
|
-
|
|
152
|
-
statusCode: response.statusCode,
|
|
171
|
+
statusCode: response.status,
|
|
153
172
|
stream,
|
|
154
173
|
complete: true,
|
|
155
|
-
downloadProgress
|
|
174
|
+
get downloadProgress() {
|
|
175
|
+
return getDownloadProgress();
|
|
176
|
+
},
|
|
156
177
|
uploadProgress: { percent: 100, transferred: 0 },
|
|
157
|
-
redirectUrls
|
|
178
|
+
redirectUrls,
|
|
158
179
|
headers: response.headers,
|
|
159
180
|
trailers: {},
|
|
160
181
|
};
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,mCAAkC;AAClC,sCAA0C;AAG1C,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;;AAAA,yCAAiD;AACjD,mCAAkC;AAClC,sCAA0C;AAG1C,iCAAsF;AAEtF,+BAAgC;AAAvB,gGAAA,OAAO,OAAA;AAOhB;;GAEG;AACH,MAAa,eAAe;IAKxB,YAAY,OAAoE;QAJxE;;;;;WAA2B;QAC3B;;;;;WAAqB;QACrB;;;;;WAAyB;QAG7B,IAAI,CAAC,YAAY,GAAG,OAAO,IAAI,EAAE,CAAC;QAElC,IAAI,CAAC,YAAY,GAAG,OAAO,EAAE,YAAY,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,eAAe,GAAG,OAAO,EAAE,eAAe,IAAI,IAAI,CAAC;IAC5D,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,aAAa,CACvB,IAA4D;QAE5D,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAA,oBAAY,EAAC,IAAI,CAAC,EAAE,CAAC;YACjD,OAAO,IAAI,CAAC;QAChB,CAAC;QAED,IAAI,IAAI,YAAY,oBAAc,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;YAChC,MAAM,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;YAEhC,iDAAiD;YACjD,OAAO,IAAI,EAAE,CAAC;gBACV,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,EAAE,CAAC;gBAE5C,IAAI,IAAI;oBAAE,OAAO,MAAM,CAAC;gBAExB,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YACrC,CAAC;QACL,CAAC;QAED,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC9C,CAAC;IAED;;;;OAIG;IACK,cAAc,CAClB,OAAkE;QAElE,MAAM,MAAM,GAA2B,EAAE,CAAC;QAE1C,KAAK,MAAM,UAAU,IAAI,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5C,MAAM,WAAW,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;YAExC,IAAI,WAAW,KAAK,SAAS;gBAAE,SAAS;YAExC,IAAI,KAAK,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE,CAAC;gBAC7B,MAAM,CAAC,UAAU,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;gBACpC,SAAS;YACb,CAAC;YAED,MAAM,CAAC,UAAU,CAAC,GAAG,WAAW,CAAC;QACrC,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,WAAW,CACrB,OAAmC,EACnC,SAGC;QAED,IAAI,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,CAAC,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;YACtD,MAAM,IAAI,KAAK,CAAC,kCAAkC,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QAC5E,CAAC;QAED,MAAM,GAAG,GAAG,OAAO,OAAO,CAAC,GAAG,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC;QAC7E,MAAM,OAAO,GAAG,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACjG,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,KAAK,SAAS,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QAE7F,MAAM,KAAK,GAAG,IAAI,aAAK,CAAC;YACpB,GAAG,IAAI,CAAC,YAAY;YACpB,QAAQ,EAAE,OAAO,CAAC,QAAQ;YAC1B,eAAe,EAAE,KAAK;SACzB,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE;YACpC,MAAM,EAAE,OAAO,CAAC,MAAoB;YACpC,OAAO;YACP,IAAI,EAAE,IAAc;SACvB,CAAC,CAAC;QAEH,IAAI,IAAI,CAAC,eAAe,IAAI,QAAQ,CAAC,MAAM,IAAI,GAAG,IAAI,QAAQ,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YAC1E,MAAM,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC;YAE3C,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACZ,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;YAClE,CAAC;YAED,OAAO,IAAI,CAAC,WAAW,CACnB;gBACI,GAAG,OAAO;gBACV,GAAG,EAAE,QAAQ;aAChB,EACD;gBACI,aAAa,EAAE,CAAC,SAAS,EAAE,aAAa,IAAI,CAAC,CAAC,GAAG,CAAC;gBAClD,YAAY,EAAE,CAAC,GAAG,CAAC,SAAS,EAAE,YAAY,IAAI,EAAE,CAAC,EAAE,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;aACxE,CACJ,CAAC;QACN,CAAC;QAED,OAAO;YACH,QAAQ;YACR,YAAY,EAAE,SAAS,EAAE,YAAY,IAAI,EAAE;SAC9C,CAAC;IACN,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CACb,OAAmC;QAEnC,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QAEnE,IAAI,YAAY,CAAC;QAEjB,QAAQ,OAAO,CAAC,YAAY,EAAE,CAAC;YAC3B,KAAK,MAAM;gBACP,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrC,MAAM;YACV,KAAK,MAAM;gBACP,YAAY,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;gBACrC,MAAM;YACV,KAAK,QAAQ;gBACT,YAAY,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACtC,MAAM;YACV;gBACI,MAAM,IAAI,KAAK,CAAC,4BAA4B,CAAC,CAAC;QACtD,CAAC;QAED,OAAO;YACH,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,GAAG,EAAE,QAAQ,CAAC,GAAG;YACjB,OAAO;YACP,YAAY;YACZ,QAAQ,EAAE,EAAE;YACZ,IAAI,EAAE,YAAY;YAClB,QAAQ,EAAE,IAAI;SACjB,CAAC;IACN,CAAC;IAEO,qBAAqB,CACzB,QAAuB;QAEvB,MAAM,cAAc,GAAG,iBAAQ,CAAC,OAAO,CAAC,QAAQ,CAAC,IAA2B,CAAC,CAAC;QAC9E,IAAI,WAAW,GAAG,CAAC,CAAC;QACpB,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;QAC9D,cAAc,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;YAChC,WAAW,IAAI,KAAK,CAAC,MAAM,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,MAAM,mBAAmB,GAAG,GAAG,EAAE;YAC7B,OAAO;gBACH,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,GAAG,GAAG,CAAC;gBAChD,WAAW;gBACX,KAAK;aACR,CAAC;QACN,CAAC,CAAC;QAEF,OAAO,CAAC,cAAc,EAAE,mBAAmB,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAoB;QAC7B,MAAM,EAAE,QAAQ,EAAE,YAAY,EAAE,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;QACnE,MAAM,CAAC,MAAM,EAAE,mBAAmB,CAAC,GAAG,IAAI,CAAC,qBAAqB,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO;YACH,OAAO;YACP,GAAG,EAAE,QAAQ,CAAC,GAAG;YACjB,UAAU,EAAE,QAAQ,CAAC,MAAM;YAC3B,MAAM;YACN,QAAQ,EAAE,IAAI;YACd,IAAI,gBAAgB;gBAChB,OAAO,mBAAmB,EAAE,CAAC;YACjC,CAAC;YACD,cAAc,EAAE,EAAE,OAAO,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,EAAE;YAChD,YAAY;YACZ,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,QAAQ,EAAE,EAAE;SACf,CAAC;IACN,CAAC;CACJ;AAzMD,0CAyMC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/impit-client",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.13.0",
|
|
4
4
|
"description": "impit-based HTTP client implementation for Crawlee. Impersonates browser requests to avoid bot detection.",
|
|
5
5
|
"engines": {
|
|
6
|
-
"node": ">=
|
|
6
|
+
"node": ">=20.0.0"
|
|
7
7
|
},
|
|
8
8
|
"main": "./index.js",
|
|
9
9
|
"module": "./index.mjs",
|
|
@@ -56,12 +56,12 @@
|
|
|
56
56
|
"@crawlee/core": "^3.12.1"
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
|
-
"@crawlee/core": "^3.
|
|
59
|
+
"@crawlee/core": "^3.13.0"
|
|
60
60
|
},
|
|
61
61
|
"dependencies": {
|
|
62
|
-
"impit": "^0.1
|
|
62
|
+
"impit": "^0.2.1"
|
|
63
63
|
},
|
|
64
|
-
"packageManager": "yarn@4.
|
|
64
|
+
"packageManager": "yarn@4.7.0",
|
|
65
65
|
"lerna": {
|
|
66
66
|
"command": {
|
|
67
67
|
"publish": {
|
|
@@ -69,5 +69,5 @@
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
},
|
|
72
|
-
"gitHead": "
|
|
72
|
+
"gitHead": "6d5b13ae318909a66001cfc4daa1425ca88b3bb3"
|
|
73
73
|
}
|