@crawlee/impit-client 4.0.0-beta.21 → 4.0.0-beta.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +7 -21
- package/index.d.ts.map +1 -1
- package/index.js +11 -75
- package/index.js.map +1 -1
- package/package.json +4 -4
package/index.d.ts
CHANGED
|
@@ -1,16 +1,15 @@
|
|
|
1
|
-
import type {
|
|
2
|
-
import
|
|
1
|
+
import type { CustomFetchOptions } from '@crawlee/http-client';
|
|
2
|
+
import { BaseHttpClient } from '@crawlee/http-client';
|
|
3
|
+
import { type ImpitOptions } from 'impit';
|
|
3
4
|
export declare const Browser: {
|
|
4
5
|
readonly Chrome: "chrome";
|
|
5
6
|
readonly Firefox: "firefox";
|
|
6
7
|
};
|
|
7
8
|
/**
|
|
8
|
-
* A HTTP client implementation based on the `impit library.
|
|
9
|
+
* A HTTP client implementation based on the `impit` library.
|
|
9
10
|
*/
|
|
10
|
-
export declare class ImpitHttpClient
|
|
11
|
+
export declare class ImpitHttpClient extends BaseHttpClient {
|
|
11
12
|
private impitOptions;
|
|
12
|
-
private maxRedirects;
|
|
13
|
-
private followRedirects;
|
|
14
13
|
/**
|
|
15
14
|
* Enables reuse of `impit` clients for the same set of options.
|
|
16
15
|
* This is useful for performance reasons, as creating
|
|
@@ -19,23 +18,10 @@ export declare class ImpitHttpClient implements BaseHttpClient {
|
|
|
19
18
|
*/
|
|
20
19
|
private clientCache;
|
|
21
20
|
private getClient;
|
|
22
|
-
constructor(options?: Omit<ImpitOptions, 'proxyUrl'
|
|
23
|
-
maxRedirects?: number;
|
|
24
|
-
});
|
|
25
|
-
/**
|
|
26
|
-
* Common implementation for `sendRequest` and `stream` methods.
|
|
27
|
-
* @param request `HttpRequest` object
|
|
28
|
-
* @returns `HttpResponse` object
|
|
29
|
-
*/
|
|
30
|
-
private getResponse;
|
|
31
|
-
/**
|
|
32
|
-
* @inheritDoc
|
|
33
|
-
*/
|
|
34
|
-
sendRequest(request: Request, options?: SendRequestOptions): Promise<Response>;
|
|
35
|
-
private getStreamWithProgress;
|
|
21
|
+
constructor(options?: Omit<ImpitOptions, 'proxyUrl' | 'timeout'>);
|
|
36
22
|
/**
|
|
37
23
|
* @inheritDoc
|
|
38
24
|
*/
|
|
39
|
-
|
|
25
|
+
fetch(request: Request, options?: RequestInit & CustomFetchOptions): Promise<Response>;
|
|
40
26
|
}
|
|
41
27
|
//# sourceMappingURL=index.d.ts.map
|
package/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAC/D,OAAO,EAAE,cAAc,EAAmB,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAS,KAAK,YAAY,EAAE,MAAM,OAAO,CAAC;AAKjD,eAAO,MAAM,OAAO;;;CAGV,CAAC;AAEX;;GAEG;AACH,qBAAa,eAAgB,SAAQ,cAAc;IAC/C,OAAO,CAAC,YAAY,CAAe;IAEnC;;;;;OAKG;IACH,OAAO,CAAC,WAAW,CAA2F;IAE9G,OAAO,CAAC,SAAS;gBAgBL,OAAO,CAAC,EAAE,IAAI,CAAC,YAAY,EAAE,UAAU,GAAG,SAAS,CAAC;IAKhE;;OAEG;IACG,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,WAAW,GAAG,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC;CAc/F"}
|
package/index.js
CHANGED
|
@@ -1,24 +1,15 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { BaseHttpClient, ResponseWithUrl } from '@crawlee/http-client';
|
|
2
2
|
import { Impit } from 'impit';
|
|
3
3
|
import { LruCache } from '@apify/datastructures';
|
|
4
4
|
export const Browser = {
|
|
5
5
|
'Chrome': 'chrome',
|
|
6
6
|
'Firefox': 'firefox',
|
|
7
7
|
};
|
|
8
|
-
class ResponseWithUrl extends Response {
|
|
9
|
-
url;
|
|
10
|
-
constructor(body, init) {
|
|
11
|
-
super(body, init);
|
|
12
|
-
this.url = init.url ?? '';
|
|
13
|
-
}
|
|
14
|
-
}
|
|
15
8
|
/**
|
|
16
|
-
* A HTTP client implementation based on the `impit library.
|
|
9
|
+
* A HTTP client implementation based on the `impit` library.
|
|
17
10
|
*/
|
|
18
|
-
export class ImpitHttpClient {
|
|
11
|
+
export class ImpitHttpClient extends BaseHttpClient {
|
|
19
12
|
impitOptions;
|
|
20
|
-
maxRedirects;
|
|
21
|
-
followRedirects;
|
|
22
13
|
/**
|
|
23
14
|
* Enables reuse of `impit` clients for the same set of options.
|
|
24
15
|
* This is useful for performance reasons, as creating
|
|
@@ -38,77 +29,22 @@ export class ImpitHttpClient {
|
|
|
38
29
|
return client;
|
|
39
30
|
}
|
|
40
31
|
constructor(options) {
|
|
32
|
+
super();
|
|
41
33
|
this.impitOptions = options ?? {};
|
|
42
|
-
this.maxRedirects = options?.maxRedirects ?? 10;
|
|
43
|
-
this.followRedirects = options?.followRedirects ?? true;
|
|
44
34
|
}
|
|
45
35
|
/**
|
|
46
|
-
*
|
|
47
|
-
* @param request `HttpRequest` object
|
|
48
|
-
* @returns `HttpResponse` object
|
|
36
|
+
* @inheritDoc
|
|
49
37
|
*/
|
|
50
|
-
async
|
|
51
|
-
|
|
52
|
-
throw new Error(`Too many redirects, maximum is ${this.maxRedirects}.`);
|
|
53
|
-
}
|
|
38
|
+
async fetch(request, options) {
|
|
39
|
+
const { proxyUrl, redirect, signal } = options ?? {};
|
|
54
40
|
const impit = this.getClient({
|
|
55
41
|
...this.impitOptions,
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
followRedirects: false,
|
|
42
|
+
proxyUrl,
|
|
43
|
+
followRedirects: redirect === 'follow',
|
|
59
44
|
});
|
|
60
|
-
const response = await impit.fetch(request, {
|
|
61
|
-
if (this.followRedirects && response.status >= 300 && response.status < 400) {
|
|
62
|
-
const location = response.headers.get('location');
|
|
63
|
-
const redirectUrl = new URL(location ?? '', request.url);
|
|
64
|
-
if (!location) {
|
|
65
|
-
throw new Error('Redirect response missing location header.');
|
|
66
|
-
}
|
|
67
|
-
return this.getResponse({
|
|
68
|
-
...request,
|
|
69
|
-
url: redirectUrl.href,
|
|
70
|
-
}, {
|
|
71
|
-
redirectCount: (redirects?.redirectCount ?? 0) + 1,
|
|
72
|
-
redirectUrls: [...(redirects?.redirectUrls ?? []), redirectUrl],
|
|
73
|
-
});
|
|
74
|
-
}
|
|
75
|
-
return {
|
|
76
|
-
response,
|
|
77
|
-
redirectUrls: redirects?.redirectUrls ?? [],
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
/**
|
|
81
|
-
* @inheritDoc
|
|
82
|
-
*/
|
|
83
|
-
async sendRequest(request, options) {
|
|
84
|
-
const { response } = await this.getResponse(request, {}, options);
|
|
45
|
+
const response = await impit.fetch(request, { signal: signal ?? undefined });
|
|
85
46
|
// todo - cast shouldn't be needed here, impit returns `Uint8Array`
|
|
86
|
-
return new ResponseWithUrl(
|
|
87
|
-
}
|
|
88
|
-
getStreamWithProgress(response) {
|
|
89
|
-
const responseStream = Readable.fromWeb(response.body);
|
|
90
|
-
let transferred = 0;
|
|
91
|
-
const total = Number(response.headers.get('content-length') ?? 0);
|
|
92
|
-
responseStream.on('data', (chunk) => {
|
|
93
|
-
transferred += chunk.length;
|
|
94
|
-
});
|
|
95
|
-
const getDownloadProgress = () => {
|
|
96
|
-
return {
|
|
97
|
-
percent: Math.round((transferred / total) * 100),
|
|
98
|
-
transferred,
|
|
99
|
-
total,
|
|
100
|
-
};
|
|
101
|
-
};
|
|
102
|
-
return [responseStream, getDownloadProgress];
|
|
103
|
-
}
|
|
104
|
-
/**
|
|
105
|
-
* @inheritDoc
|
|
106
|
-
*/
|
|
107
|
-
async stream(request, options) {
|
|
108
|
-
const { response } = await this.getResponse(request, {}, options);
|
|
109
|
-
const [stream] = this.getStreamWithProgress(response);
|
|
110
|
-
// Cast shouldn't be needed here, undici might have a slightly different `ReadableStream` type
|
|
111
|
-
return new ResponseWithUrl(Readable.toWeb(stream), response);
|
|
47
|
+
return new ResponseWithUrl(response.body, response);
|
|
112
48
|
}
|
|
113
49
|
}
|
|
114
50
|
//# sourceMappingURL=index.js.map
|
package/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,KAAK,EAAqB,MAAM,OAAO,CAAC;AAGjD,OAAO,EAAE,QAAQ,EAAE,MAAM,uBAAuB,CAAC;AAEjD,MAAM,CAAC,MAAM,OAAO,GAAG;IACnB,QAAQ,EAAE,QAAQ;IAClB,SAAS,EAAE,SAAS;CACd,CAAC;AAEX;;GAEG;AACH,MAAM,OAAO,eAAgB,SAAQ,cAAc;IACvC,YAAY,CAAe;IAEnC;;;;;OAKG;IACK,WAAW,GAA2D,IAAI,QAAQ,CAAC,EAAE,SAAS,EAAE,EAAE,EAAE,CAAC,CAAC;IAEtG,SAAS,CAAC,OAAqB;QACnC,MAAM,EAAE,SAAS,EAAE,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAEvC,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,cAAc,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAEtD,IAAI,cAAc,IAAI,CAAC,CAAC,SAAS,IAAI,cAAc,CAAC,SAAS,KAAK,SAAS,CAAC,EAAE,CAAC;YAC3E,OAAO,cAAc,CAAC,MAAM,CAAC;QACjC,CAAC;QAED,MAAM,MAAM,GAAG,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC;QAClC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,SAA2B,EAAE,CAAC,CAAC;QAEnF,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,YAAY,OAAoD;QAC5D,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,YAAY,GAAG,OAAO,IAAI,EAAE,CAAC;IACtC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,OAAgB,EAAE,OAA0C;QACpE,MAAM,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAErD,MAAM,KAAK,GAAG,IAAI,CAAC,SAAS,CAAC;YACzB,GAAG,IAAI,CAAC,YAAY;YACpB,QAAQ;YACR,eAAe,EAAE,QAAQ,KAAK,QAAQ;SACzC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,MAAM,EAAE,MAAM,IAAI,SAAS,EAAE,CAAC,CAAC;QAE7E,mEAAmE;QACnE,OAAO,IAAI,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;IACxD,CAAC;CACJ"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/impit-client",
|
|
3
|
-
"version": "4.0.0-beta.
|
|
3
|
+
"version": "4.0.0-beta.23",
|
|
4
4
|
"description": "impit-based HTTP client implementation for Crawlee. Impersonates browser requests to avoid bot detection.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -48,8 +48,8 @@
|
|
|
48
48
|
},
|
|
49
49
|
"dependencies": {
|
|
50
50
|
"@apify/datastructures": "^2.0.3",
|
|
51
|
-
"@crawlee/
|
|
52
|
-
"impit": "^0.
|
|
51
|
+
"@crawlee/http-client": "4.0.0-beta.23",
|
|
52
|
+
"impit": "^0.8.2",
|
|
53
53
|
"tough-cookie": "^6.0.0"
|
|
54
54
|
},
|
|
55
55
|
"lerna": {
|
|
@@ -59,5 +59,5 @@
|
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
},
|
|
62
|
-
"gitHead": "
|
|
62
|
+
"gitHead": "929fd9374b74ee5e9720b3864b39326f014cd45b"
|
|
63
63
|
}
|