@crawlee/core 4.0.0-beta.19 → 4.0.0-beta.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,kBAAkB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAG/F;;GAEG;AACH,qBAAa,qBAAsB,YAAW,cAAc;IACxD;;;OAGG;IACH,OAAO,CAAC,eAAe;IAMvB;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC;
|
|
1
|
+
{"version":3,"file":"got-scraping-http-client.d.ts","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,cAAc,EAAE,kBAAkB,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAG/F;;GAEG;AACH,qBAAa,qBAAsB,YAAW,cAAc;IACxD;;;OAGG;IACH,OAAO,CAAC,eAAe;IAMvB,OAAO,CAAE,cAAc;IAmBvB,OAAO,CAAC,YAAY;IAIpB;;OAEG;IACG,WAAW,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,QAAQ,CAAC;IA8BpF;;OAEG;IACG,MAAM,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,EAAE,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC;CA8D7E"}
|
|
@@ -12,6 +12,25 @@ export class GotScrapingHttpClient {
|
|
|
12
12
|
validateRequest(request) {
|
|
13
13
|
return !['CONNECT', 'connect'].includes(request.method);
|
|
14
14
|
}
|
|
15
|
+
*iterateHeaders(headers) {
|
|
16
|
+
for (const [key, value] of Object.entries(headers)) {
|
|
17
|
+
// Filter out pseudo-headers
|
|
18
|
+
if (key.startsWith(':') || value === undefined) {
|
|
19
|
+
continue;
|
|
20
|
+
}
|
|
21
|
+
if (Array.isArray(value)) {
|
|
22
|
+
for (const v of value) {
|
|
23
|
+
yield [key, v];
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
else {
|
|
27
|
+
yield [key, value];
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
parseHeaders(headers) {
|
|
32
|
+
return new Headers([...this.iterateHeaders(headers)]);
|
|
33
|
+
}
|
|
15
34
|
/**
|
|
16
35
|
* @inheritDoc
|
|
17
36
|
*/
|
|
@@ -32,18 +51,9 @@ export class GotScrapingHttpClient {
|
|
|
32
51
|
// Using the `cookieJar` option directly would override that.
|
|
33
52
|
cookieJar: undefined,
|
|
34
53
|
});
|
|
35
|
-
const
|
|
36
|
-
.map(([key, value]) => {
|
|
37
|
-
if (value === undefined)
|
|
38
|
-
return [];
|
|
39
|
-
if (Array.isArray(value)) {
|
|
40
|
-
return value.map((v) => [key, v]);
|
|
41
|
-
}
|
|
42
|
-
return [[key, value]];
|
|
43
|
-
})
|
|
44
|
-
.flat();
|
|
54
|
+
const responseHeaders = this.parseHeaders(gotResult.headers);
|
|
45
55
|
return new ResponseWithUrl(new Uint8Array(gotResult.rawBody), {
|
|
46
|
-
headers:
|
|
56
|
+
headers: responseHeaders,
|
|
47
57
|
status: gotResult.statusCode,
|
|
48
58
|
statusText: gotResult.statusMessage ?? '',
|
|
49
59
|
url: gotResult.url,
|
|
@@ -53,6 +63,7 @@ export class GotScrapingHttpClient {
|
|
|
53
63
|
* @inheritDoc
|
|
54
64
|
*/
|
|
55
65
|
async stream(request, options) {
|
|
66
|
+
const { session, timeout } = options ?? {};
|
|
56
67
|
if (!this.validateRequest(request)) {
|
|
57
68
|
throw new Error(`The HTTP method CONNECT is not supported by the GotScrapingHttpClient.`);
|
|
58
69
|
}
|
|
@@ -64,6 +75,8 @@ export class GotScrapingHttpClient {
|
|
|
64
75
|
headers: Object.fromEntries(request.headers.entries()),
|
|
65
76
|
body: request.body ? Readable.fromWeb(request.body) : undefined,
|
|
66
77
|
isStream: true,
|
|
78
|
+
proxyUrl: session?.proxyInfo?.url,
|
|
79
|
+
timeout: { request: timeout },
|
|
67
80
|
cookieJar: undefined,
|
|
68
81
|
});
|
|
69
82
|
stream.on('redirect', (updatedOptions, redirectResponse) => {
|
|
@@ -88,11 +101,12 @@ export class GotScrapingHttpClient {
|
|
|
88
101
|
}
|
|
89
102
|
stream.on('error', reject);
|
|
90
103
|
stream.on('response', (response) => {
|
|
104
|
+
const headers = this.parseHeaders(response.headers);
|
|
91
105
|
// Cast shouldn't be needed here, undici might have a different `ReadableStream` type
|
|
92
106
|
resolve(new ResponseWithUrl(Readable.toWeb(stream), {
|
|
93
107
|
status: response.statusCode,
|
|
94
108
|
statusText: response.statusMessage ?? '',
|
|
95
|
-
headers
|
|
109
|
+
headers,
|
|
96
110
|
url: response.url,
|
|
97
111
|
}));
|
|
98
112
|
});
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"got-scraping-http-client.js","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAC9B;;;OAGG;IACK,eAAe,CACnB,OAAgB;QAEhB,OAAO,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAO,CAAC,CAAC;IAC7D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAgB,EAAE,OAA4B;QAC5D,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAE3C,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC;YAChC,GAAG,EAAE,OAAO,CAAC,GAAI;YACjB,MAAM,EAAE,OAAO,CAAC,MAA2B;YAC3C,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACtD,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,IAAW,CAAC,CAAC,CAAC,CAAC,SAAS;YACtE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG;YACjC,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE;YAC7B,wCAAwC;YACxC,iFAAiF;YACjF,6DAA6D;YAC7D,SAAS,EAAE,SAAS;SACvB,CAAC,CAAC;QAEH,MAAM,
|
|
1
|
+
{"version":3,"file":"got-scraping-http-client.js","sourceRoot":"","sources":["../../src/http_clients/got-scraping-http-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,OAAO,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAG3C,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD;;GAEG;AACH,MAAM,OAAO,qBAAqB;IAC9B;;;OAGG;IACK,eAAe,CACnB,OAAgB;QAEhB,OAAO,CAAC,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAO,CAAC,CAAC;IAC7D,CAAC;IAEO,CAAC,cAAc,CACnB,OAAsD;QAEtD,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;YACjD,4BAA4B;YAC5B,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;gBAC7C,SAAS;YACb,CAAC;YAED,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;gBACvB,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;oBACpB,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;gBACnB,CAAC;YACL,CAAC;iBAAM,CAAC;gBACJ,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;YACvB,CAAC;QACL,CAAC;IACL,CAAC;IAEO,YAAY,CAAC,OAAsD;QACvE,OAAO,IAAI,OAAO,CAAC,CAAC,GAAG,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAgB,EAAE,OAA4B;QAC5D,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAE3C,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,WAAW,CAAC;YAChC,GAAG,EAAE,OAAO,CAAC,GAAI;YACjB,MAAM,EAAE,OAAO,CAAC,MAA2B;YAC3C,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACtD,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,IAAW,CAAC,CAAC,CAAC,CAAC,SAAS;YACtE,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG;YACjC,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE;YAC7B,wCAAwC;YACxC,iFAAiF;YACjF,6DAA6D;YAC7D,SAAS,EAAE,SAAS;SACvB,CAAC,CAAC;QAEH,MAAM,eAAe,GAAG,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;QAE7D,OAAO,IAAI,eAAe,CAAC,IAAI,UAAU,CAAC,SAAS,CAAC,OAAO,CAAC,EAAE;YAC1D,OAAO,EAAE,eAAe;YACxB,MAAM,EAAE,SAAS,CAAC,UAAU;YAC5B,UAAU,EAAE,SAAS,CAAC,aAAa,IAAI,EAAE;YACzC,GAAG,EAAE,SAAS,CAAC,GAAG;SACrB,CAAC,CAAC;IACP,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,OAAgB,EAAE,OAAuB
;QAClD,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,GAAG,OAAO,IAAI,EAAE,CAAC;QAE3C,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,wEAAwE,CAAC,CAAC;QAC9F,CAAC;QACD,qDAAqD;QACrD,OAAO,IAAI,OAAO,CAAC,KAAK,EAAE,OAAO,EAAE,MAAM,EAAE,EAAE;YACzC,MAAM,MAAM,GAAG,WAAW,CAAC;gBACvB,GAAG,EAAE,OAAO,CAAC,GAAG;gBAChB,MAAM,EAAE,OAAO,CAAC,MAA2B;gBAC3C,OAAO,EAAE,MAAM,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;gBACtD,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,IAAW,CAAC,CAAC,CAAC,CAAC,SAAS;gBACtE,QAAQ,EAAE,IAAI;gBACd,QAAQ,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG;gBACjC,OAAO,EAAE,EAAE,OAAO,EAAE,OAAO,EAAE;gBAC7B,SAAS,EAAE,SAAS;aACvB,CAAC,CAAC;YAEH,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,cAAuB,EAAE,gBAAqB,EAAE,EAAE;gBACrE,MAAM,sBAAsB,GAAG,IAAI,eAAe,CAAC,gBAAgB,CAAC,OAAO,EAAE;oBACzE,OAAO,EAAE,gBAAgB,CAAC,OAAO;oBACjC,MAAM,EAAE,gBAAgB,CAAC,UAAU;oBACnC,UAAU,EAAE,gBAAgB,CAAC,aAAa;oBAC1C,GAAG,EAAE,gBAAgB,CAAC,GAAG;iBAC5B,CAAC,CAAC;gBAEH,MAAM,aAAa,GAAG,IAAI,OAAO,CAC7B,MAAM,CAAC,OAAO,CAAC,cAAc,CAAC,OAAO,CAAC;qBACjC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC;qBAC3F,IAAI,EAAwB,CACpC,CAAC;gBAEF,OAAO,EAAE,UAAU,EAAE,CAAC,sBAAsB,EAAE;oBAC1C,GAAG,EAAE,cAAc,CAAC,GAAG;oBACvB,OAAO,EAAE,aAAa;iBACzB,CAAC,CAAC;gBAEH,cAAc,CAAC,OAAO,GAAG,MAAM,CAAC,WAAW,CAAC,aAAa,CAAC,OAAO,EAAE,CAAC,CAAC;YACzE,CAAC,CAAC,CAAC;YAEH,yEAAyE;YACzE,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC,QAAQ,EAAE,QAAQ,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;gBAClE,MAAM,CAAC,GAAG,EAAE,CAAC;YACjB,CAAC;YAED,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;YAE3B,MAAM,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,QAAuB,EAAE,EAAE;gBAC9C,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;gBACpD,qFAAqF;gBACrF,OAAO,CACH,IAAI,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAQ,EAAE;oBAC/C,MAAM,EAAE,QAAQ,CAAC,UAAU;oBAC3B,UAAU,EAAE,QAAQ,CAAC,aAAa,IAAI,EAAE;oBACxC
,OAAO;oBACP,GAAG,EAAE,QAAQ,CAAC,GAAG;iBACpB,CAAC,CACL,CAAC;YACN,CAAC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC;CACJ"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/core",
|
|
3
|
-
"version": "4.0.0-beta.19",
|
|
3
|
+
"version": "4.0.0-beta.20",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -53,9 +53,9 @@
|
|
|
53
53
|
"@apify/pseudo_url": "^2.0.59",
|
|
54
54
|
"@apify/timeout": "^0.3.2",
|
|
55
55
|
"@apify/utilities": "^2.15.5",
|
|
56
|
-
"@crawlee/memory-storage": "4.0.0-beta.
|
|
57
|
-
"@crawlee/types": "4.0.0-beta.
|
|
58
|
-
"@crawlee/utils": "4.0.0-beta.
|
|
56
|
+
"@crawlee/memory-storage": "4.0.0-beta.20",
|
|
57
|
+
"@crawlee/types": "4.0.0-beta.20",
|
|
58
|
+
"@crawlee/utils": "4.0.0-beta.20",
|
|
59
59
|
"@sapphire/async-queue": "^1.5.5",
|
|
60
60
|
"@vladfrangu/async_event_emitter": "^2.4.6",
|
|
61
61
|
"csv-stringify": "^6.5.2",
|
|
@@ -77,5 +77,5 @@
|
|
|
77
77
|
}
|
|
78
78
|
}
|
|
79
79
|
},
|
|
80
|
-
"gitHead": "
|
|
80
|
+
"gitHead": "8e04fea33d3b7ca5a386a941c990e9212275e221"
|
|
81
81
|
}
|