@crawlee/http 4.0.0-beta.25 → 4.0.0-beta.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAExC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAmB,MAAM,gBAAgB,CAAC;AAC/D,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAE7E,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAMxH,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAExD,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE5D,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1C,QAAQ,EAAE,QAAQ,CAAC;IACnB,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;CAC3D;AAED,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE1D;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,EAC/B,YAAY,EACZ,eAAsB,EACtB,qBAAqB,EAAE,uBAA6B,GACvD,EAAE;IACC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAClC,GAAG,SAAS,CA4BZ;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAC9B,mBAAmB,EACnB,eAAsB,GACzB,EAAE;IACC,mBAAmB,EAAE,CAAC,gBAAgB,EAAE,MAAM,KAAK,IAAI,CAAC;IACxD,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B,GAAG,SAAS,CAoBZ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,YAAY,CAAC,2BAA2B,CAAC;gBAE3D,OAAO,GAAE,mBAAmB,CAAC,2BAA2B,CAAM;YAmB5D,gBAAgB;CAoBjC;AA0BD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { Transform } from 'node:stream';
|
|
2
|
-
import { finished } from 'node:stream/promises';
|
|
3
2
|
import { BasicCrawler, ContextPipeline } from '@crawlee/basic';
|
|
3
|
+
import { ResponseWithUrl } from '@crawlee/http-client';
|
|
4
4
|
import { Router } from '../index.js';
|
|
5
5
|
import { parseContentTypeFromResponse } from './utils.js';
|
|
6
|
+
const kBodyDrained = Symbol('bodyDrained');
|
|
6
7
|
/**
|
|
7
8
|
* Creates a transform stream that throws an error if the source data speed is below the specified minimum speed.
|
|
8
9
|
* This `Transform` checks the amount of data every `checkProgressInterval` milliseconds.
|
|
@@ -112,7 +113,12 @@ export class FileDownload extends BasicCrawler {
|
|
|
112
113
|
contextPipelineBuilder: () => ContextPipeline.create().compose({
|
|
113
114
|
action: async (context) => this.initiateDownload(context),
|
|
114
115
|
cleanup: async (context) => {
|
|
115
|
-
|
|
116
|
+
if (!context.response.bodyUsed) {
|
|
117
|
+
// Nobody consumed the body — cancel it so the
|
|
118
|
+
// underlying connection can be released.
|
|
119
|
+
await context.response.body?.cancel();
|
|
120
|
+
}
|
|
121
|
+
await context[kBodyDrained];
|
|
116
122
|
},
|
|
117
123
|
}),
|
|
118
124
|
});
|
|
@@ -123,14 +129,36 @@ export class FileDownload extends BasicCrawler {
|
|
|
123
129
|
});
|
|
124
130
|
const { type, charset: encoding } = parseContentTypeFromResponse(response);
|
|
125
131
|
context.request.url = response.url;
|
|
132
|
+
const { response: trackedResponse, bodyDrained } = trackBodyConsumption(response);
|
|
126
133
|
const contextExtension = {
|
|
127
134
|
request: context.request,
|
|
128
|
-
response,
|
|
135
|
+
response: trackedResponse,
|
|
129
136
|
contentType: { type, encoding },
|
|
137
|
+
[kBodyDrained]: bodyDrained,
|
|
130
138
|
};
|
|
131
139
|
return contextExtension;
|
|
132
140
|
}
|
|
133
141
|
}
|
|
142
|
+
/**
|
|
143
|
+
* Wraps a Response so that we can track when the body stream has been fully
|
|
144
|
+
* consumed (or errored). Pipes the original body through a TransformStream;
|
|
145
|
+
* the readable side becomes the new Response body, and `pipeTo` gives us a
|
|
146
|
+
* promise that resolves once the body is fully read or cancelled.
|
|
147
|
+
*/
|
|
148
|
+
function trackBodyConsumption(response) {
|
|
149
|
+
if (!response.body) {
|
|
150
|
+
return { response, bodyDrained: Promise.resolve() };
|
|
151
|
+
}
|
|
152
|
+
const passthrough = new TransformStream();
|
|
153
|
+
const bodyDrained = response.body.pipeTo(passthrough.writable).catch(() => { });
|
|
154
|
+
const trackedResponse = new ResponseWithUrl(passthrough.readable, {
|
|
155
|
+
headers: response.headers,
|
|
156
|
+
status: response.status,
|
|
157
|
+
statusText: response.statusText,
|
|
158
|
+
url: response.url,
|
|
159
|
+
});
|
|
160
|
+
return { response: trackedResponse, bodyDrained };
|
|
161
|
+
}
|
|
134
162
|
/**
|
|
135
163
|
* Creates new {@link Router} instance that works based on request labels.
|
|
136
164
|
* This instance can then serve as a `requestHandler` of your {@link FileDownload}.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AAE/D,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAIvD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AAE1D,MAAM,YAAY,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC;AAsB3C;;;;;;;;GAQG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAC/B,YAAY,EACZ,eAAe,GAAG,IAAI,EACtB,qBAAqB,EAAE,uBAAuB,GAAG,GAAG,GAKvD;IACG,IAAI,SAAS,GAA2C,EAAE,CAAC;IAE3D,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEvB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,GAAG,GAAG,QAAQ,CAAC,SAAS,GAAG,eAAe,CAAC,CAAC;QACvF,MAAM,UAAU,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC,GAAG,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,OAAO,GAAG,CAAC,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAE9D,IAAI,UAAU,GAAG,IAAI,GAAG,OAAO,GAAG,YAAY,EAAE,CAAC;YAC7C,aAAa,CAAC,aAAa,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC,CAAC;QAC1E,CAAC;IACL,CAAC,EAAE,uBAAuB,CAAC,CAAC;IAE5B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QACzB,SAAS,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;YAC9B,SAAS,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;YAC/D,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;YAChB,aAAa,CAAC,aAAa,CAAC,CAAC;YAC7B,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,EAC9B,mBAAmB,EACnB,eAAe,GAAG,IAAI,GAIzB;IACG,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,OAAO,IAAI,SAAS,CAAC;QACjB,SAAS,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;YAC9B,gBAAgB,IAAI,KAAK,CAAC,MAAM,CAAC;YAEjC,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,GAAG,eAAe,EAAE,CAAC;gBAC7C,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACzB,mBAAmB,CAAC,gBAAgB,CAAC,CAAC;YAC1C,CAAC;YAED,QAAQ,
CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;YAChB,mBAAmB,CAAC,gBAAgB,CAAC,CAAC;YACtC,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,YAAyC;IACvE,aAAa;IACb,YAAY,UAA4D,EAAE;QACtE,KAAK,CAAC;YACF,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CACzB,eAAe,CAAC,MAAM,EAAmB,CAAC,OAAO,CAAC;gBAC9C,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC;gBACzD,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;oBACvB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;wBAC7B,8CAA8C;wBAC9C,yCAAyC;wBACzC,MAAM,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;oBAC1C,CAAC;oBAED,MAAO,OAA6C,CAAC,YAAY,CAAC,CAAC;gBACvE,CAAC;aACJ,CAAC;SACT,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,OAAwB;QACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE;YACtF,OAAO,EAAE,OAAO,CAAC,OAAO;SAC3B,CAAC,CAAC;QAEH,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,4BAA4B,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC;QAEnC,MAAM,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAElF,MAAM,gBAAgB,GAAG;YACrB,OAAO,EAAE,OAAO,CAAC,OAAiC;YAClD,QAAQ,EAAE,eAAe;YACzB,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC/B,CAAC,YAAY,CAAC,EAAE,WAAW;SAC9B,CAAC;QAEF,OAAO,gBAAgB,CAAC;IAC5B,CAAC;CACJ;AAED;;;;;GAKG;AACH,SAAS,oBAAoB,CAAC,QAAkB;IAC5C,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACjB,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;IACxD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,eAAe,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAE/E,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC,WAAW,CAAC,QAAQ,EAAE;QAC9D,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,UAAU,EAAE,QAAQ,CAAC,UAAU;QAC/B,GAAG,EAAE,QAAQ,CAAC,GAAG;KACpB,CAAC,CAAC;IAEH,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,CAAC;AACtD,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CA
AC;AAC1C,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@crawlee/http",
|
|
3
|
-
"version": "4.0.0-beta.25",
|
|
3
|
+
"version": "4.0.0-beta.26",
|
|
4
4
|
"description": "The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.",
|
|
5
5
|
"engines": {
|
|
6
6
|
"node": ">=22.0.0"
|
|
@@ -49,10 +49,10 @@
|
|
|
49
49
|
"dependencies": {
|
|
50
50
|
"@apify/timeout": "^0.3.2",
|
|
51
51
|
"@apify/utilities": "^2.15.5",
|
|
52
|
-
"@crawlee/basic": "4.0.0-beta.25",
|
|
53
|
-
"@crawlee/http-client": "4.0.0-beta.25",
|
|
54
|
-
"@crawlee/types": "4.0.0-beta.25",
|
|
55
|
-
"@crawlee/utils": "4.0.0-beta.25",
|
|
52
|
+
"@crawlee/basic": "4.0.0-beta.26",
|
|
53
|
+
"@crawlee/http-client": "4.0.0-beta.26",
|
|
54
|
+
"@crawlee/types": "4.0.0-beta.26",
|
|
55
|
+
"@crawlee/utils": "4.0.0-beta.26",
|
|
56
56
|
"@types/content-type": "^1.1.8",
|
|
57
57
|
"cheerio": "^1.0.0",
|
|
58
58
|
"content-type": "^1.0.5",
|
|
@@ -69,5 +69,5 @@
|
|
|
69
69
|
}
|
|
70
70
|
}
|
|
71
71
|
},
|
|
72
|
-
"gitHead": "
|
|
72
|
+
"gitHead": "e5398dea2e3eb898ec9a05f81f48a986b85fae7a"
|
|
73
73
|
}
|