@crawlee/http 4.0.0-beta.4 → 4.0.0-beta.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -5
- package/internals/file-download.d.ts +58 -32
- package/internals/file-download.d.ts.map +1 -1
- package/internals/file-download.js +116 -73
- package/internals/file-download.js.map +1 -1
- package/internals/http-crawler.d.ts +92 -175
- package/internals/http-crawler.d.ts.map +1 -1
- package/internals/http-crawler.js +169 -321
- package/internals/http-crawler.js.map +1 -1
- package/internals/utils.d.ts +14 -0
- package/internals/utils.d.ts.map +1 -0
- package/internals/utils.js +71 -0
- package/internals/utils.js.map +1 -0
- package/package.json +7 -7
- package/tsconfig.build.tsbuildinfo +0 -1
package/README.md
CHANGED
|
@@ -9,6 +9,10 @@
|
|
|
9
9
|
<small>A web scraping and browser automation library</small>
|
|
10
10
|
</h1>
|
|
11
11
|
|
|
12
|
+
<p align=center>
|
|
13
|
+
<a href="https://trendshift.io/repositories/5179" target="_blank"><img src="https://trendshift.io/api/badge/repositories/5179" alt="apify%2Fcrawlee | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
|
|
14
|
+
</p>
|
|
15
|
+
|
|
12
16
|
<p align=center>
|
|
13
17
|
<a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/v/@crawlee/core.svg" alt="NPM latest version" data-canonical-src="https://img.shields.io/npm/v/@crawlee/core/next.svg" style="max-width: 100%;"></a>
|
|
14
18
|
<a href="https://www.npmjs.com/package/@crawlee/core" rel="nofollow"><img src="https://img.shields.io/npm/dm/@crawlee/core.svg" alt="Downloads" data-canonical-src="https://img.shields.io/npm/dm/@crawlee/core.svg" style="max-width: 100%;"></a>
|
|
@@ -24,7 +28,7 @@ Crawlee is available as the [`crawlee`](https://www.npmjs.com/package/crawlee) N
|
|
|
24
28
|
|
|
25
29
|
> 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev)** 👈
|
|
26
30
|
|
|
27
|
-
>
|
|
31
|
+
> Do you prefer 🐍 Python instead of JavaScript? [👉 Check out Crawlee for Python 👈](https://github.com/apify/crawlee-python).
|
|
28
32
|
|
|
29
33
|
## Installation
|
|
30
34
|
|
|
@@ -85,7 +89,7 @@ By default, Crawlee stores data to `./storage` in the current working directory.
|
|
|
85
89
|
We provide automated beta builds for every merged code change in Crawlee. You can find them in the npm [list of releases](https://www.npmjs.com/package/crawlee?activeTab=versions). If you want to test new features or bug fixes before we release them, feel free to install a beta build like this:
|
|
86
90
|
|
|
87
91
|
```bash
|
|
88
|
-
npm install crawlee@
|
|
92
|
+
npm install crawlee@next
|
|
89
93
|
```
|
|
90
94
|
|
|
91
95
|
If you also use the [Apify SDK](https://github.com/apify/apify-sdk-js), you need to specify dependency overrides in your `package.json` file so that you don't end up with multiple versions of Crawlee installed:
|
|
@@ -94,9 +98,9 @@ If you also use the [Apify SDK](https://github.com/apify/apify-sdk-js), you need
|
|
|
94
98
|
{
|
|
95
99
|
"overrides": {
|
|
96
100
|
"apify": {
|
|
97
|
-
"@crawlee/core": "
|
|
98
|
-
"@crawlee/types": "
|
|
99
|
-
"@crawlee/utils": "
|
|
101
|
+
"@crawlee/core": "$crawlee",
|
|
102
|
+
"@crawlee/types": "$crawlee",
|
|
103
|
+
"@crawlee/utils": "$crawlee"
|
|
100
104
|
}
|
|
101
105
|
}
|
|
102
106
|
}
|
|
@@ -1,29 +1,47 @@
|
|
|
1
|
+
import { Transform } from 'node:stream';
|
|
2
|
+
import type { BasicCrawlerOptions } from '@crawlee/basic';
|
|
3
|
+
import { BasicCrawler } from '@crawlee/basic';
|
|
4
|
+
import type { CrawlingContext, LoadedRequest, Request } from '@crawlee/core';
|
|
5
|
+
import { ResponseWithUrl } from '@crawlee/http-client';
|
|
1
6
|
import type { Dictionary } from '@crawlee/types';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
JSONData extends Dictionary = any> = (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
|
|
14
|
-
requestHandler?: never;
|
|
15
|
-
streamHandler?: StreamHandler;
|
|
16
|
-
}) | (Omit<HttpCrawlerOptions<FileDownloadCrawlingContext<UserData, JSONData>>, 'requestHandler'> & {
|
|
17
|
-
requestHandler: FileDownloadRequestHandler;
|
|
18
|
-
streamHandler?: never;
|
|
19
|
-
});
|
|
20
|
-
export type FileDownloadHook<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
21
|
-
JSONData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData, JSONData>>;
|
|
22
|
-
export interface FileDownloadCrawlingContext<UserData extends Dictionary = any, // with default to Dictionary we cant use a typed router in untyped crawler
|
|
23
|
-
JSONData extends Dictionary = any> extends InternalHttpCrawlingContext<UserData, JSONData, FileDownload> {
|
|
7
|
+
import type { ErrorHandler, GetUserDataFromRequest, InternalHttpHook, RequestHandler, RouterRoutes } from '../index.js';
|
|
8
|
+
declare const kBodyDrained: unique symbol;
|
|
9
|
+
export type FileDownloadErrorHandler<UserData extends Dictionary = any> = ErrorHandler<FileDownloadCrawlingContext<UserData>>;
|
|
10
|
+
export type FileDownloadHook<UserData extends Dictionary = any> = InternalHttpHook<FileDownloadCrawlingContext<UserData>>;
|
|
11
|
+
export interface FileDownloadCrawlingContext<UserData extends Dictionary = any> extends CrawlingContext<UserData> {
|
|
12
|
+
request: LoadedRequest<Request<UserData>>;
|
|
13
|
+
response: Response;
|
|
14
|
+
contentType: {
|
|
15
|
+
type: string;
|
|
16
|
+
encoding: BufferEncoding;
|
|
17
|
+
};
|
|
24
18
|
}
|
|
25
|
-
export type FileDownloadRequestHandler<UserData extends Dictionary = any
|
|
26
|
-
|
|
19
|
+
export type FileDownloadRequestHandler<UserData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<UserData>>;
|
|
20
|
+
/**
|
|
21
|
+
* Creates a transform stream that throws an error if the source data speed is below the specified minimum speed.
|
|
22
|
+
* This `Transform` checks the amount of data every `checkProgressInterval` milliseconds.
|
|
23
|
+
* If the stream has received less than `minSpeedKbps * historyLengthMs / 1000` kilobytes (1 kB = 1024 bytes) in the last `historyLengthMs` milliseconds,
|
|
24
|
+
* it will throw an error.
|
|
25
|
+
*
|
|
26
|
+
* Can be used e.g. to abort a download if the network speed is too slow.
|
|
27
|
+
* @returns Transform stream that monitors the speed of the incoming data.
|
|
28
|
+
*/
|
|
29
|
+
export declare function MinimumSpeedStream({ minSpeedKbps, historyLengthMs, checkProgressInterval: checkProgressIntervalMs, }: {
|
|
30
|
+
minSpeedKbps: number;
|
|
31
|
+
historyLengthMs?: number;
|
|
32
|
+
checkProgressInterval?: number;
|
|
33
|
+
}): Transform;
|
|
34
|
+
/**
|
|
35
|
+
* Creates a transform stream that logs the progress of the incoming data.
|
|
36
|
+
* This `Transform` calls the `logTransferredBytes` function at most once every `loggingInterval` milliseconds (and once more when the stream ends) with the number of bytes received so far.
|
|
37
|
+
*
|
|
38
|
+
* Can be used e.g. to log the progress of a download.
|
|
39
|
+
* @returns Transform stream logging the progress of the incoming data.
|
|
40
|
+
*/
|
|
41
|
+
export declare function ByteCounterStream({ logTransferredBytes, loggingInterval, }: {
|
|
42
|
+
logTransferredBytes: (transferredBytes: number) => void;
|
|
43
|
+
loggingInterval?: number;
|
|
44
|
+
}): Transform;
|
|
27
45
|
/**
|
|
28
46
|
* Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
|
|
29
47
|
*
|
|
@@ -39,11 +57,11 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
|
|
|
39
57
|
*
|
|
40
58
|
* The crawler finishes when there are no more {@link Request} objects to crawl.
|
|
41
59
|
*
|
|
42
|
-
* We can use the `preNavigationHooks` to adjust
|
|
60
|
+
* We can use the `preNavigationHooks` to adjust the crawling context before the request is made:
|
|
43
61
|
*
|
|
44
62
|
* ```
|
|
45
63
|
* preNavigationHooks: [
|
|
46
|
-
* (crawlingContext
|
|
64
|
+
* (crawlingContext) => {
|
|
47
65
|
* // ...
|
|
48
66
|
* },
|
|
49
67
|
* ]
|
|
@@ -67,11 +85,19 @@ JSONData extends Dictionary = any> = RequestHandler<FileDownloadCrawlingContext<
|
|
|
67
85
|
* ]);
|
|
68
86
|
* ```
|
|
69
87
|
*/
|
|
70
|
-
export declare class FileDownload extends
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
protected
|
|
74
|
-
|
|
88
|
+
export declare class FileDownload extends BasicCrawler<FileDownloadCrawlingContext> {
|
|
89
|
+
constructor(options?: BasicCrawlerOptions<FileDownloadCrawlingContext>);
|
|
90
|
+
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
91
|
+
protected buildContextPipeline(): import("@crawlee/basic").ContextPipeline<CrawlingContext<Dictionary>, CrawlingContext<Dictionary> & {
|
|
92
|
+
request: LoadedRequest<Request>;
|
|
93
|
+
response: ResponseWithUrl;
|
|
94
|
+
contentType: {
|
|
95
|
+
type: string;
|
|
96
|
+
encoding: BufferEncoding;
|
|
97
|
+
};
|
|
98
|
+
[kBodyDrained]: Promise<void>;
|
|
99
|
+
}>;
|
|
100
|
+
private initiateDownload;
|
|
75
101
|
}
|
|
76
102
|
/**
|
|
77
103
|
* Creates new {@link Router} instance that works based on request labels.
|
|
@@ -98,6 +124,6 @@ export declare class FileDownload extends HttpCrawler<FileDownloadCrawlingContex
|
|
|
98
124
|
* ```
|
|
99
125
|
*/
|
|
100
126
|
// @ts-ignore optional peer dependency or compatibility with es2022
|
|
101
|
-
export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("
|
|
127
|
+
export declare function createFileRouter<Context extends FileDownloadCrawlingContext = FileDownloadCrawlingContext, UserData extends Dictionary = GetUserDataFromRequest<Context['request']>>(routes?: RouterRoutes<Context, UserData>): import("@crawlee/basic").RouterHandler<Context>;
|
|
102
128
|
export {};
|
|
103
129
|
//# sourceMappingURL=file-download.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"file-download.d.ts","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAExC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,eAAe,CAAC;AAC7E,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AACvD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAEjD,OAAO,KAAK,EAAE,YAAY,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAIxH,QAAA,MAAM,YAAY,eAAwB,CAAC;AAE3C,MAAM,MAAM,wBAAwB,CAChC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,YAAY,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAExD,MAAM,MAAM,gBAAgB,CACxB,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,gBAAgB,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE5D,MAAM,WAAW,2BAA2B,CACxC,QAAQ,SAAS,UAAU,GAAG,GAAG,CACnC,SAAQ,eAAe,CAAC,QAAQ,CAAC;IAC/B,OAAO,EAAE,aAAa,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;IAC1C,QAAQ,EAAE,QAAQ,CAAC;IACnB,WAAW,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,cAAc,CAAA;KAAE,CAAC;CAC3D;AAED,MAAM,MAAM,0BAA0B,CAClC,QAAQ,SAAS,UAAU,GAAG,GAAG,IACjC,cAAc,CAAC,2BAA2B,CAAC,QAAQ,CAAC,CAAC,CAAC;AAE1D;;;;;;;;GAQG;AACH,wBAAgB,kBAAkB,CAAC,EAC/B,YAAY,EACZ,eAAsB,EACtB,qBAAqB,EAAE,uBAA6B,GACvD,EAAE;IACC,YAAY,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAClC,GAAG,SAAS,CA4BZ;AAED;;;;;;GAMG;AACH,wBAAgB,iBAAiB,CAAC,EAC9B,mBAAmB,EACnB,eAAsB,GACzB,EAAE;IACC,mBAAmB,EAAE,CAAC,gBAAgB,EAAE,MAAM,KAAK,IAAI,CAAC;IACxD,eAAe,CAAC,EAAE,MAAM,CAAC;CAC5B,GAAG,SAAS,CAoBZ;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,qBAAa,YAAa,SAAQ,YAAY,CAAC,2BAA2B,CAAC;gBAE3D,OAAO,GAAE,mBAAmB,CAAC,2BAA2B,CAAM;cAOvD,oBAAoB;iBA2BH,aAAa,CAAC,OAAO,CAAC;;;;;;;;YAZ5C,gBAAgB;CAoBjC;AA0BD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,wBAAgB,gBAAgB,CAC5B,OAAO,SAAS,2BAA2B,GAAG,2BAA2B,EACzE,QAAQ,SAAS,UAAU,GAAG,sBAAsB,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,EAC1E,MAAM,CAAC,EAAE,YAAY,CAAC,OAAO,EAAE,QAAQ,CAAC,mDAEzC"}
|
|
@@ -1,6 +1,67 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import {
|
|
1
|
+
import { Transform } from 'node:stream';
|
|
2
|
+
import { BasicCrawler } from '@crawlee/basic';
|
|
3
|
+
import { ResponseWithUrl } from '@crawlee/http-client';
|
|
4
|
+
import { Router } from '../index.js';
|
|
5
|
+
import { parseContentTypeFromResponse } from './utils.js';
|
|
6
|
+
const kBodyDrained = Symbol('bodyDrained');
|
|
7
|
+
/**
|
|
8
|
+
* Creates a transform stream that throws an error if the source data speed is below the specified minimum speed.
|
|
9
|
+
* This `Transform` checks the amount of data every `checkProgressInterval` milliseconds.
|
|
10
|
+
* If the stream has received less than `minSpeedKbps * historyLengthMs / 1000` bytes in the last `historyLengthMs` milliseconds,
|
|
11
|
+
* it will throw an error.
|
|
12
|
+
*
|
|
13
|
+
* Can be used e.g. to abort a download if the network speed is too slow.
|
|
14
|
+
* @returns Transform stream that monitors the speed of the incoming data.
|
|
15
|
+
*/
|
|
16
|
+
export function MinimumSpeedStream({ minSpeedKbps, historyLengthMs = 10e3, checkProgressInterval: checkProgressIntervalMs = 5e3, }) {
|
|
17
|
+
let snapshots = [];
|
|
18
|
+
const checkInterval = setInterval(() => {
|
|
19
|
+
const now = Date.now();
|
|
20
|
+
snapshots = snapshots.filter((snapshot) => now - snapshot.timestamp < historyLengthMs);
|
|
21
|
+
const totalBytes = snapshots.reduce((acc, snapshot) => acc + snapshot.bytes, 0);
|
|
22
|
+
const elapsed = (now - (snapshots[0]?.timestamp ?? 0)) / 1000;
|
|
23
|
+
if (totalBytes / 1024 / elapsed < minSpeedKbps) {
|
|
24
|
+
clearInterval(checkInterval);
|
|
25
|
+
stream.emit('error', new Error(`Stream speed too slow, aborting...`));
|
|
26
|
+
}
|
|
27
|
+
}, checkProgressIntervalMs);
|
|
28
|
+
const stream = new Transform({
|
|
29
|
+
transform: (chunk, _, callback) => {
|
|
30
|
+
snapshots.push({ timestamp: Date.now(), bytes: chunk.length });
|
|
31
|
+
callback(null, chunk);
|
|
32
|
+
},
|
|
33
|
+
final: (callback) => {
|
|
34
|
+
clearInterval(checkInterval);
|
|
35
|
+
callback();
|
|
36
|
+
},
|
|
37
|
+
});
|
|
38
|
+
return stream;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Creates a transform stream that logs the progress of the incoming data.
|
|
42
|
+
* This `Transform` calls the `logProgress` function every `loggingInterval` milliseconds with the number of bytes received so far.
|
|
43
|
+
*
|
|
44
|
+
* Can be used e.g. to log the progress of a download.
|
|
45
|
+
* @returns Transform stream logging the progress of the incoming data.
|
|
46
|
+
*/
|
|
47
|
+
export function ByteCounterStream({ logTransferredBytes, loggingInterval = 5000, }) {
|
|
48
|
+
let transferredBytes = 0;
|
|
49
|
+
let lastLogTime = Date.now();
|
|
50
|
+
return new Transform({
|
|
51
|
+
transform: (chunk, _, callback) => {
|
|
52
|
+
transferredBytes += chunk.length;
|
|
53
|
+
if (Date.now() - lastLogTime > loggingInterval) {
|
|
54
|
+
lastLogTime = Date.now();
|
|
55
|
+
logTransferredBytes(transferredBytes);
|
|
56
|
+
}
|
|
57
|
+
callback(null, chunk);
|
|
58
|
+
},
|
|
59
|
+
flush: (callback) => {
|
|
60
|
+
logTransferredBytes(transferredBytes);
|
|
61
|
+
callback();
|
|
62
|
+
},
|
|
63
|
+
});
|
|
64
|
+
}
|
|
4
65
|
/**
|
|
5
66
|
* Provides a framework for downloading files in parallel using plain HTTP requests. The URLs to download are fed either from a static list of URLs or they can be added on the fly from another crawler.
|
|
6
67
|
*
|
|
@@ -16,11 +77,11 @@ import { HttpCrawler, Router } from '../index.js';
|
|
|
16
77
|
*
|
|
17
78
|
* The crawler finishes when there are no more {@link Request} objects to crawl.
|
|
18
79
|
*
|
|
19
|
-
* We can use the `preNavigationHooks` to adjust
|
|
80
|
+
* We can use the `preNavigationHooks` to adjust the crawling context before the request is made:
|
|
20
81
|
*
|
|
21
82
|
* ```
|
|
22
83
|
* preNavigationHooks: [
|
|
23
|
-
* (crawlingContext
|
|
84
|
+
* (crawlingContext) => {
|
|
24
85
|
* // ...
|
|
25
86
|
* },
|
|
26
87
|
* ]
|
|
@@ -44,80 +105,62 @@ import { HttpCrawler, Router } from '../index.js';
|
|
|
44
105
|
* ]);
|
|
45
106
|
* ```
|
|
46
107
|
*/
|
|
47
|
-
export class FileDownload extends
|
|
48
|
-
|
|
108
|
+
export class FileDownload extends BasicCrawler {
|
|
109
|
+
// TODO hooks
|
|
49
110
|
constructor(options = {}) {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
options.requestHandlerTimeoutSecs = options.navigationTimeoutSecs ?? 120;
|
|
55
|
-
}
|
|
56
|
-
super(options);
|
|
57
|
-
this.streamHandler = streamHandler;
|
|
58
|
-
if (this.streamHandler) {
|
|
59
|
-
this.requestHandler = this.streamRequestHandler;
|
|
60
|
-
}
|
|
61
|
-
// The base HttpCrawler class only supports a handful of text based mime types.
|
|
62
|
-
// With the FileDownload crawler, we want to download any file type.
|
|
63
|
-
this.supportedMimeTypes = new Set(['*/*']);
|
|
111
|
+
super({
|
|
112
|
+
...options,
|
|
113
|
+
contextPipelineBuilder: () => this.buildContextPipeline(),
|
|
114
|
+
});
|
|
64
115
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
context
|
|
68
|
-
|
|
69
|
-
|
|
116
|
+
buildContextPipeline() {
|
|
117
|
+
return super.buildContextPipeline().compose({
|
|
118
|
+
action: async (context) => this.initiateDownload(context),
|
|
119
|
+
cleanup: async (context) => {
|
|
120
|
+
if (!context.response.bodyUsed) {
|
|
121
|
+
// Nobody consumed the body — cancel it so the
|
|
122
|
+
// underlying connection can be released.
|
|
123
|
+
await context.response.body?.cancel();
|
|
124
|
+
}
|
|
125
|
+
await context[kBodyDrained];
|
|
126
|
+
},
|
|
127
|
+
});
|
|
70
128
|
}
|
|
71
|
-
async
|
|
72
|
-
const
|
|
73
|
-
|
|
74
|
-
url,
|
|
75
|
-
timeout: { request: undefined },
|
|
76
|
-
proxyUrl: context.proxyInfo?.url,
|
|
129
|
+
async initiateDownload(context) {
|
|
130
|
+
const response = await this.httpClient.sendRequest(context.request.intoFetchAPIRequest(), {
|
|
131
|
+
session: context.session,
|
|
77
132
|
});
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
133
|
+
const { type, charset: encoding } = parseContentTypeFromResponse(response);
|
|
134
|
+
context.request.url = response.url;
|
|
135
|
+
const { response: trackedResponse, bodyDrained } = trackBodyConsumption(response);
|
|
136
|
+
const contextExtension = {
|
|
137
|
+
request: context.request,
|
|
138
|
+
response: trackedResponse,
|
|
139
|
+
contentType: { type, encoding },
|
|
140
|
+
[kBodyDrained]: bodyDrained,
|
|
82
141
|
};
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
try {
|
|
96
|
-
context.stream = response.stream;
|
|
97
|
-
context.response = response;
|
|
98
|
-
streamHandlerResult = this.streamHandler(context);
|
|
99
|
-
}
|
|
100
|
-
catch (e) {
|
|
101
|
-
cleanUp();
|
|
102
|
-
reject(e);
|
|
103
|
-
}
|
|
104
|
-
if (isPromise(streamHandlerResult)) {
|
|
105
|
-
streamHandlerResult
|
|
106
|
-
.then(() => {
|
|
107
|
-
resolve();
|
|
108
|
-
})
|
|
109
|
-
.catch((e) => {
|
|
110
|
-
cleanUp();
|
|
111
|
-
reject(e);
|
|
112
|
-
});
|
|
113
|
-
}
|
|
114
|
-
else {
|
|
115
|
-
resolve();
|
|
116
|
-
}
|
|
117
|
-
});
|
|
118
|
-
await Promise.all([downloadPromise, finished(response.stream)]);
|
|
119
|
-
cleanUp();
|
|
142
|
+
return contextExtension;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
/**
 * Re-wraps a Response so the caller can await the moment its body stream is done.
 *
 * When the response has a body, it is piped into a fresh `TransformStream`; the readable end of that
 * stream becomes the body of a new `ResponseWithUrl` carrying the same headers, status, status text and
 * URL. The promise returned by `pipeTo` settles once the body has been fully read or the pipe fails
 * (e.g. after a cancel); rejections are swallowed on purpose so awaiting `bodyDrained` never throws.
 *
 * A response without a body is returned as-is together with an already resolved promise.
 */
function trackBodyConsumption(response) {
    const source = response.body;
    if (source) {
        const { readable, writable } = new TransformStream();
        const bodyDrained = source.pipeTo(writable).catch(() => { });
        const { headers, status, statusText, url } = response;
        return {
            response: new ResponseWithUrl(readable, { headers, status, statusText, url }),
            bodyDrained,
        };
    }
    return { response, bodyDrained: Promise.resolve() };
}
|
|
122
165
|
/**
|
|
123
166
|
* Creates new {@link Router} instance that works based on request labels.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"file-download.js","sourceRoot":"","sources":["../../src/internals/file-download.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAGxC,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAE9C,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAIvD,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACrC,OAAO,EAAE,4BAA4B,EAAE,MAAM,YAAY,CAAC;AAE1D,MAAM,YAAY,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC;AAsB3C;;;;;;;;GAQG;AACH,MAAM,UAAU,kBAAkB,CAAC,EAC/B,YAAY,EACZ,eAAe,GAAG,IAAI,EACtB,qBAAqB,EAAE,uBAAuB,GAAG,GAAG,GAKvD;IACG,IAAI,SAAS,GAA2C,EAAE,CAAC;IAE3D,MAAM,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE;QACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEvB,SAAS,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,GAAG,GAAG,QAAQ,CAAC,SAAS,GAAG,eAAe,CAAC,CAAC;QACvF,MAAM,UAAU,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,QAAQ,EAAE,EAAE,CAAC,GAAG,GAAG,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,OAAO,GAAG,CAAC,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC;QAE9D,IAAI,UAAU,GAAG,IAAI,GAAG,OAAO,GAAG,YAAY,EAAE,CAAC;YAC7C,aAAa,CAAC,aAAa,CAAC,CAAC;YAC7B,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,KAAK,CAAC,oCAAoC,CAAC,CAAC,CAAC;QAC1E,CAAC;IACL,CAAC,EAAE,uBAAuB,CAAC,CAAC;IAE5B,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC;QACzB,SAAS,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;YAC9B,SAAS,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;YAC/D,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;YAChB,aAAa,CAAC,aAAa,CAAC,CAAC;YAC7B,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC;AAClB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,EAC9B,mBAAmB,EACnB,eAAe,GAAG,IAAI,GAIzB;IACG,IAAI,gBAAgB,GAAG,CAAC,CAAC;IACzB,IAAI,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,OAAO,IAAI,SAAS,CAAC;QACjB,SAAS,EAAE,CAAC,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,EAAE;YAC9B,gBAAgB,IAAI,KAAK,CAAC,MAAM,CAAC;YAEjC,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,WAAW,GAAG,eAAe,EAAE,CAAC;gBAC7C,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBACzB,mBAAmB,CAAC,gBAAgB,CAAC,CAAC;YAC1C,CAAC;YAED,QAAQ,CAAC,IAAI,
EAAE,KAAK,CAAC,CAAC;QAC1B,CAAC;QACD,KAAK,EAAE,CAAC,QAAQ,EAAE,EAAE;YAChB,mBAAmB,CAAC,gBAAgB,CAAC,CAAC;YACtC,QAAQ,EAAE,CAAC;QACf,CAAC;KACJ,CAAC,CAAC;AACP,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,YAAa,SAAQ,YAAyC;IACvE,aAAa;IACb,YAAY,UAA4D,EAAE;QACtE,KAAK,CAAC;YACF,GAAG,OAAO;YACV,sBAAsB,EAAE,GAAG,EAAE,CAAC,IAAI,CAAC,oBAAoB,EAAE;SAC5D,CAAC,CAAC;IACP,CAAC;IAEkB,oBAAoB;QACnC,OAAO,KAAK,CAAC,oBAAoB,EAAE,CAAC,OAAO,CAAC;YACxC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC;YACzD,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE;gBACvB,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;oBAC7B,8CAA8C;oBAC9C,yCAAyC;oBACzC,MAAM,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC;gBAC1C,CAAC;gBAED,MAAO,OAA6C,CAAC,YAAY,CAAC,CAAC;YACvE,CAAC;SACJ,CAAC,CAAC;IACP,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,OAAwB;QACnD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,OAAO,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE;YACtF,OAAO,EAAE,OAAO,CAAC,OAAO;SAC3B,CAAC,CAAC;QAEH,MAAM,EAAE,IAAI,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,4BAA4B,CAAC,QAAQ,CAAC,CAAC;QAE3E,OAAO,CAAC,OAAO,CAAC,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC;QAEnC,MAAM,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAElF,MAAM,gBAAgB,GAAG;YACrB,OAAO,EAAE,OAAO,CAAC,OAAiC;YAClD,QAAQ,EAAE,eAAe;YACzB,WAAW,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE;YAC/B,CAAC,YAAY,CAAC,EAAE,WAAW;SAC9B,CAAC;QAEF,OAAO,gBAAgB,CAAC;IAC5B,CAAC;CACJ;AAED;;;;;GAKG;AACH,SAAS,oBAAoB,CAAC,QAAkB;IAC5C,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;QACjB,OAAO,EAAE,QAAQ,EAAE,WAAW,EAAE,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC;IACxD,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,eAAe,EAAE,CAAC;IAC1C,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,GAAE,CAAC,CAAC,CAAC;IAE/E,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC,WAAW,CAAC,QAAQ,EAAE;QAC9D,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,UAAU,EAAE,QAAQ,CAAC,UAAU;QAC/B,GAAG,EAAE,QAAQ,CAAC,GAAG;KACpB,CAAC,CAAC;IAEH,OAAO,EAAE,QAAQ,EAAE,eAAe,EAAE,WAAW,EAAE,CAAC;AACtD,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,UAAU,gBAAgB,CAG9B,
MAAwC;IACtC,OAAO,MAAM,CAAC,MAAM,CAAU,MAAM,CAAC,CAAC;AAC1C,CAAC"}
|