@milaboratories/pl-drivers 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/clients/download.d.ts +7 -5
- package/dist/clients/download.d.ts.map +1 -1
- package/dist/drivers/download_blob/download_blob.d.ts.map +1 -1
- package/dist/drivers/download_blob/download_blob_task.d.ts.map +1 -1
- package/dist/drivers/download_blob_url/task.d.ts.map +1 -1
- package/dist/drivers/download_url/task.d.ts.map +1 -1
- package/dist/drivers/helpers/read_file.d.ts +7 -0
- package/dist/drivers/helpers/read_file.d.ts.map +1 -0
- package/dist/helpers/download.d.ts +6 -4
- package/dist/helpers/download.d.ts.map +1 -1
- package/dist/index.js +2 -2
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +1604 -1564
- package/dist/index.mjs.map +1 -1
- package/package.json +8 -8
- package/src/clients/download.test.ts +12 -3
- package/src/clients/download.ts +45 -22
- package/src/clients/upload.ts +2 -2
- package/src/drivers/download_blob/download_blob.ts +35 -34
- package/src/drivers/download_blob/download_blob_task.ts +16 -13
- package/src/drivers/download_blob_url/task.ts +57 -52
- package/src/drivers/download_url/task.ts +21 -14
- package/src/drivers/helpers/read_file.ts +32 -0
- package/src/helpers/download.ts +38 -14
- package/dist/clients/helpers.d.ts +0 -6
- package/dist/clients/helpers.d.ts.map +0 -1
- package/src/clients/helpers.ts +0 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@milaboratories/pl-drivers",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.8.0",
|
|
4
4
|
"engines": {
|
|
5
5
|
"node": ">=20"
|
|
6
6
|
},
|
|
@@ -31,12 +31,12 @@
|
|
|
31
31
|
"undici": "~7.10.0",
|
|
32
32
|
"zod": "~3.23.8",
|
|
33
33
|
"upath": "^2.0.1",
|
|
34
|
+
"@milaboratories/ts-helpers": "^1.4.3",
|
|
34
35
|
"@milaboratories/helpers": "^1.6.19",
|
|
35
|
-
"@milaboratories/
|
|
36
|
-
"@milaboratories/
|
|
37
|
-
"@milaboratories/pl-
|
|
38
|
-
"@milaboratories/pl-
|
|
39
|
-
"@milaboratories/pl-model-common": "^1.19.0"
|
|
36
|
+
"@milaboratories/pl-client": "^2.11.6",
|
|
37
|
+
"@milaboratories/computable": "^2.6.3",
|
|
38
|
+
"@milaboratories/pl-model-common": "^1.19.1",
|
|
39
|
+
"@milaboratories/pl-tree": "^1.7.5"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
42
|
"eslint": "^9.25.1",
|
|
@@ -47,8 +47,8 @@
|
|
|
47
47
|
"vitest": "^2.1.9",
|
|
48
48
|
"@vitest/coverage-v8": "^2.1.9",
|
|
49
49
|
"@types/tar-fs": "^2.0.4",
|
|
50
|
-
"@milaboratories/
|
|
51
|
-
"@milaboratories/
|
|
50
|
+
"@milaboratories/eslint-config": "^1.0.4",
|
|
51
|
+
"@milaboratories/build-configs": "1.0.5"
|
|
52
52
|
},
|
|
53
53
|
"scripts": {
|
|
54
54
|
"type-check": "tsc --noEmit --composite false",
|
|
@@ -45,10 +45,19 @@ test('client download from a local file', async () => {
|
|
|
45
45
|
]),
|
|
46
46
|
});
|
|
47
47
|
|
|
48
|
-
const
|
|
48
|
+
const result = await clientDownload.withLocalFileContent(
|
|
49
|
+
`storage://tmp/${fName}`,
|
|
50
|
+
{},
|
|
51
|
+
async (content, size) => {
|
|
52
|
+
expect(size).toBe(2);
|
|
53
|
+
const textContent = await text(content);
|
|
54
|
+
expect(textContent).toBe('42');
|
|
55
|
+
return { size, textContent };
|
|
56
|
+
}
|
|
57
|
+
);
|
|
49
58
|
|
|
50
|
-
expect(
|
|
51
|
-
expect(
|
|
59
|
+
expect(result.size).toBe(2);
|
|
60
|
+
expect(result.textContent).toBe('42');
|
|
52
61
|
|
|
53
62
|
await fs.rm(fPath);
|
|
54
63
|
});
|
package/src/clients/download.ts
CHANGED
|
@@ -3,6 +3,7 @@ import type { GrpcClientProvider, GrpcClientProviderFactory } from '@milaborator
|
|
|
3
3
|
import { addRTypeToMetadata, stringifyWithResourceId } from '@milaboratories/pl-client';
|
|
4
4
|
import type { ResourceInfo } from '@milaboratories/pl-tree';
|
|
5
5
|
import type { MiLogger } from '@milaboratories/ts-helpers';
|
|
6
|
+
import { ConcurrencyLimitingExecutor } from '@milaboratories/ts-helpers';
|
|
6
7
|
import type { RpcOptions } from '@protobuf-ts/runtime-rpc';
|
|
7
8
|
import * as fs from 'node:fs';
|
|
8
9
|
import * as fsp from 'node:fs/promises';
|
|
@@ -10,12 +11,11 @@ import * as path from 'node:path';
|
|
|
10
11
|
import { Readable } from 'node:stream';
|
|
11
12
|
import type { Dispatcher } from 'undici';
|
|
12
13
|
import type { LocalStorageProjection } from '../drivers/types';
|
|
13
|
-
import type {
|
|
14
|
+
import type { DownloadOps, ContentHandler } from '../helpers/download';
|
|
14
15
|
import { RemoteFileDownloader } from '../helpers/download';
|
|
15
16
|
import { validateAbsolute } from '../helpers/validate';
|
|
16
17
|
import type { DownloadAPI_GetDownloadURL_Response } from '../proto/github.com/milaboratory/pl/controllers/shared/grpc/downloadapi/protocol';
|
|
17
18
|
import { DownloadClient } from '../proto/github.com/milaboratory/pl/controllers/shared/grpc/downloadapi/protocol.client';
|
|
18
|
-
import { toHeadersMap } from './helpers';
|
|
19
19
|
|
|
20
20
|
/** Gets URLs for downloading from pl-core, parses them and reads or downloads
|
|
21
21
|
* files locally and from the web. */
|
|
@@ -26,6 +26,9 @@ export class ClientDownload {
|
|
|
26
26
|
/** Helps to find a storage root directory by a storage id from URL scheme. */
|
|
27
27
|
private readonly localStorageIdsToRoot: Map<string, string>;
|
|
28
28
|
|
|
29
|
+
/** Concurrency limiter for local file reads - limit to 32 parallel reads */
|
|
30
|
+
private readonly localFileReadLimiter = new ConcurrencyLimitingExecutor(32);
|
|
31
|
+
|
|
29
32
|
constructor(
|
|
30
33
|
grpcClientProviderFactory: GrpcClientProviderFactory,
|
|
31
34
|
public readonly httpClient: Dispatcher,
|
|
@@ -40,40 +43,60 @@ export class ClientDownload {
|
|
|
40
43
|
|
|
41
44
|
close() {}
|
|
42
45
|
|
|
43
|
-
/**
|
|
46
|
+
/**
|
|
47
|
+
* Gets a presign URL and downloads the file.
|
|
44
48
|
* An optional range with 2 numbers from what byte and to what byte to download can be provided.
|
|
45
49
|
* @param fromBytes - from byte including this byte
|
|
46
50
|
* @param toBytes - to byte excluding this byte
|
|
47
51
|
*/
|
|
48
|
-
async
|
|
52
|
+
async withBlobContent<T>(
|
|
49
53
|
info: ResourceInfo,
|
|
50
|
-
options
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
const { downloadUrl, headers } = await this.grpcGetDownloadUrl(info, options, signal);
|
|
54
|
+
options: RpcOptions | undefined,
|
|
55
|
+
ops: DownloadOps,
|
|
56
|
+
handler: ContentHandler<T>,
|
|
57
|
+
): Promise<T> {
|
|
58
|
+
const { downloadUrl, headers } = await this.grpcGetDownloadUrl(info, options, ops.signal);
|
|
56
59
|
|
|
57
|
-
const remoteHeaders =
|
|
58
|
-
this.logger.info(`download blob ${stringifyWithResourceId(info)} from url ${downloadUrl},
|
|
60
|
+
const remoteHeaders = Object.fromEntries(headers.map(({ name, value }) => [name, value]));
|
|
61
|
+
this.logger.info(`download blob ${stringifyWithResourceId(info)} from url ${downloadUrl}, ops: ${JSON.stringify(ops)}`);
|
|
59
62
|
|
|
60
63
|
return isLocal(downloadUrl)
|
|
61
|
-
? await this.
|
|
62
|
-
: await this.remoteFileDownloader.
|
|
64
|
+
? await this.withLocalFileContent(downloadUrl, ops, handler)
|
|
65
|
+
: await this.remoteFileDownloader.withContent(downloadUrl, remoteHeaders, ops, handler);
|
|
63
66
|
}
|
|
64
67
|
|
|
65
|
-
async
|
|
68
|
+
async withLocalFileContent<T>(
|
|
66
69
|
url: string,
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
): Promise<
|
|
70
|
+
ops: DownloadOps,
|
|
71
|
+
handler: ContentHandler<T>,
|
|
72
|
+
): Promise<T> {
|
|
70
73
|
const { storageId, relativePath } = parseLocalUrl(url);
|
|
71
74
|
const fullPath = getFullPath(storageId, this.localStorageIdsToRoot, relativePath);
|
|
72
75
|
|
|
73
|
-
return {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
76
|
+
return await this.localFileReadLimiter.run(async () => {
|
|
77
|
+
const readOps = {
|
|
78
|
+
start: ops.range?.from,
|
|
79
|
+
end: ops.range?.to !== undefined ? ops.range.to - 1 : undefined,
|
|
80
|
+
};
|
|
81
|
+
let stream: fs.ReadStream | undefined;
|
|
82
|
+
let handlerSuccess = false;
|
|
83
|
+
|
|
84
|
+
try {
|
|
85
|
+
const stat = await fsp.stat(fullPath);
|
|
86
|
+
stream = fs.createReadStream(fullPath, readOps);
|
|
87
|
+
const webStream = Readable.toWeb(stream);
|
|
88
|
+
|
|
89
|
+
const result = await handler(webStream, stat.size);
|
|
90
|
+
handlerSuccess = true;
|
|
91
|
+
return result;
|
|
92
|
+
} catch (error) {
|
|
93
|
+
// Cleanup on error (including handler errors)
|
|
94
|
+
if (!handlerSuccess && stream && !stream.destroyed) {
|
|
95
|
+
stream.destroy();
|
|
96
|
+
}
|
|
97
|
+
throw error;
|
|
98
|
+
}
|
|
99
|
+
});
|
|
77
100
|
}
|
|
78
101
|
|
|
79
102
|
private async grpcGetDownloadUrl(
|
package/src/clients/upload.ts
CHANGED
|
@@ -8,7 +8,7 @@ import type { Dispatcher } from 'undici';
|
|
|
8
8
|
import { request } from 'undici';
|
|
9
9
|
import type { uploadapi_GetPartURL_Response } from '../proto/github.com/milaboratory/pl/controllers/shared/grpc/uploadapi/protocol';
|
|
10
10
|
import { UploadClient } from '../proto/github.com/milaboratory/pl/controllers/shared/grpc/uploadapi/protocol.client';
|
|
11
|
-
|
|
11
|
+
|
|
12
12
|
import type { IncomingHttpHeaders } from 'undici/types/header';
|
|
13
13
|
|
|
14
14
|
export class MTimeError extends Error {
|
|
@@ -90,7 +90,7 @@ export class ClientUpload {
|
|
|
90
90
|
// that's why we got big timeout here.
|
|
91
91
|
headersTimeout: 60000,
|
|
92
92
|
bodyTimeout: 60000,
|
|
93
|
-
headers:
|
|
93
|
+
headers: Object.fromEntries(info.headers.map(({ name, value }) => [name, value])),
|
|
94
94
|
method: info.method.toUpperCase() as Dispatcher.HttpMethod,
|
|
95
95
|
});
|
|
96
96
|
|
|
@@ -39,10 +39,11 @@ import * as fsp from 'node:fs/promises';
|
|
|
39
39
|
import * as os from 'node:os';
|
|
40
40
|
import * as path from 'node:path';
|
|
41
41
|
import * as readline from 'node:readline/promises';
|
|
42
|
-
import {
|
|
42
|
+
import { Writable } from 'node:stream';
|
|
43
43
|
import { buffer } from 'node:stream/consumers';
|
|
44
44
|
import type { ClientDownload } from '../../clients/download';
|
|
45
45
|
import type { ClientLogs } from '../../clients/logs';
|
|
46
|
+
import { readFileContent } from '../helpers/read_file';
|
|
46
47
|
import {
|
|
47
48
|
isLocalBlobHandle,
|
|
48
49
|
newLocalHandle,
|
|
@@ -298,7 +299,7 @@ export class DownloadDriver implements BlobDriver {
|
|
|
298
299
|
}
|
|
299
300
|
|
|
300
301
|
if (isLocalBlobHandle(handle)) {
|
|
301
|
-
return await
|
|
302
|
+
return await readFileContent(this.getLocalPath(handle), range);
|
|
302
303
|
}
|
|
303
304
|
|
|
304
305
|
if (isRemoteBlobHandle(handle)) {
|
|
@@ -307,18 +308,15 @@ export class DownloadDriver implements BlobDriver {
|
|
|
307
308
|
const key = blobKey(result.info.id);
|
|
308
309
|
const filePath = await this.rangesCache.get(key, range ?? { from: 0, to: result.size });
|
|
309
310
|
if (filePath) {
|
|
310
|
-
return await
|
|
311
|
+
return await readFileContent(filePath, range);
|
|
311
312
|
}
|
|
312
313
|
|
|
313
|
-
const
|
|
314
|
+
const data = await this.clientDownload.withBlobContent(
|
|
314
315
|
{ id: result.info.id, type: result.info.type },
|
|
315
316
|
undefined,
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
range?.to,
|
|
317
|
+
{ range },
|
|
318
|
+
async (content) => await buffer(content)
|
|
319
319
|
);
|
|
320
|
-
|
|
321
|
-
const data = await buffer(content);
|
|
322
320
|
await this.rangesCache.set(key, range ?? { from: 0, to: result.size }, data);
|
|
323
321
|
|
|
324
322
|
return data;
|
|
@@ -657,42 +655,45 @@ class LastLinesGetter {
|
|
|
657
655
|
|
|
658
656
|
/** Gets last lines from a file by reading the file from the top and keeping
|
|
659
657
|
* last N lines in a window queue. */
|
|
660
|
-
function getLastLines(fPath: string, nLines: number, patternToSearch?: string): Promise<string> {
|
|
661
|
-
|
|
662
|
-
|
|
658
|
+
async function getLastLines(fPath: string, nLines: number, patternToSearch?: string): Promise<string> {
|
|
659
|
+
let inStream: fs.ReadStream | undefined;
|
|
660
|
+
let rl: readline.Interface | undefined;
|
|
663
661
|
|
|
664
|
-
|
|
665
|
-
|
|
662
|
+
try {
|
|
663
|
+
inStream = fs.createReadStream(fPath);
|
|
664
|
+
rl = readline.createInterface({ input: inStream, crlfDelay: Infinity });
|
|
666
665
|
|
|
667
666
|
const lines = new Denque();
|
|
668
|
-
|
|
669
|
-
|
|
667
|
+
|
|
668
|
+
for await (const line of rl) {
|
|
669
|
+
if (patternToSearch != undefined && !line.includes(patternToSearch)) continue;
|
|
670
670
|
|
|
671
671
|
lines.push(line);
|
|
672
672
|
if (lines.length > nLines) {
|
|
673
673
|
lines.shift();
|
|
674
674
|
}
|
|
675
|
-
}
|
|
676
|
-
|
|
677
|
-
rl.on('error', reject);
|
|
675
|
+
}
|
|
678
676
|
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
677
|
+
// last EOL is for keeping backward compat with platforma implementation.
|
|
678
|
+
return lines.toArray().join(os.EOL) + os.EOL;
|
|
679
|
+
} finally {
|
|
680
|
+
// Cleanup resources in finally block to ensure they're always cleaned up
|
|
681
|
+
try {
|
|
682
|
+
if (rl) {
|
|
683
|
+
rl.close();
|
|
684
|
+
}
|
|
685
|
+
} catch (cleanupError) {
|
|
686
|
+
console.error('Error closing readline interface:', cleanupError);
|
|
687
|
+
}
|
|
685
688
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
689
|
+
try {
|
|
690
|
+
if (inStream && !inStream.destroyed) {
|
|
691
|
+
inStream.destroy();
|
|
692
|
+
}
|
|
693
|
+
} catch (cleanupError) {
|
|
694
|
+
console.error('Error destroying read stream:', cleanupError);
|
|
695
|
+
}
|
|
691
696
|
}
|
|
692
|
-
|
|
693
|
-
const stream = fs.createReadStream(path, ops);
|
|
694
|
-
|
|
695
|
-
return await buffer(Readable.toWeb(stream));
|
|
696
697
|
}
|
|
697
698
|
|
|
698
699
|
function validateDownloadableResourceType(methodName: string, rType: ResourceType) {
|
|
@@ -94,23 +94,26 @@ export class DownloadBlobTask {
|
|
|
94
94
|
return this.state.fileSize;
|
|
95
95
|
}
|
|
96
96
|
|
|
97
|
-
const
|
|
97
|
+
const fileSize = await this.clientDownload.withBlobContent(
|
|
98
98
|
this.rInfo,
|
|
99
99
|
{},
|
|
100
|
-
this.signalCtl.signal,
|
|
100
|
+
{ signal: this.signalCtl.signal },
|
|
101
|
+
async (content, size) => {
|
|
102
|
+
this.state.fileSize = size;
|
|
103
|
+
this.state.downloaded = true;
|
|
104
|
+
|
|
105
|
+
await createPathAtomically(this.logger, this.state.filePath!, async (fPath: string) => {
|
|
106
|
+
const f = Writable.toWeb(fs.createWriteStream(fPath, { flags: 'wx' }));
|
|
107
|
+
await content.pipeTo(f, { signal: this.signalCtl.signal });
|
|
108
|
+
this.state.tempWritten = true;
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
this.state.done = true;
|
|
112
|
+
return size;
|
|
113
|
+
}
|
|
101
114
|
);
|
|
102
|
-
this.state.fileSize = size;
|
|
103
|
-
this.state.downloaded = true;
|
|
104
|
-
|
|
105
|
-
await createPathAtomically(this.logger, this.state.filePath, async (fPath: string) => {
|
|
106
|
-
const f = Writable.toWeb(fs.createWriteStream(fPath, { flags: 'wx' }));
|
|
107
|
-
await content.pipeTo(f, { signal: this.signalCtl.signal });
|
|
108
|
-
this.state.tempWritten = true;
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
this.state.done = true;
|
|
112
115
|
|
|
113
|
-
return
|
|
116
|
+
return fileSize;
|
|
114
117
|
}
|
|
115
118
|
|
|
116
119
|
public abort(reason: string) {
|
|
@@ -95,59 +95,64 @@ export class DownloadAndUnarchiveTask {
|
|
|
95
95
|
return await dirSize(this.path);
|
|
96
96
|
}
|
|
97
97
|
|
|
98
|
-
const
|
|
99
|
-
this.rInfo,
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
98
|
+
const size = await this.clientDownload.withBlobContent(
|
|
99
|
+
this.rInfo,
|
|
100
|
+
{},
|
|
101
|
+
{ signal },
|
|
102
|
+
async (content, size) => {
|
|
103
|
+
this.state!.downloaded = true;
|
|
104
|
+
|
|
105
|
+
await createPathAtomically(this.logger, this.path, async (fPath: string) => {
|
|
106
|
+
this.state!.tempPath = fPath;
|
|
107
|
+
this.state!.archiveFormat = this.format;
|
|
108
|
+
|
|
109
|
+
switch (this.format) {
|
|
110
|
+
case 'tar':
|
|
111
|
+
await fsp.mkdir(fPath); // throws if a directory already exists.
|
|
112
|
+
const simpleUntar = Writable.toWeb(tar.extract(fPath));
|
|
113
|
+
await content.pipeTo(simpleUntar, { signal });
|
|
114
|
+
return;
|
|
115
|
+
|
|
116
|
+
case 'tgz':
|
|
117
|
+
await fsp.mkdir(fPath); // throws if a directory already exists.
|
|
118
|
+
const gunzip = Transform.toWeb(zlib.createGunzip());
|
|
119
|
+
const untar = Writable.toWeb(tar.extract(fPath));
|
|
120
|
+
|
|
121
|
+
await content
|
|
122
|
+
.pipeThrough(gunzip, { signal })
|
|
123
|
+
.pipeTo(untar, { signal });
|
|
124
|
+
return;
|
|
125
|
+
|
|
126
|
+
case 'zip':
|
|
127
|
+
this.state!.zipPath = this.path + '.zip';
|
|
128
|
+
|
|
129
|
+
const f = Writable.toWeb(fs.createWriteStream(this.state!.zipPath));
|
|
130
|
+
await content.pipeTo(f, { signal });
|
|
131
|
+
this.state!.zipPathCreated = true;
|
|
132
|
+
|
|
133
|
+
// Without this filter it fails with
|
|
134
|
+
// "EISDIR: illegal operation on a directory".
|
|
135
|
+
// The workaround is from
|
|
136
|
+
// https://github.com/kevva/decompress/issues/46#issuecomment-525048104
|
|
137
|
+
await decompress(this.state!.zipPath, fPath, {
|
|
138
|
+
filter: file => !file.path.endsWith('/'),
|
|
139
|
+
});
|
|
140
|
+
this.state!.zipDecompressed = true;
|
|
141
|
+
|
|
142
|
+
await fs.promises.rm(this.state!.zipPath);
|
|
143
|
+
this.state!.zipPathDeleted = true;
|
|
144
|
+
|
|
145
|
+
return;
|
|
146
|
+
|
|
147
|
+
default:
|
|
148
|
+
assertNever(this.format);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
this.state!.pathCreated = true;
|
|
153
|
+
return size;
|
|
147
154
|
}
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
this.state.pathCreated = true;
|
|
155
|
+
);
|
|
151
156
|
|
|
152
157
|
return size;
|
|
153
158
|
}
|
|
@@ -82,21 +82,28 @@ export class DownloadByUrlTask {
|
|
|
82
82
|
return await dirSize(this.path);
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
const
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
85
|
+
const size = await clientDownload.withContent(
|
|
86
|
+
this.url.toString(),
|
|
87
|
+
{},
|
|
88
|
+
{ signal },
|
|
89
|
+
async (content, size) => {
|
|
90
|
+
let processedContent = content;
|
|
91
|
+
if (withGunzip) {
|
|
92
|
+
const gunzip = Transform.toWeb(zlib.createGunzip());
|
|
93
|
+
processedContent = content.pipeThrough(gunzip, { signal });
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
await createPathAtomically(this.logger, this.path, async (fPath: string) => {
|
|
97
|
+
await fsp.mkdir(fPath); // throws if a directory already exists.
|
|
98
|
+
const untar = Writable.toWeb(tar.extract(fPath));
|
|
99
|
+
await processedContent.pipeTo(untar, { signal });
|
|
100
|
+
});
|
|
101
|
+
|
|
102
|
+
return size;
|
|
103
|
+
}
|
|
104
|
+
);
|
|
98
105
|
|
|
99
|
-
return
|
|
106
|
+
return size;
|
|
100
107
|
}
|
|
101
108
|
|
|
102
109
|
getUrl(): UrlResult | undefined {
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { ConcurrencyLimitingExecutor } from '@milaboratories/ts-helpers';
|
|
2
|
+
import type { RangeBytes } from '@milaboratories/pl-model-common';
|
|
3
|
+
import * as fs from 'node:fs';
|
|
4
|
+
import { buffer } from 'node:stream/consumers';
|
|
5
|
+
|
|
6
|
+
// Global concurrency limiter for file reads - limit to 32 parallel reads
|
|
7
|
+
const fileReadLimiter = new ConcurrencyLimitingExecutor(32);
|
|
8
|
+
|
|
9
|
+
/**
|
|
10
|
+
* Reads file content with concurrency limiting and proper error handling.
|
|
11
|
+
* Ensures file descriptors are properly cleaned up even in error cases.
|
|
12
|
+
*/
|
|
13
|
+
export async function readFileContent(path: string, range?: RangeBytes): Promise<Uint8Array> {
|
|
14
|
+
return await fileReadLimiter.run(async () => {
|
|
15
|
+
const ops: { start?: number; end?: number } = {};
|
|
16
|
+
if (range) {
|
|
17
|
+
ops.start = range.from;
|
|
18
|
+
ops.end = range.to - 1;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
let stream: fs.ReadStream | undefined;
|
|
22
|
+
try {
|
|
23
|
+
stream = fs.createReadStream(path, ops);
|
|
24
|
+
return await buffer(stream);
|
|
25
|
+
} catch (error) {
|
|
26
|
+
if (stream && !stream.destroyed) {
|
|
27
|
+
stream.destroy();
|
|
28
|
+
}
|
|
29
|
+
throw error;
|
|
30
|
+
}
|
|
31
|
+
});
|
|
32
|
+
}
|
package/src/helpers/download.ts
CHANGED
|
@@ -5,12 +5,15 @@ import { request } from 'undici';
|
|
|
5
5
|
import { Readable } from 'node:stream';
|
|
6
6
|
import type { ReadableStream } from 'node:stream/web';
|
|
7
7
|
import { text } from 'node:stream/consumers';
|
|
8
|
+
import type { RangeBytes } from '@milaboratories/pl-model-common';
|
|
8
9
|
|
|
9
|
-
export interface
|
|
10
|
-
|
|
11
|
-
|
|
10
|
+
export interface DownloadOps {
|
|
11
|
+
signal?: AbortSignal;
|
|
12
|
+
range?: RangeBytes;
|
|
12
13
|
}
|
|
13
14
|
|
|
15
|
+
export type ContentHandler<T> = (content: ReadableStream, size: number) => Promise<T>;
|
|
16
|
+
|
|
14
17
|
/** Throws when a status code of the downloading URL was in range [400, 500). */
|
|
15
18
|
export class NetworkError400 extends Error {
|
|
16
19
|
name = 'NetworkError400';
|
|
@@ -19,24 +22,45 @@ export class NetworkError400 extends Error {
|
|
|
19
22
|
export class RemoteFileDownloader {
|
|
20
23
|
constructor(public readonly httpClient: Dispatcher) {}
|
|
21
24
|
|
|
22
|
-
async
|
|
25
|
+
async withContent<T>(
|
|
23
26
|
url: string,
|
|
24
27
|
reqHeaders: Record<string, string>,
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
ops: DownloadOps,
|
|
29
|
+
handler: ContentHandler<T>,
|
|
30
|
+
): Promise<T> {
|
|
31
|
+
const headers = { ...reqHeaders };
|
|
32
|
+
|
|
33
|
+
// Add range header if specified
|
|
34
|
+
if (ops.range) {
|
|
35
|
+
headers['Range'] = `bytes=${ops.range.from}-${ops.range.to - 1}`;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
const { statusCode, body, headers: responseHeaders } = await request(url, {
|
|
28
39
|
dispatcher: this.httpClient,
|
|
29
|
-
headers
|
|
30
|
-
signal,
|
|
40
|
+
headers,
|
|
41
|
+
signal: ops.signal,
|
|
31
42
|
});
|
|
32
43
|
|
|
33
44
|
const webBody = Readable.toWeb(body);
|
|
34
|
-
|
|
45
|
+
let handlerSuccess = false;
|
|
35
46
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
size
|
|
39
|
-
|
|
47
|
+
try {
|
|
48
|
+
await checkStatusCodeOk(statusCode, webBody, url);
|
|
49
|
+
const size = Number(responseHeaders['content-length']);
|
|
50
|
+
const result = await handler(webBody, size);
|
|
51
|
+
handlerSuccess = true;
|
|
52
|
+
return result;
|
|
53
|
+
} catch (error) {
|
|
54
|
+
// Cleanup on error (including handler errors)
|
|
55
|
+
if (!handlerSuccess && !webBody.locked) {
|
|
56
|
+
try {
|
|
57
|
+
await webBody.cancel();
|
|
58
|
+
} catch {
|
|
59
|
+
// Ignore cleanup errors
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
throw error;
|
|
63
|
+
}
|
|
40
64
|
}
|
|
41
65
|
}
|
|
42
66
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../src/clients/helpers.ts"],"names":[],"mappings":"AAAA,wBAAgB,YAAY,CAC1B,OAAO,EAAE;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,EAAE,EAC1C,SAAS,CAAC,EAAE,MAAM,EAAE,sBAAsB;AAC1C,OAAO,CAAC,EAAE,MAAM,GACf,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAOxB"}
|
package/src/clients/helpers.ts
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
export function toHeadersMap(
|
|
2
|
-
headers: { name: string; value: string }[],
|
|
3
|
-
fromBytes?: number, // including this byte
|
|
4
|
-
toBytes?: number, // excluding this byte
|
|
5
|
-
): Record<string, string> {
|
|
6
|
-
const result = Object.fromEntries(headers.map(({ name, value }) => [name, value]));
|
|
7
|
-
if (fromBytes !== undefined && toBytes !== undefined) {
|
|
8
|
-
result['Range'] = `bytes=${fromBytes}-${toBytes - 1}`;
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
return result;
|
|
12
|
-
}
|