@milaboratories/pl-drivers 1.5.64 → 1.5.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. package/dist/drivers/download_blob/download_blob.d.ts +12 -3
  2. package/dist/drivers/download_blob/download_blob.d.ts.map +1 -1
  3. package/dist/drivers/download_blob/sparse_cache/cache.d.ts +79 -0
  4. package/dist/drivers/download_blob/sparse_cache/cache.d.ts.map +1 -0
  5. package/dist/drivers/download_blob/sparse_cache/file.d.ts +8 -0
  6. package/dist/drivers/download_blob/sparse_cache/file.d.ts.map +1 -0
  7. package/dist/drivers/download_blob/sparse_cache/ranges.d.ts +46 -0
  8. package/dist/drivers/download_blob/sparse_cache/ranges.d.ts.map +1 -0
  9. package/dist/drivers/helpers/download_remote_handle.d.ts +4 -1
  10. package/dist/drivers/helpers/download_remote_handle.d.ts.map +1 -1
  11. package/dist/index.js +2 -2
  12. package/dist/index.js.map +1 -1
  13. package/dist/index.mjs +1584 -1391
  14. package/dist/index.mjs.map +1 -1
  15. package/package.json +8 -7
  16. package/src/drivers/download_blob/download_blob.test.ts +48 -41
  17. package/src/drivers/download_blob/download_blob.ts +38 -39
  18. package/src/drivers/download_blob/sparse_cache/cache.test.ts +371 -0
  19. package/src/drivers/download_blob/sparse_cache/cache.ts +240 -0
  20. package/src/drivers/download_blob/sparse_cache/create_sparse_file_script.js +123 -0
  21. package/src/drivers/download_blob/sparse_cache/file.ts +49 -0
  22. package/src/drivers/download_blob/sparse_cache/ranges.test.ts +115 -0
  23. package/src/drivers/download_blob/sparse_cache/ranges.ts +93 -0
  24. package/src/drivers/download_url.ts +1 -1
  25. package/src/drivers/helpers/download_remote_handle.ts +16 -9
  26. package/src/drivers/logs.test.ts +18 -6
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@milaboratories/pl-drivers",
3
- "version": "1.5.64",
3
+ "version": "1.5.65",
4
4
  "engines": {
5
5
  "node": ">=20"
6
6
  },
@@ -31,17 +31,18 @@
31
31
  "undici": "~7.5.0",
32
32
  "zod": "~3.23.8",
33
33
  "upath": "^2.0.1",
34
- "@milaboratories/ts-helpers": "^1.3.1",
35
- "@milaboratories/computable": "^2.4.10",
36
- "@milaboratories/pl-client": "^2.10.0",
37
- "@milaboratories/pl-tree": "^1.6.6",
38
- "@milaboratories/pl-model-common": "^1.15.2"
34
+ "@milaboratories/helpers": "^1.6.12",
35
+ "@milaboratories/ts-helpers": "^1.3.2",
36
+ "@milaboratories/pl-client": "^2.10.1",
37
+ "@milaboratories/computable": "^2.4.11",
38
+ "@milaboratories/pl-tree": "^1.6.7",
39
+ "@milaboratories/pl-model-common": "^1.15.3"
39
40
  },
40
41
  "devDependencies": {
41
42
  "eslint": "^9.25.1",
42
43
  "@types/decompress": "^4.2.7",
43
44
  "typescript": "~5.5.4",
44
- "vite": "^5.4.11",
45
+ "vite": "^6.3.5",
45
46
  "@types/node": "~20.16.15",
46
47
  "vitest": "^2.1.9",
47
48
  "@vitest/coverage-v8": "^2.1.9",
@@ -4,7 +4,8 @@ import type {
4
4
  FieldRef,
5
5
  PlClient,
6
6
  PlTransaction,
7
- PollTxAccessor } from '@milaboratories/pl-client';
7
+ PollTxAccessor
8
+ } from '@milaboratories/pl-client';
8
9
  import {
9
10
  jsonToData,
10
11
  poll,
@@ -22,10 +23,12 @@ import * as env from '../../test_env';
22
23
 
23
24
  const fileName = 'answer_to_the_ultimate_question.txt';
24
25
 
25
- test('should download a blob and read its content', { timeout: 10000 }, async () => {
26
+ test('should download a blob and read its content', { timeout: 10000 }, async () => {
26
27
  await TestHelpers.withTempRoot(async (client) => {
27
28
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-driver'));
28
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
29
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
30
+
31
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner());
29
32
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
30
33
 
31
34
  const c = driver.getDownloadedBlob(downloadable);
@@ -50,7 +53,9 @@ test('should download a blob and read its content', { timeout: 10000 }, async (
50
53
  test('should download a blob range and read its content with a range', async () => {
51
54
  await TestHelpers.withTempRoot(async (client) => {
52
55
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-driver'));
53
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
56
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
57
+
58
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner());
54
59
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
55
60
 
56
61
  const c = driver.getDownloadedBlob(downloadable);
@@ -75,8 +80,10 @@ test('should download a blob range and read its content with a range', async ()
75
80
  test('should not redownload a blob when a file already exists', async () => {
76
81
  await TestHelpers.withTempRoot(async (client) => {
77
82
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-driver'));
83
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
78
84
  const signer = genSigner();
79
- const driver = await genDriver(client, dir, signer, { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
85
+
86
+ const driver = await genDriver(client, dir, rangesCacheDir, signer);
80
87
 
81
88
  console.log('Download the first time');
82
89
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
@@ -88,7 +95,7 @@ test('should not redownload a blob when a file already exists', async () => {
88
95
 
89
96
  await driver.releaseAll();
90
97
 
91
- const driver2 = await genDriver(client, dir, signer, { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
98
+ const driver2 = await genDriver(client, dir, rangesCacheDir, signer);
92
99
 
93
100
  console.log('Download the second time');
94
101
  const c2 = driver2.getDownloadedBlob(downloadable);
@@ -104,7 +111,9 @@ test('should not redownload a blob when a file already exists', async () => {
104
111
  test('should get on demand blob without downloading a blob', async () => {
105
112
  await TestHelpers.withTempRoot(async (client) => {
106
113
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-2-'));
107
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
114
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
115
+
116
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner());
108
117
 
109
118
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
110
119
  const c = driver.getOnDemandBlob(downloadable);
@@ -121,7 +130,9 @@ test('should get on demand blob without downloading a blob', async () => {
121
130
  test('should get on demand blob without downloading a blob range', async () => {
122
131
  await TestHelpers.withTempRoot(async (client) => {
123
132
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-2-'));
124
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 });
133
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
134
+
135
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner());
125
136
 
126
137
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
127
138
 
@@ -151,7 +162,13 @@ test('should get on demand blob without downloading a blob range', async () => {
151
162
  test('should get undefined when releasing a blob from a small cache and the blob was deleted.', async () => {
152
163
  await TestHelpers.withTempRoot(async (client) => {
153
164
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-3-'));
154
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 1, nConcurrentDownloads: 10 });
165
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
166
+
167
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner(), {
168
+ cacheSoftSizeBytes: 1,
169
+ nConcurrentDownloads: 10,
170
+ rangesCacheMaxSizeBytes: 1024,
171
+ });
155
172
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
156
173
 
157
174
  const c = driver.getDownloadedBlob(downloadable);
@@ -180,7 +197,13 @@ test('should get undefined when releasing a blob from a small cache and the blob
180
197
  test('should get undefined when releasing a blob from a small cache and the blob was deleted range.', async () => {
181
198
  await TestHelpers.withTempRoot(async (client) => {
182
199
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-driver'));
183
- const driver = await genDriver(client, dir, genSigner(), { cacheSoftSizeBytes: 1, nConcurrentDownloads: 10 });
200
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-ranges'));
201
+
202
+ const driver = await genDriver(client, dir, rangesCacheDir, genSigner(), {
203
+ cacheSoftSizeBytes: 1,
204
+ nConcurrentDownloads: 10,
205
+ rangesCacheMaxSizeBytes: 1024,
206
+ });
184
207
  const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
185
208
 
186
209
  const c = driver.getDownloadedBlob(downloadable);
@@ -206,49 +229,33 @@ test('should get undefined when releasing a blob from a small cache and the blob
206
229
  });
207
230
  });
208
231
 
209
- // test('should get the blob range after init if it already existed.', async () => {
210
- // await TestHelpers.withTempRoot(async (client) => {
211
- // const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-download-driver'));
212
- // const signer = genSigner();
213
- // const driver = await genDriver(client, dir, signer, { cacheSoftSizeBytes: 720, nConcurrentDownloads: 10 });
214
- // const downloadable = await makeDownloadableBlobFromAssets(client, fileName);
215
-
216
- // const c = driver.getDownloadedBlob(downloadable);
217
- // const blob = await c.getValue();
218
- // expect(blob).toBeUndefined();
219
- // await c.awaitChange();
220
-
221
- // const blob2 = await c.getValue();
222
- // expect(blob2).toBeDefined();
223
- // expect(blob2!.size).toBe(3);
224
- // expect((await driver.getContent(blob2!.handle, { from: 1, to: 3 }))?.toString()).toBe('2\n');
225
-
226
- // // Make the second driver, the cache was already initialized.
227
- // // We should get the blob instantly.
228
- // const driver2 = await genDriver(client, dir, signer, { cacheSoftSizeBytes: 720, nConcurrentDownloads: 10 });
229
- // const c2 = driver2.getDownloadedBlob(downloadable);
230
- // const blob3 = await c2.getValue();
231
- // expect(blob3).toBeDefined();
232
- // expect(blob3!.size).toBe(2);
233
- // expect((await driver.getContent(blob3!.handle, { from: 1, to: 3 }))?.toString()).toBe('2\n');
234
- // });
235
- // })
236
- ;
237
-
238
232
  function genSigner() {
239
233
  return new HmacSha256Signer(HmacSha256Signer.generateSecret())
240
234
  }
241
235
 
242
- async function genDriver(client: PlClient, dir: string, signer: Signer, ops: DownloadDriverOps) {
236
+ async function genDriver(
237
+ client: PlClient,
238
+ dir: string,
239
+ rangesCacheDir: string,
240
+ signer: Signer,
241
+ ops?: Partial<DownloadDriverOps>,
242
+ ) {
243
243
  const logger = new ConsoleLoggerAdapter();
244
244
 
245
+ const defaultOps = {
246
+ cacheSoftSizeBytes: 700 * 1024,
247
+ nConcurrentDownloads: 10,
248
+ rangesCacheMaxSizeBytes: 1024,
249
+ };
250
+
245
251
  const driver = await DownloadDriver.init(
246
252
  logger,
247
253
  createDownloadClient(logger, client, []),
248
254
  createLogsClient(client, logger),
249
255
  dir,
256
+ rangesCacheDir,
250
257
  signer,
251
- ops,
258
+ { ...defaultOps, ...ops },
252
259
  );
253
260
 
254
261
  return driver;
@@ -19,7 +19,7 @@ import type {
19
19
  RemoteBlobHandleAndSize,
20
20
  StreamingApiResponse,
21
21
  } from '@milaboratories/pl-model-common';
22
- import { newRangeBytesOpt, type RangeBytes, validateRangeBytes } from '@milaboratories/pl-model-common';
22
+ import { type RangeBytes, validateRangeBytes } from '@milaboratories/pl-model-common';
23
23
  import type {
24
24
  PlTreeEntry,
25
25
  ResourceInfo,
@@ -31,7 +31,7 @@ import {
31
31
  treeEntryToResourceInfo,
32
32
  } from '@milaboratories/pl-tree';
33
33
  import type { MiLogger, Signer } from '@milaboratories/ts-helpers';
34
- import { CallersCounter, mapGet, notEmpty, TaskProcessor } from '@milaboratories/ts-helpers';
34
+ import { CallersCounter, mapGet, TaskProcessor } from '@milaboratories/ts-helpers';
35
35
  import Denque from 'denque';
36
36
  import * as fs from 'fs';
37
37
  import { randomUUID } from 'node:crypto';
@@ -59,6 +59,7 @@ import { getSize, OnDemandBlobResourceSnapshot } from '../types';
59
59
  import { blobKey, pathToKey } from './blob_key';
60
60
  import { DownloadBlobTask, nonRecoverableError } from './download_blob_task';
61
61
  import { FilesCache } from '../helpers/files_cache';
62
+ import { SparseCache, SparseCacheFsFile, SparseCacheFsRanges } from './sparse_cache/cache';
62
63
 
63
64
  export type DownloadDriverOps = {
64
65
  /**
@@ -67,6 +68,15 @@ export type DownloadDriverOps = {
67
68
  * when they become unneeded.
68
69
  * */
69
70
  cacheSoftSizeBytes: number;
71
+
72
+ /**
73
+ * A hard limit of the amount of sparse cache, in bytes.
74
+ * Once exceeded, the download driver will start deleting blobs one by one.
75
+ *
76
+ * The sparse cache is used to store ranges of blobs.
77
+ * */
78
+ rangesCacheMaxSizeBytes: number;
79
+
70
80
  /**
71
81
  * Max number of concurrent downloads while calculating computable states
72
82
  * derived from this driver
@@ -83,6 +93,7 @@ export class DownloadDriver implements BlobDriver {
83
93
  /** Writes and removes files to a hard drive and holds a counter for every
84
94
  * file that should be kept. */
85
95
  private cache: FilesCache<DownloadBlobTask>;
96
+ private rangesCache: SparseCache;
86
97
 
87
98
  /** Downloads files and writes them to the local dir. */
88
99
  private downloadQueue: TaskProcessor;
@@ -99,10 +110,16 @@ export class DownloadDriver implements BlobDriver {
99
110
  private readonly clientDownload: ClientDownload,
100
111
  private readonly clientLogs: ClientLogs,
101
112
  saveDir: string,
113
+ private readonly rangesCacheDir: string,
102
114
  private readonly signer: Signer,
103
- ops: DownloadDriverOps,
115
+ private readonly ops: DownloadDriverOps,
104
116
  ) {
105
- this.cache = new FilesCache(ops.cacheSoftSizeBytes);
117
+ this.cache = new FilesCache(this.ops.cacheSoftSizeBytes);
118
+
119
+ const fsRanges = new SparseCacheFsRanges(this.logger, this.rangesCacheDir);
120
+ const fsStorage = new SparseCacheFsFile(this.logger, this.rangesCacheDir);
121
+ this.rangesCache = new SparseCache(this.logger, this.ops.rangesCacheMaxSizeBytes, fsRanges, fsStorage);
122
+
106
123
  this.downloadQueue = new TaskProcessor(this.logger, ops.nConcurrentDownloads);
107
124
 
108
125
  this.saveDir = path.resolve(saveDir);
@@ -113,45 +130,16 @@ export class DownloadDriver implements BlobDriver {
113
130
  clientDownload: ClientDownload,
114
131
  clientLogs: ClientLogs,
115
132
  saveDir: string,
133
+ rangesCacheDir: string,
116
134
  signer: Signer,
117
135
  ops: DownloadDriverOps,
118
136
  ): Promise<DownloadDriver> {
119
- const driver = new DownloadDriver(logger, clientDownload, clientLogs, saveDir, signer, ops);
120
- await driver.initCache();
137
+ const driver = new DownloadDriver(logger, clientDownload, clientLogs, saveDir, rangesCacheDir, signer, ops);
138
+ await driver.rangesCache.reset();
121
139
 
122
140
  return driver;
123
141
  }
124
142
 
125
- private async initCache() {
126
- let files: string[];
127
- try {
128
- files = await fsp.readdir(this.saveDir);
129
- } catch (e: unknown) {
130
- if (typeof e === 'object' && e !== null && (e as { code?: string }).code === 'ENOENT') {
131
- return;
132
- }
133
-
134
- throw e;
135
- }
136
-
137
- // for (const file of files) {
138
- // const { size } = await fsp.stat(path.resolve(this.saveDir, file));
139
-
140
- // const blobInfo = pathToBlobInfo(file);
141
- // if (blobInfo == undefined) {
142
- // continue;
143
- // }
144
-
145
- // this.cache.addCache({
146
- // path: path.resolve(this.saveDir, file),
147
- // baseKey: blobKey(blobInfo.resourceId),
148
- // key: blobKey(blobInfo.resourceId, blobInfo.range),
149
- // counter: new CallersCounter(),
150
- // range,
151
- // });
152
- // }
153
- }
154
-
155
143
  /** Gets a blob or part of the blob by its resource id or downloads a blob and sets it in a cache. */
156
144
  public getDownloadedBlob(
157
145
  res: ResourceInfo | PlTreeEntry,
@@ -312,17 +300,28 @@ export class DownloadDriver implements BlobDriver {
312
300
  if (isLocalBlobHandle(handle)) {
313
301
  return await read(this.getLocalPath(handle), range);
314
302
  }
303
+
315
304
  if (isRemoteBlobHandle(handle)) {
316
305
  const result = parseRemoteHandle(handle, this.signer);
306
+
307
+ const key = blobKey(result.info.id);
308
+ const filePath = await this.rangesCache.get(key, range ?? { from: 0, to: result.size });
309
+ if (filePath) {
310
+ return await read(filePath, range);
311
+ }
312
+
317
313
  const { content } = await this.clientDownload.downloadBlob(
318
- { id: result.id, type: result.type },
314
+ { id: result.info.id, type: result.info.type },
319
315
  undefined,
320
316
  undefined,
321
317
  range?.from,
322
318
  range?.to,
323
319
  );
324
320
 
325
- return await buffer(content);
321
+ const data = await buffer(content);
322
+ await this.rangesCache.set(key, range ?? { from: 0, to: result.size }, data);
323
+
324
+ return data;
326
325
  }
327
326
 
328
327
  throw new Error('Malformed remote handle');
@@ -342,7 +341,7 @@ export class DownloadDriver implements BlobDriver {
342
341
 
343
342
  return Computable.make((ctx) =>
344
343
  this.getDownloadedBlob(res, ctx), {
345
- postprocessValue: (v) => v ? this.getContent(v.handle, range) : undefined
344
+ postprocessValue: (v) => v ? this.getContent(v.handle, range) : undefined
346
345
  }
347
346
  ).withStableType()
348
347
  }