@milaboratories/pl-drivers 1.5.64 → 1.5.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. package/dist/drivers/download_blob/download_blob.d.ts +12 -3
  2. package/dist/drivers/download_blob/download_blob.d.ts.map +1 -1
  3. package/dist/drivers/download_blob/sparse_cache/cache.d.ts +79 -0
  4. package/dist/drivers/download_blob/sparse_cache/cache.d.ts.map +1 -0
  5. package/dist/drivers/download_blob/sparse_cache/file.d.ts +8 -0
  6. package/dist/drivers/download_blob/sparse_cache/file.d.ts.map +1 -0
  7. package/dist/drivers/download_blob/sparse_cache/ranges.d.ts +46 -0
  8. package/dist/drivers/download_blob/sparse_cache/ranges.d.ts.map +1 -0
  9. package/dist/drivers/helpers/download_remote_handle.d.ts +4 -1
  10. package/dist/drivers/helpers/download_remote_handle.d.ts.map +1 -1
  11. package/dist/index.js +2 -2
  12. package/dist/index.js.map +1 -1
  13. package/dist/index.mjs +1584 -1391
  14. package/dist/index.mjs.map +1 -1
  15. package/package.json +10 -9
  16. package/src/drivers/download_blob/download_blob.test.ts +48 -41
  17. package/src/drivers/download_blob/download_blob.ts +38 -39
  18. package/src/drivers/download_blob/sparse_cache/cache.test.ts +371 -0
  19. package/src/drivers/download_blob/sparse_cache/cache.ts +240 -0
  20. package/src/drivers/download_blob/sparse_cache/create_sparse_file_script.js +123 -0
  21. package/src/drivers/download_blob/sparse_cache/file.ts +49 -0
  22. package/src/drivers/download_blob/sparse_cache/ranges.test.ts +115 -0
  23. package/src/drivers/download_blob/sparse_cache/ranges.ts +93 -0
  24. package/src/drivers/download_url.ts +1 -1
  25. package/src/drivers/helpers/download_remote_handle.ts +16 -9
  26. package/src/drivers/logs.test.ts +18 -6
@@ -0,0 +1,123 @@
1
+ /** Test script for creating sparse files on any OS. On Windows, it also checks that the file was actually created as sparse.
2
+ * Usage: node script.js <dir where to create a file>
3
+ * Works on any node >= 18.0.0 */
4
+
5
+ const { exec } = require('node:child_process');
6
+ const { promisify } = require('node:util');
7
+ const path = require('node:path');
8
+ const fs = require('node:fs/promises');
9
+
10
+ const testDir = path.resolve(process.argv[2]);
11
+ const baseFileName = 'sparse-test-file.dat';
12
+ const logicalFileSize = 50 * 1024 * 1024; // 50 MB
13
+
14
/**
 * Makes sure `dirPath` exists as a directory, creating missing parents.
 *
 * `fs.mkdir` with `recursive: true` resolves without error when the directory
 * is already present, so no separate existence check is needed; checking first
 * and creating afterwards would race with anything else touching the path.
 *
 * @param {string} dirPath - Directory that must exist after the call.
 * @returns {Promise<void>}
 * @throws If the directory cannot be created, e.g. the path is a regular file.
 */
async function ensureDir(dirPath) {
  await fs.mkdir(dirPath, { recursive: true });
}
21
+
22
/**
 * Removes `filePath` if it exists.
 *
 * Only a missing file (`ENOENT`) is ignored; any other failure is rethrown so
 * the caller does not continue with a stale file still in place.
 *
 * @param {string} filePath - File to remove.
 * @returns {Promise<void>}
 * @throws Any `unlink` error other than `ENOENT`.
 */
async function cleanupFile(filePath) {
  try {
    await fs.unlink(filePath);
  } catch (error) {
    // File doesn't exist or was already removed: nothing to clean up.
    if (error?.code !== 'ENOENT') {
      throw error;
    }
  }
}
30
+
31
const asyncExec = promisify(exec);

/**
 * Runs a command line and returns its captured output.
 *
 * The parts of `commandAndArgs` are joined with single spaces and the result
 * is handed to the promisified `exec`, so callers are responsible for quoting
 * any argument that contains spaces.
 *
 * @param {string[]} commandAndArgs - Command followed by its arguments.
 * @param {object} [options] - Passed through to `exec` unchanged.
 * @returns {Promise<{stdout: string|undefined, stderr: string|undefined}>}
 * @throws Rethrows the `exec` error after printing the command, stdout and stderr.
 */
async function runCommand(commandAndArgs, options) {
  const commandLine = commandAndArgs.join(' ');

  let output;
  try {
    output = await asyncExec(commandLine, options);
  } catch (error) {
    // The promisified exec rejects with an error that carries stdout and stderr.
    console.error(`Command "${commandLine}" failed: ${error.message}`);
    console.error(`STDOUT: ${error.stdout}`);
    console.error(`STDERR: ${error.stderr}`);
    throw error; // Let the calling function decide what to do.
  }

  return {
    stdout: output.stdout?.toString(),
    stderr: output.stderr?.toString(),
  };
}
48
+
49
/**
 * Creates a fresh file at `currentFilePath` and writes two small markers far
 * apart, leaving a gap in between.
 *
 * Steps: make sure the parent directory exists, remove any previous file,
 * create an empty file, flag it as sparse with `fsutil` when `platform` is
 * 'win32', then write one marker at offset 0 and another one ending 200 bytes
 * before `size`.
 *
 * @param {string} platform - Platform name, e.g. the value of `process.platform`.
 * @param {string} currentFilePath - File to create.
 * @param {number} size - Offset used to place the distant marker.
 * @returns {Promise<boolean>} `true` on success; `false` if any step failed
 *   (the error message is printed, not rethrown).
 */
async function createSparseFile(
  platform,
  currentFilePath,
  size
) {
  try {
    await ensureDir(path.dirname(currentFilePath));
    await cleanupFile(currentFilePath); // Clean up if file exists

    // Create the empty file and close it before calling fsutil.
    const created = await fs.open(currentFilePath, 'w');
    await created.close();

    if (platform === 'win32') {
      // The file is empty at this point; it only grows when data is written below.
      console.log(`Windows: Empty file "${currentFilePath}" created.`);
      await runCommand(['fsutil', 'sparse', 'setflag', `"${currentFilePath}"`], { stdio: 'pipe' });
      console.log(`Windows: File "${currentFilePath}" marked as sparse.`);
    }

    // Write data with gaps to demonstrate sparse nature
    const fileHandle = await fs.open(currentFilePath, 'r+');
    try {
      const initialData = Buffer.from(`BEGIN_SPARSE_${platform.toUpperCase()}_`);
      await fileHandle.write(initialData, 0, initialData.length, 0);

      const distantData = Buffer.from(`_END_SPARSE_${platform.toUpperCase()}`);
      const distantOffset = size - distantData.length - 200;
      await fileHandle.write(distantData, 0, distantData.length, distantOffset);
    } finally {
      // Always release the descriptor, even when a write fails.
      await fileHandle.close();
    }

    console.log(`Initial and distant data written to "${currentFilePath}".`);
    return true;
  } catch (error) {
    console.error(
      `Error in createSparseFile for ${platform} at "${currentFilePath}": ${error.message}`
    );
    return false;
  }
}
96
+
97
// Entry point: creates the sparse file for the current platform and, on
// Windows, prints what `fsutil` reports about it. Any failure sets a non-zero
// exit code so callers of the script can detect it.
(async () => {
  const currentPlatform = process.platform;
  const platformFilePath = path.join(
    testDir,
    `${currentPlatform}-${baseFileName}`
  );

  const creationSuccess = await createSparseFile(
    currentPlatform,
    platformFilePath,
    logicalFileSize
  );

  if (!creationSuccess) {
    console.error(`Failed to create sparse file at "${platformFilePath}".`);
    process.exitCode = 1;
    return; // Nothing to query if the file was not created.
  }

  console.log(`Sparse file created successfully at "${platformFilePath}".`);

  if (currentPlatform === 'win32') {
    const queryFlag = await runCommand(['fsutil', 'sparse', 'queryflag', `"${platformFilePath}"`], { stdio: 'pipe' });
    console.log(`Windows: File "${platformFilePath}" is sparse: ${queryFlag.stdout}, error: ${queryFlag.stderr}`);

    const queryRange = await runCommand(['fsutil', 'sparse', 'queryrange', `"${platformFilePath}"`], { stdio: 'pipe' });
    console.log(`Windows: Query range for "${platformFilePath}": ${queryRange.stdout}, error: ${queryRange.stderr}`);
  }
})().catch((error) => {
  // runCommand has already printed the command output; just record the failure.
  console.error(`Sparse file check failed: ${error.message}`);
  process.exitCode = 1;
});
@@ -0,0 +1,49 @@
1
+ import { fileExists, MiLogger, spawnAsync } from "@milaboratories/ts-helpers";
2
+ import * as fs from 'node:fs/promises';
3
+
4
/** Creates the file at `path` (emptying it if it already exists) and, on Windows,
 * flags it as sparse. Any error is reported through `logger.error` and not rethrown.
 * Table of what supports sparse files:
 * https://en.wikipedia.org/wiki/Comparison_of_file_systems#Allocation_and_layout_policies */
export async function createSparseFile(
  logger: MiLogger,
  path: string,
  platform: NodeJS.Platform,
) {
  try {
    // Opening with 'w' is enough to create the file; the handle is not needed afterwards.
    await (await fs.open(path, 'w')).close();

    await ensureSparseOnWindows(path, platform);
  } catch (error: unknown) {
    logger.error(`Error creating file ${path} on platform ${platform}: ${error}`);
  }
}
21
+
22
/** Flags the file as sparse with `fsutil sparse setflag`; does nothing unless
 * `platform` is 'win32'.
 * The result can be inspected manually with:
 * `fsutil sparse queryflag <path>`
 * and
 * `fsutil sparse queryrange <path>`
 * NOTE(review): the path is passed wrapped in literal double quotes. Whether that
 * is correct depends on how `spawnAsync` launches the process (with or without
 * a shell) — confirm.
 */
async function ensureSparseOnWindows(path: string, platform: NodeJS.Platform) {
  if (platform !== 'win32') {
    return;
  }

  await spawnAsync('fsutil', ['sparse', 'setflag', `"${path}"`], { stdio: 'pipe' });
}
33
+
34
/** Ensures the file is created and writes `data` into it starting at byte offset `from`.
 * If the file does not exist yet, it is first created via `createSparseFile`.
 * The file handle is closed even when the write fails.
 * Rejects if the file cannot be opened or written. */
export async function writeToSparseFile(
  logger: MiLogger,
  platform: NodeJS.Platform,
  path: string,
  data: Uint8Array,
  from: number
) {
  if (!(await fileExists(path))) {
    await createSparseFile(logger, path, platform);
  }

  // 'r+' keeps existing content, so earlier writes at other offsets are preserved.
  const fileHandle = await fs.open(path, 'r+');
  try {
    await fileHandle.write(data, 0, data.length, from);
  } finally {
    await fileHandle.close();
  }
}
@@ -0,0 +1,115 @@
1
+ import { Ranges, normalizeRanges } from "./ranges";
2
+ import { describe, it, expect } from 'vitest';
3
+
4
+ describe('normalizeRanges', () => {
5
+ const cases: { name: string, input: Ranges, expected: Ranges }[] = [
6
+ {
7
+ name: 'empty ranges',
8
+ input: { ranges: [] },
9
+ expected: { ranges: [] },
10
+ },
11
+ {
12
+ name: 'single range',
13
+ input: { ranges: [{ from: 0, to: 10 }] },
14
+ expected: { ranges: [{ from: 0, to: 10 }] },
15
+ },
16
+ {
17
+ name: 'two unsorted non-overlapping ranges',
18
+ input: {
19
+ ranges: [
20
+ { from: 20, to: 30 },
21
+ { from: 0, to: 10 },
22
+ ]
23
+ },
24
+ expected: {
25
+ ranges: [
26
+ { from: 0, to: 10 },
27
+ { from: 20, to: 30 },
28
+ ]
29
+ },
30
+ },
31
+ {
32
+ name: 'two overlapping ranges',
33
+ input: {
34
+ ranges: [
35
+ { from: 0, to: 10 },
36
+ { from: 5, to: 15 },
37
+ ]
38
+ },
39
+ expected: { ranges: [{ from: 0, to: 15 }] },
40
+ },
41
+ {
42
+ name: 'two adjacent ranges',
43
+ input: {
44
+ ranges: [
45
+ { from: 0, to: 10 },
46
+ { from: 10, to: 20 },
47
+ ]
48
+ },
49
+ expected: { ranges: [{ from: 0, to: 20 }] },
50
+ },
51
+ {
52
+ name: 'multiple overlapping ranges',
53
+ input: {
54
+ ranges: [
55
+ { from: 0, to: 10 },
56
+ { from: 5, to: 15 },
57
+ { from: 12, to: 20 },
58
+ ]
59
+ },
60
+ expected: { ranges: [{ from: 0, to: 20 }] },
61
+ },
62
+ {
63
+ name: 'inner range',
64
+ input: {
65
+ ranges: [
66
+ { from: 0, to: 20 },
67
+ { from: 5, to: 15 },
68
+ ]
69
+ },
70
+ expected: { ranges: [{ from: 0, to: 20 }] },
71
+ },
72
+ {
73
+ name: 'inner range with outer range',
74
+ input: {
75
+ ranges: [
76
+ { from: 5, to: 15 },
77
+ { from: 20, to: 30 },
78
+ { from: 0, to: 20 },
79
+ ]
80
+ },
81
+ expected: { ranges: [{ from: 0, to: 30 }] },
82
+ },
83
+ {
84
+ name: 'more than 1 range in expected',
85
+ input: {
86
+ ranges: [
87
+ { from: 25, to: 30},
88
+ { from: 20, to: 25 },
89
+
90
+ { from: 0, to: 8 },
91
+ { from: 2, to: 10 },
92
+ { from: 1, to: 9 },
93
+
94
+ { from: 40, to: 50 },
95
+ { from: 45, to: 47 }
96
+ ]
97
+ },
98
+ expected: { ranges: [
99
+ { from: 0, to: 10 },
100
+ { from: 20, to: 30 },
101
+ { from: 40, to: 50 },
102
+ ] },
103
+ }
104
+ ];
105
+
106
+ for (const tc of cases) {
107
+ it(tc.name, () => {
108
+ normalizeRanges(tc.input);
109
+
110
+ expect(tc.input).toEqual(tc.expected);
111
+ });
112
+ }
113
+ });
114
+
115
+
@@ -0,0 +1,93 @@
1
+ import { z } from "zod";
2
+ import * as fs from 'node:fs/promises';
3
+ import { RangeBytes } from "@milaboratories/pl-model-common";
4
+ import { createPathAtomically, MiLogger } from "@milaboratories/ts-helpers";
5
+ import { CorruptedRangesError } from "./cache";
6
+
7
+ /** The content of the ranges file: ranges of bytes.
8
+ * The ranges should be normalized: sorted and no overlaps.
9
+ * For that, use `normalizeRanges` function. */
10
+ const Ranges = z.object({
11
+ ranges: z.array(RangeBytes),
12
+ });
13
+
14
+ export type Ranges = z.infer<typeof Ranges>;
15
+
16
export const rangesFilePostfix = '.ranges.json';

/** Builds the name of the ranges file for `fPath` by appending `rangesFilePostfix`. */
export function rangesFileName(fPath: string): string {
  return `${fPath}${rangesFilePostfix}`;
}
21
+
22
/** Reads and validates the ranges file at `path`, returning normalized ranges.
 * - A missing file (ENOENT) yields empty ranges.
 * - Invalid JSON or a schema mismatch is logged and rethrown as `CorruptedRangesError`.
 * - Any other error is rethrown unchanged. */
export async function readRangesFile(logger: MiLogger, path: string): Promise<Ranges> {
  let loaded: Ranges;

  try {
    const content = await fs.readFile(path, 'utf8');
    loaded = Ranges.parse(JSON.parse(content));
  } catch (e: unknown) {
    const isCorrupted = e instanceof SyntaxError || e instanceof z.ZodError;
    if (isCorrupted) {
      const msg = `readRangesFile: the file ${path} was corrupted: ${e}`;
      logger.error(msg);
      throw new CorruptedRangesError(msg);
    }

    const isMissing = e instanceof Error && 'code' in e && e.code === 'ENOENT';
    if (!isMissing) {
      throw e;
    }

    // If the file does not exist, assume the ranges are empty.
    loaded = { ranges: [] };
  }

  normalizeRanges(loaded);

  return loaded;
}
46
+
47
/** Writes `ranges` as indented JSON to a temporary file, which is then renamed
 * to `path` atomically (via `createPathAtomically`). */
export async function writeRangesFile(logger: MiLogger, path: string, ranges: Ranges) {
  const writeJson = async (tempPath: string): Promise<void> => {
    const serialized = JSON.stringify(ranges, null, 2);
    // 'wx' makes the write fail if the temporary path already exists.
    await fs.writeFile(tempPath, serialized, { flag: 'wx' });
  };

  await createPathAtomically(logger, path, writeJson);
}
53
+
54
/** Sorts and merges overlapping or adjacent ranges, in place.
 * After the call `s.ranges` (the same array object) is sorted by `from` and no
 * two ranges touch or overlap. Merged entries are new objects; the range
 * objects passed in are never modified.
 *
 * Runs in a single pass after sorting instead of splicing the array on every
 * merge, so the cost stays at the sort's O(n log n). */
export function normalizeRanges(s: Ranges) {
  const { ranges } = s;
  ranges.sort((a, b) => a.from - b.from);

  // `last` is the index of the most recent range kept in the compacted prefix.
  let last = 0;
  for (let i = 1; i < ranges.length; i++) {
    const kept = ranges[last];
    const next = ranges[i];

    if (kept.to >= next.from) {
      // Sorted by `from`, so `kept.from` is already the smaller start.
      ranges[last] = { from: kept.from, to: Math.max(kept.to, next.to) };
    } else {
      last++;
      ranges[last] = next;
    }
  }

  // Drop the leftover tail behind the compacted prefix (no-op for empty input).
  ranges.length = Math.min(ranges.length, last + 1);
}
72
+
73
/** Sums the lengths (`to - from`) of all ranges; 0 when there are none. */
export function rangesSize(s: Ranges) {
  let total = 0;
  for (const { from, to } of s.ranges) {
    total = total + to - from;
  }

  return total;
}
76
+
77
/** Tells whether `range` lies entirely inside a single range of `allRanges`. */
export function doesRangeExist(allRanges: Ranges, range: RangeBytes): boolean {
  return allRanges.ranges.some(
    (candidate) => candidate.from <= range.from && range.to <= candidate.to,
  );
}
86
+
87
/** Appends `range` to `s`, re-normalizes `s` in place and returns the same `s`. */
export function addRange(s: Ranges, range: RangeBytes) {
  const { ranges } = s;
  ranges.push(range);

  // Restore the sorted, non-overlapping invariant after the append.
  normalizeRanges(s);

  return s;
}
93
+
@@ -58,7 +58,7 @@ export class DownloadUrlDriver implements DownloadUrlSyncReader {
58
58
  httpClient: Dispatcher,
59
59
  private readonly saveDir: string,
60
60
  private readonly opts: DownloadUrlDriverOps = {
61
- cacheSoftSizeBytes: 50 * 1024 * 1024,
61
+ cacheSoftSizeBytes: 1 * 1024 * 1024 * 1024, // 1 GB
62
62
  withGunzip: true,
63
63
  nConcurrentDownloads: 50,
64
64
  },
@@ -3,19 +3,20 @@
3
3
 
4
4
  import type { Signer } from '@milaboratories/ts-helpers';
5
5
  import type { OnDemandBlobResourceSnapshot } from '../types';
6
- import type { RemoteBlobHandle, RangeBytes } from '@milaboratories/pl-model-common';
7
- import { bigintToResourceId, ResourceId, ResourceType } from '@milaboratories/pl-client';
6
+ import type { RemoteBlobHandle } from '@milaboratories/pl-model-common';
7
+ import { bigintToResourceId } from '@milaboratories/pl-client';
8
8
  import { ResourceInfo } from '@milaboratories/pl-tree';
9
+ import { getSize } from '../types';
9
10
 
10
- // https://regex101.com/r/Q4YdTa/4
11
+ // https://regex101.com/r/Q4YdTa/5
11
12
  const remoteHandleRegex
12
- = /^blob\+remote:\/\/download\/(?<content>(?<resourceType>.+)\/(?<resourceVersion>.+?)\/(?<resourceId>\d+?))#(?<signature>.*)$/;
13
+ = /^blob\+remote:\/\/download\/(?<content>(?<resourceType>.+)\/(?<resourceVersion>.+?)\/(?<resourceId>\d+?)\/(?<size>\d+?))#(?<signature>.*)$/;
13
14
 
14
15
  export function newRemoteHandle(
15
16
  rInfo: OnDemandBlobResourceSnapshot,
16
17
  signer: Signer,
17
18
  ): RemoteBlobHandle {
18
- let content = `${rInfo.type.name}/${rInfo.type.version}/${BigInt(rInfo.id)}`;
19
+ let content = `${rInfo.type.name}/${rInfo.type.version}/${BigInt(rInfo.id)}/${getSize(rInfo)}`;
19
20
 
20
21
  return `blob+remote://download/${content}#${signer.sign(content)}` as RemoteBlobHandle;
21
22
  }
@@ -24,18 +25,24 @@ export function isRemoteBlobHandle(handle: string): handle is RemoteBlobHandle {
24
25
  return Boolean(handle.match(remoteHandleRegex));
25
26
  }
26
27
 
27
- export function parseRemoteHandle(handle: RemoteBlobHandle, signer: Signer): ResourceInfo {
28
+ export function parseRemoteHandle(handle: RemoteBlobHandle, signer: Signer): {
29
+ info: ResourceInfo;
30
+ size: number;
31
+ } {
28
32
  const parsed = handle.match(remoteHandleRegex);
29
33
  if (parsed === null) {
30
34
  throw new Error(`Remote handle is malformed: ${handle}, matches: ${parsed}`);
31
35
  }
32
36
 
33
- const { content, resourceType, resourceVersion, resourceId, signature } = parsed.groups!;
37
+ const { content, resourceType, resourceVersion, resourceId, size, signature } = parsed.groups!;
34
38
 
35
39
  signer.verify(content, signature, `Signature verification failed for ${handle}`);
36
40
 
37
41
  return {
38
- id: bigintToResourceId(BigInt(resourceId)),
39
- type: { name: resourceType, version: resourceVersion },
42
+ info:{
43
+ id: bigintToResourceId(BigInt(resourceId)),
44
+ type: { name: resourceType, version: resourceVersion },
45
+ },
46
+ size: Number(size),
40
47
  };
41
48
  }
@@ -25,6 +25,12 @@ import { DownloadDriver } from './download_blob/download_blob';
25
25
  import { LogsDriver } from './logs';
26
26
  import { LogsStreamDriver } from './logs_stream';
27
27
 
28
+ const downloadDriverOps = {
29
+ cacheSoftSizeBytes: 700 * 1024,
30
+ nConcurrentDownloads: 10,
31
+ rangesCacheMaxSizeBytes: 1024,
32
+ };
33
+
28
34
  test('should get all logs', async () => {
29
35
  await TestHelpers.withTempRoot(async (client) => {
30
36
  const logger = new ConsoleLoggerAdapter();
@@ -35,13 +41,15 @@ test('should get all logs', async () => {
35
41
  });
36
42
  const logsStream = new LogsStreamDriver(logger, createLogsClient(client, logger));
37
43
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-1-'));
38
- const download = new DownloadDriver(
44
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-ranges'));
45
+ const download = await DownloadDriver.init(
39
46
  logger,
40
47
  createDownloadClient(logger, client, []),
41
48
  createLogsClient(client, logger),
42
49
  dir,
50
+ rangesCacheDir,
43
51
  new HmacSha256Signer(HmacSha256Signer.generateSecret()),
44
- { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 },
52
+ downloadDriverOps,
45
53
  );
46
54
  const logs = new LogsDriver(logger, logsStream, download);
47
55
 
@@ -80,13 +88,15 @@ test('should get last line with a prefix', async () => {
80
88
  });
81
89
  const logsStream = new LogsStreamDriver(logger, createLogsClient(client, logger));
82
90
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-2-'));
83
- const download = new DownloadDriver(
91
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-ranges'));
92
+ const download = await DownloadDriver.init(
84
93
  logger,
85
94
  createDownloadClient(logger, client, []),
86
95
  createLogsClient(client, logger),
87
96
  dir,
97
+ rangesCacheDir,
88
98
  new HmacSha256Signer(HmacSha256Signer.generateSecret()),
89
- { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 },
99
+ downloadDriverOps,
90
100
  );
91
101
  const logs = new LogsDriver(logger, logsStream, download);
92
102
 
@@ -133,13 +143,15 @@ test('should get log smart object and get log lines from that', async () => {
133
143
  });
134
144
  const logsStream = new LogsStreamDriver(logger, createLogsClient(client, logger));
135
145
  const dir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-3-'));
136
- const download = new DownloadDriver(
146
+ const rangesCacheDir = await fsp.mkdtemp(path.join(os.tmpdir(), 'test-logs-ranges'));
147
+ const download = await DownloadDriver.init(
137
148
  logger,
138
149
  createDownloadClient(logger, client, []),
139
150
  createLogsClient(client, logger),
140
151
  dir,
152
+ rangesCacheDir,
141
153
  new HmacSha256Signer(HmacSha256Signer.generateSecret()),
142
- { cacheSoftSizeBytes: 700 * 1024, nConcurrentDownloads: 10 },
154
+ downloadDriverOps,
143
155
  );
144
156
  const logs = new LogsDriver(logger, logsStream, download);
145
157