dicom-curate 0.26.2 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/applyMappingsWorker.js +72 -32
- package/dist/esm/collectMappings.js +3 -3
- package/dist/esm/config/dicom/tagConversion.js +1 -1
- package/dist/esm/config/sampleCompositeSpecification.js +1 -1
- package/dist/esm/curateDict.js +3 -3
- package/dist/esm/curateOne.js +71 -32
- package/dist/esm/deidentifyPS315E.js +3 -3
- package/dist/esm/hash.js +28 -4
- package/dist/esm/index.js +113 -38
- package/dist/esm/mappingWorkerPool.js +32 -5
- package/dist/esm/s3Client.js +1 -1
- package/dist/esm/scanDirectoryWorker.js +37 -3
- package/dist/types/applyMappingsWorker.d.ts +1 -0
- package/dist/types/curateOne.d.ts +2 -1
- package/dist/types/hash.d.ts +1 -1
- package/dist/types/mappingWorkerPool.d.ts +12 -1
- package/dist/types/scanDirectoryWorker.d.ts +2 -0
- package/dist/types/types.d.ts +3 -1
- package/dist/umd/dicom-curate.umd.js +300 -83
- package/dist/umd/dicom-curate.umd.js.map +1 -1
- package/dist/umd/dicom-curate.umd.min.js +7 -7
- package/dist/umd/dicom-curate.umd.min.js.map +1 -1
- package/package.json +1 -1
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
const { createRequire } = await Promise.resolve().then(function () { return _polyfillNode_module; });
|
|
51
51
|
const req = createRequire((_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('scanDirectoryWorker.js', document.baseURI).href));
|
|
52
52
|
const mod = req('@aws-sdk/client-s3');
|
|
53
|
-
cachedS3Client = mod?.default
|
|
53
|
+
cachedS3Client = mod?.default ?? mod;
|
|
54
54
|
}
|
|
55
55
|
else {
|
|
56
56
|
// browser-friendly dynamic import -> code-split chunk
|
|
@@ -95,6 +95,31 @@
|
|
|
95
95
|
'.ds_store',
|
|
96
96
|
];
|
|
97
97
|
let keepScanning = true;
|
|
98
|
+
// Backpressure gate: when the main thread signals 'stop', the scan worker
|
|
99
|
+
// awaits this promise before emitting the next file. 'resume' resolves it.
|
|
100
|
+
let pauseResolve = null;
|
|
101
|
+
let pausePromise = null;
|
|
102
|
+
function pauseScanning() {
|
|
103
|
+
if (!pausePromise) {
|
|
104
|
+
pausePromise = new Promise((resolve) => {
|
|
105
|
+
pauseResolve = resolve;
|
|
106
|
+
});
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
function resumeScanning() {
|
|
110
|
+
if (pauseResolve) {
|
|
111
|
+
pauseResolve();
|
|
112
|
+
pauseResolve = null;
|
|
113
|
+
pausePromise = null;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
/** If paused, wait until resumed. Returns false if scanning was aborted. */
|
|
117
|
+
async function waitIfPaused() {
|
|
118
|
+
if (pausePromise) {
|
|
119
|
+
await pausePromise;
|
|
120
|
+
}
|
|
121
|
+
return keepScanning;
|
|
122
|
+
}
|
|
98
123
|
let excludedFiletypes = [];
|
|
99
124
|
// Compiled regexes from glob patterns, used to exclude files by path
|
|
100
125
|
let excludedPathRegexes = [];
|
|
@@ -272,7 +297,13 @@
|
|
|
272
297
|
break;
|
|
273
298
|
}
|
|
274
299
|
case 'stop': {
|
|
275
|
-
|
|
300
|
+
// Pause scanning — the scan loop will await waitIfPaused()
|
|
301
|
+
pauseScanning();
|
|
302
|
+
break;
|
|
303
|
+
}
|
|
304
|
+
case 'resume': {
|
|
305
|
+
// Resume scanning after a pause
|
|
306
|
+
resumeScanning();
|
|
276
307
|
break;
|
|
277
308
|
}
|
|
278
309
|
default:
|
|
@@ -356,6 +387,9 @@
|
|
|
356
387
|
for await (const entry of dir.values()) {
|
|
357
388
|
if (!keepScanning)
|
|
358
389
|
return;
|
|
390
|
+
// Backpressure: if the main thread paused us, wait here until resumed
|
|
391
|
+
if (!(await waitIfPaused()))
|
|
392
|
+
return;
|
|
359
393
|
if (entry.kind === 'file') {
|
|
360
394
|
const file = await entry.getFile();
|
|
361
395
|
const fileAnomalies = [];
|
|
@@ -422,6 +456,9 @@
|
|
|
422
456
|
for (const entry of entries) {
|
|
423
457
|
if (!keepScanning)
|
|
424
458
|
return;
|
|
459
|
+
// Backpressure: if the main thread paused us, wait here until resumed
|
|
460
|
+
if (!(await waitIfPaused()))
|
|
461
|
+
return;
|
|
425
462
|
if (entry.isFile()) {
|
|
426
463
|
const filePath = path.join(currentPath, entry.name);
|
|
427
464
|
const stats = await fs.stat(filePath);
|
|
@@ -28715,7 +28752,7 @@
|
|
|
28715
28752
|
// For private tags (which don't have keywords), keep as-is
|
|
28716
28753
|
const tagId = isPrivateTag(keyword)
|
|
28717
28754
|
? keyword
|
|
28718
|
-
: data$1.DicomMetaDictionary.nameMap[keyword]?.tag
|
|
28755
|
+
: (data$1.DicomMetaDictionary.nameMap[keyword]?.tag ?? keyword);
|
|
28719
28756
|
// Remove parentheses and commas, convert to the format used in dictionary keys
|
|
28720
28757
|
return tagId.replace(/[(),]/g, '').toLowerCase();
|
|
28721
28758
|
}
|
|
@@ -46996,7 +47033,7 @@
|
|
|
46996
47033
|
// Deal with dcmjs quirk of labeling retired tags with a
|
|
46997
47034
|
// "RETIRED_" prefix
|
|
46998
47035
|
function getVr(keyword) {
|
|
46999
|
-
const element = nameMap[keyword]
|
|
47036
|
+
const element = nameMap[keyword] ?? nameMap[`RETIRED_${keyword}`];
|
|
47000
47037
|
return element?.vr;
|
|
47001
47038
|
}
|
|
47002
47039
|
function temporalVr(vr) {
|
|
@@ -47071,7 +47108,7 @@
|
|
|
47071
47108
|
}
|
|
47072
47109
|
}
|
|
47073
47110
|
}
|
|
47074
|
-
return current[tagName]
|
|
47111
|
+
return current[tagName] ?? null;
|
|
47075
47112
|
}
|
|
47076
47113
|
const { cleanDescriptorsOption, cleanDescriptorsExceptions, retainLongitudinalTemporalInformationOptions, retainPatientCharacteristicsOption, retainDeviceIdentityOption, retainUIDsOption, retainSafePrivateOption, retainInstitutionIdentityOption, } = dicomPS315EOptions;
|
|
47077
47114
|
const taggedps315EEls = ps315EElements.reduce((acc, item) => {
|
|
@@ -48717,17 +48754,20 @@
|
|
|
48717
48754
|
|
|
48718
48755
|
var crcExports = requireCrc();
|
|
48719
48756
|
|
|
48720
|
-
|
|
48757
|
+
const DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024; // 5 MB — matches @aws-sdk/lib-storage default
|
|
48758
|
+
async function hash(buffer, hashMethod, hashPartSize) {
|
|
48721
48759
|
switch (hashMethod) {
|
|
48722
48760
|
case 'sha256':
|
|
48723
48761
|
return await sha256Hex(buffer);
|
|
48724
48762
|
case 'crc32':
|
|
48725
48763
|
return crc32Hex(buffer);
|
|
48726
|
-
case 'md5':
|
|
48727
|
-
return md5Hex(buffer);
|
|
48728
48764
|
case 'crc64':
|
|
48729
|
-
default:
|
|
48730
48765
|
return crc64Hex(buffer);
|
|
48766
|
+
case 'aws-s3-etag-2025':
|
|
48767
|
+
return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
|
|
48768
|
+
case 'md5':
|
|
48769
|
+
default:
|
|
48770
|
+
return md5Hex(buffer);
|
|
48731
48771
|
}
|
|
48732
48772
|
}
|
|
48733
48773
|
// helper: compute sha256 hex
|
|
@@ -48739,6 +48779,49 @@
|
|
|
48739
48779
|
function md5Hex(buffer) {
|
|
48740
48780
|
return md5(new Uint8Array(buffer));
|
|
48741
48781
|
}
|
|
48782
|
+
/**
|
|
48783
|
+
* Compute a hash that matches the S3 ETag for the given buffer.
|
|
48784
|
+
*
|
|
48785
|
+
* - Single-part (buffer.byteLength <= partSize): plain MD5 hex string.
|
|
48786
|
+
* This matches the documented S3 ETag behaviour for objects created via
|
|
48787
|
+
* PUT Object with SSE-S3 (AES256) encryption.
|
|
48788
|
+
*
|
|
48789
|
+
* - Multi-part (buffer.byteLength > partSize): the undocumented but stable
|
|
48790
|
+
* composite format md5(concat(md5_raw(part1) … md5_raw(partN)))-N
|
|
48791
|
+
* that S3 returns for objects created via the Multipart Upload API.
|
|
48792
|
+
*/
|
|
48793
|
+
function awsS3Etag(buffer, partSize) {
|
|
48794
|
+
if (buffer.byteLength <= partSize) {
|
|
48795
|
+
return md5Hex(buffer);
|
|
48796
|
+
}
|
|
48797
|
+
return multipartMd5(buffer, partSize);
|
|
48798
|
+
}
|
|
48799
|
+
/**
|
|
48800
|
+
* Reproduce the S3 multipart ETag for a buffer given a known part size.
|
|
48801
|
+
*
|
|
48802
|
+
* Algorithm (empirically stable since ~2006, undocumented by AWS):
|
|
48803
|
+
* 1. Split buffer into ceil(size / partSize) chunks
|
|
48804
|
+
* 2. Compute raw MD5 (16 bytes) of each chunk
|
|
48805
|
+
* 3. Concatenate all raw digests
|
|
48806
|
+
* 4. Compute MD5 of the concatenation → hex
|
|
48807
|
+
* 5. Append "-" + number of parts
|
|
48808
|
+
*/
|
|
48809
|
+
function multipartMd5(buffer, partSize) {
|
|
48810
|
+
const totalSize = buffer.byteLength;
|
|
48811
|
+
const partCount = Math.ceil(totalSize / partSize);
|
|
48812
|
+
const rawDigests = new Uint8Array(partCount * 16);
|
|
48813
|
+
for (let i = 0; i < partCount; i++) {
|
|
48814
|
+
const start = i * partSize;
|
|
48815
|
+
const end = Math.min(start + partSize, totalSize);
|
|
48816
|
+
const partBuffer = buffer.slice(start, end);
|
|
48817
|
+
// md5() returns a 32-char hex string; convert to 16 raw bytes
|
|
48818
|
+
const hex = md5(new Uint8Array(partBuffer));
|
|
48819
|
+
for (let j = 0; j < 16; j++) {
|
|
48820
|
+
rawDigests[i * 16 + j] = parseInt(hex.slice(j * 2, j * 2 + 2), 16);
|
|
48821
|
+
}
|
|
48822
|
+
}
|
|
48823
|
+
return `${md5(rawDigests)}-${partCount}`;
|
|
48824
|
+
}
|
|
48742
48825
|
// helper: compute crc32 hex (use js-crc). Accepts ArrayBuffer and returns
|
|
48743
48826
|
// lowercase, zero-padded 8-character hex string.
|
|
48744
48827
|
// Accept ArrayBuffer, Uint8Array or Node Buffer and always compute the CRC32
|
|
@@ -48827,7 +48910,7 @@
|
|
|
48827
48910
|
const { createRequire } = await Promise.resolve().then(function () { return _polyfillNode_module; });
|
|
48828
48911
|
const req = createRequire((_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('applyMappingsWorker.js', document.baseURI).href));
|
|
48829
48912
|
const mod = req('@aws-sdk/client-s3');
|
|
48830
|
-
cachedS3Client = mod?.default
|
|
48913
|
+
cachedS3Client = mod?.default ?? mod;
|
|
48831
48914
|
}
|
|
48832
48915
|
else {
|
|
48833
48916
|
// browser-friendly dynamic import -> code-split chunk
|
|
@@ -48836,7 +48919,7 @@
|
|
|
48836
48919
|
return cachedS3Client;
|
|
48837
48920
|
}
|
|
48838
48921
|
|
|
48839
|
-
async function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, previousSourceFileInfo, previousMappedFileInfo, }) {
|
|
48922
|
+
async function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, hashPartSize, previousSourceFileInfo, previousMappedFileInfo, }) {
|
|
48840
48923
|
const startTime = performance.now();
|
|
48841
48924
|
let mtime;
|
|
48842
48925
|
// 1) Read the file (from handle or blob)
|
|
@@ -48864,7 +48947,7 @@
|
|
|
48864
48947
|
throw new Error(`Failed to fetch ${fileInfo.url}: ${resp.status} ${resp.statusText}`);
|
|
48865
48948
|
}
|
|
48866
48949
|
file = await resp.blob();
|
|
48867
|
-
const lastModifiedHeader = resp.headers.get('last-modified')
|
|
48950
|
+
const lastModifiedHeader = resp.headers.get('last-modified');
|
|
48868
48951
|
if (lastModifiedHeader) {
|
|
48869
48952
|
mtime = new Date(lastModifiedHeader).toISOString();
|
|
48870
48953
|
}
|
|
@@ -48937,7 +49020,9 @@
|
|
|
48937
49020
|
}
|
|
48938
49021
|
}
|
|
48939
49022
|
// 3) read bytes (needed for deep hash)
|
|
48940
|
-
|
|
49023
|
+
// Use let so we can null the reference after the last use, allowing GC to
|
|
49024
|
+
// reclaim the buffer while the rest of the function (upload, hashing) runs.
|
|
49025
|
+
let fileArrayBuffer = await file.arrayBuffer();
|
|
48941
49026
|
let preMappedHash;
|
|
48942
49027
|
let postMappedHash;
|
|
48943
49028
|
const postMappedHashHeader = 'x-source-file-hash';
|
|
@@ -48945,8 +49030,8 @@
|
|
|
48945
49030
|
let canSkip = false;
|
|
48946
49031
|
if (previousSourceFileInfo?.preMappedHash !== undefined) {
|
|
48947
49032
|
try {
|
|
48948
|
-
// choose hashing algorithm: default to
|
|
48949
|
-
preMappedHash = await hash(fileArrayBuffer, hashMethod
|
|
49033
|
+
// choose hashing algorithm: default to md5 for S3 ETag compatibility
|
|
49034
|
+
preMappedHash = await hash(fileArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
48950
49035
|
}
|
|
48951
49036
|
catch (e) {
|
|
48952
49037
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e);
|
|
@@ -49059,8 +49144,8 @@
|
|
|
49059
49144
|
// If we didn't compute preMappedHash yet, do it now
|
|
49060
49145
|
if (!preMappedHash) {
|
|
49061
49146
|
try {
|
|
49062
|
-
// choose hashing algorithm: default to
|
|
49063
|
-
preMappedHash = await hash(fileArrayBuffer, hashMethod
|
|
49147
|
+
// choose hashing algorithm: default to md5 for S3 ETag compatibility
|
|
49148
|
+
preMappedHash = await hash(fileArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
49064
49149
|
}
|
|
49065
49150
|
catch (e) {
|
|
49066
49151
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e);
|
|
@@ -49077,7 +49162,13 @@
|
|
|
49077
49162
|
allowInvalidVRLength: true,
|
|
49078
49163
|
});
|
|
49079
49164
|
// Always calculate post-mapped hash even if deep compare is not requested
|
|
49080
|
-
postMappedHash = await hash(modifiedArrayBuffer, hashMethod
|
|
49165
|
+
postMappedHash = await hash(modifiedArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
49166
|
+
// Release the original file buffer — the modifiedArrayBuffer is all we
|
|
49167
|
+
// need from this point. In the passthrough case (no header changes),
|
|
49168
|
+
// modifiedArrayBuffer === fileArrayBuffer so the data stays alive through
|
|
49169
|
+
// that reference; in the modified case, fileArrayBuffer is a separate
|
|
49170
|
+
// allocation that can now be GC'd.
|
|
49171
|
+
fileArrayBuffer = null;
|
|
49081
49172
|
const previousPostMappedHash = previousMappedFileInfo
|
|
49082
49173
|
? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash
|
|
49083
49174
|
: undefined;
|
|
@@ -49116,15 +49207,16 @@
|
|
|
49116
49207
|
const fullFilePath = path.join(fullDirPath, fileName);
|
|
49117
49208
|
await fs.writeFile(fullFilePath, new DataView(modifiedArrayBuffer));
|
|
49118
49209
|
}
|
|
49119
|
-
else {
|
|
49210
|
+
else if (!outputTarget?.http && !outputTarget?.s3) {
|
|
49211
|
+
// Only create mappedBlob when there is no output target at all (no
|
|
49212
|
+
// directory, no HTTP endpoint, no S3 bucket). When an upload target is
|
|
49213
|
+
// present the blob has already been consumed and keeping it around
|
|
49214
|
+
// retains the full file content in memory for every processed file,
|
|
49215
|
+
// causing OOM crashes at scale.
|
|
49120
49216
|
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
49121
49217
|
type: 'application/octet-stream',
|
|
49122
49218
|
});
|
|
49123
49219
|
}
|
|
49124
|
-
// If no directory or even if directory present, expose mappedBlob for consumers
|
|
49125
|
-
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
49126
|
-
type: 'application/octet-stream',
|
|
49127
|
-
});
|
|
49128
49220
|
// If upload URL (bucket) is provided, perform an HTTP PUT upload to the server
|
|
49129
49221
|
if (outputTarget?.http) {
|
|
49130
49222
|
try {
|
|
@@ -49137,32 +49229,36 @@
|
|
|
49137
49229
|
const uploadUrl = `${outputTarget.http.url}/${key}`;
|
|
49138
49230
|
// Create headers per helper described by the user
|
|
49139
49231
|
const headers = {
|
|
49140
|
-
'Content-Type':
|
|
49232
|
+
'Content-Type': 'application/octet-stream',
|
|
49141
49233
|
'X-File-Name': fileName,
|
|
49142
|
-
'X-File-Type':
|
|
49234
|
+
'X-File-Type': 'application/octet-stream',
|
|
49143
49235
|
'X-File-Size': String(modifiedArrayBuffer.byteLength),
|
|
49144
|
-
'X-Source-File-Size': String(clonedMapResults.fileInfo?.size
|
|
49145
|
-
'X-Source-File-Modified-Time': mtime
|
|
49146
|
-
'X-Source-File-Hash': preMappedHash
|
|
49236
|
+
'X-Source-File-Size': String(clonedMapResults.fileInfo?.size ?? ''),
|
|
49237
|
+
'X-Source-File-Modified-Time': mtime ?? '',
|
|
49238
|
+
'X-Source-File-Hash': preMappedHash ?? '',
|
|
49147
49239
|
};
|
|
49148
49240
|
if (outputTarget.http.headers) {
|
|
49149
49241
|
Object.assign(headers, outputTarget.http.headers);
|
|
49150
49242
|
}
|
|
49151
49243
|
if (postMappedHashHeader && postMappedHash)
|
|
49152
49244
|
headers[postMappedHashHeader] = postMappedHash;
|
|
49245
|
+
// Build the request body as a temporary Blob over the ArrayBuffer; it is not
|
|
49246
|
+
// stored on clonedMapResults (mappedBlob is skipped when an upload target is set).
|
|
49153
49247
|
const resp = await fetchWithRetry(uploadUrl, {
|
|
49154
49248
|
method: 'PUT',
|
|
49155
49249
|
headers,
|
|
49156
|
-
body:
|
|
49250
|
+
body: new Blob([modifiedArrayBuffer], {
|
|
49251
|
+
type: 'application/octet-stream',
|
|
49252
|
+
}),
|
|
49157
49253
|
});
|
|
49158
49254
|
if (!resp.ok) {
|
|
49159
49255
|
console.error(`Upload failed for ${uploadUrl}: ${resp.status} ${resp.statusText}`);
|
|
49160
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
49256
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
49161
49257
|
clonedMapResults.errors.push(`Upload failed: ${resp.status} ${resp.statusText}`);
|
|
49162
49258
|
}
|
|
49163
49259
|
else {
|
|
49164
49260
|
// attach upload info if available
|
|
49165
|
-
clonedMapResults.outputUpload = clonedMapResults.outputUpload
|
|
49261
|
+
clonedMapResults.outputUpload = clonedMapResults.outputUpload ?? {
|
|
49166
49262
|
url: uploadUrl,
|
|
49167
49263
|
status: resp.status,
|
|
49168
49264
|
};
|
|
@@ -49170,7 +49266,7 @@
|
|
|
49170
49266
|
}
|
|
49171
49267
|
catch (e) {
|
|
49172
49268
|
console.error('Upload error', e);
|
|
49173
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
49269
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
49174
49270
|
clonedMapResults.errors.push(`Upload error: ${e instanceof Error ? e.message : String(e)}`);
|
|
49175
49271
|
}
|
|
49176
49272
|
}
|
|
@@ -49193,12 +49289,14 @@
|
|
|
49193
49289
|
await client.send(new s3.PutObjectCommand({
|
|
49194
49290
|
Bucket: outputTarget.s3.bucketName,
|
|
49195
49291
|
Key: key,
|
|
49196
|
-
|
|
49197
|
-
|
|
49292
|
+
// Use the ArrayBuffer directly — going through Blob.arrayBuffer()
|
|
49293
|
+
// would create yet another copy of the data in memory.
|
|
49294
|
+
Body: new Uint8Array(modifiedArrayBuffer),
|
|
49295
|
+
ContentType: 'application/octet-stream',
|
|
49198
49296
|
Metadata: {
|
|
49199
|
-
'source-file-size': String(clonedMapResults.fileInfo?.size
|
|
49200
|
-
'source-file-modified-time': mtime
|
|
49201
|
-
'source-file-hash': preMappedHash
|
|
49297
|
+
'source-file-size': String(clonedMapResults.fileInfo?.size ?? ''),
|
|
49298
|
+
'source-file-modified-time': mtime ?? '',
|
|
49299
|
+
'source-file-hash': preMappedHash ?? '',
|
|
49202
49300
|
...(postMappedHash
|
|
49203
49301
|
? { 'source-file-post-mapped-hash': postMappedHash }
|
|
49204
49302
|
: {}),
|
|
@@ -49213,7 +49311,7 @@
|
|
|
49213
49311
|
}
|
|
49214
49312
|
catch (e) {
|
|
49215
49313
|
console.error('S3 Upload error', e);
|
|
49216
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
49314
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
49217
49315
|
clonedMapResults.errors.push(`S3 Upload error: ${e instanceof Error ? e.message : String(e)}`);
|
|
49218
49316
|
}
|
|
49219
49317
|
}
|
|
@@ -62464,6 +62562,7 @@
|
|
|
62464
62562
|
fileInfo,
|
|
62465
62563
|
outputTarget: event.data.outputTarget ?? {},
|
|
62466
62564
|
hashMethod: event.data.hashMethod,
|
|
62565
|
+
hashPartSize: event.data.hashPartSize,
|
|
62467
62566
|
mappingOptions,
|
|
62468
62567
|
previousSourceFileInfo: event.data.previousFileInfo,
|
|
62469
62568
|
previousMappedFileInfo: (targetName) => {
|
|
@@ -90771,7 +90870,7 @@
|
|
|
90771
90870
|
// For private tags (which don't have keywords), keep as-is
|
|
90772
90871
|
const tagId = isPrivateTag(keyword)
|
|
90773
90872
|
? keyword
|
|
90774
|
-
: data$1.DicomMetaDictionary.nameMap[keyword]?.tag
|
|
90873
|
+
: (data$1.DicomMetaDictionary.nameMap[keyword]?.tag ?? keyword);
|
|
90775
90874
|
// Remove parentheses and commas, convert to the format used in dictionary keys
|
|
90776
90875
|
return tagId.replace(/[(),]/g, '').toLowerCase();
|
|
90777
90876
|
}
|
|
@@ -109053,7 +109152,7 @@
|
|
|
109053
109152
|
// Deal with dcmjs quirk of labeling retired tags with a
|
|
109054
109153
|
// "RETIRED_" prefix
|
|
109055
109154
|
function getVr(keyword) {
|
|
109056
|
-
const element = nameMap[keyword]
|
|
109155
|
+
const element = nameMap[keyword] ?? nameMap[`RETIRED_${keyword}`];
|
|
109057
109156
|
return element?.vr;
|
|
109058
109157
|
}
|
|
109059
109158
|
function temporalVr(vr) {
|
|
@@ -109128,7 +109227,7 @@
|
|
|
109128
109227
|
}
|
|
109129
109228
|
}
|
|
109130
109229
|
}
|
|
109131
|
-
return current[tagName]
|
|
109230
|
+
return current[tagName] ?? null;
|
|
109132
109231
|
}
|
|
109133
109232
|
const { cleanDescriptorsOption, cleanDescriptorsExceptions, retainLongitudinalTemporalInformationOptions, retainPatientCharacteristicsOption, retainDeviceIdentityOption, retainUIDsOption, retainSafePrivateOption, retainInstitutionIdentityOption, } = dicomPS315EOptions;
|
|
109134
109233
|
const taggedps315EEls = ps315EElements.reduce((acc, item) => {
|
|
@@ -110768,17 +110867,20 @@
|
|
|
110768
110867
|
|
|
110769
110868
|
var crcExports = requireCrc();
|
|
110770
110869
|
|
|
110771
|
-
|
|
110870
|
+
const DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024; // 5 MB — matches @aws-sdk/lib-storage default
|
|
110871
|
+
async function hash(buffer, hashMethod, hashPartSize) {
|
|
110772
110872
|
switch (hashMethod) {
|
|
110773
110873
|
case 'sha256':
|
|
110774
110874
|
return await sha256Hex(buffer);
|
|
110775
110875
|
case 'crc32':
|
|
110776
110876
|
return crc32Hex(buffer);
|
|
110777
|
-
case 'md5':
|
|
110778
|
-
return md5Hex(buffer);
|
|
110779
110877
|
case 'crc64':
|
|
110780
|
-
default:
|
|
110781
110878
|
return crc64Hex(buffer);
|
|
110879
|
+
case 'aws-s3-etag-2025':
|
|
110880
|
+
return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
|
|
110881
|
+
case 'md5':
|
|
110882
|
+
default:
|
|
110883
|
+
return md5Hex(buffer);
|
|
110782
110884
|
}
|
|
110783
110885
|
}
|
|
110784
110886
|
// helper: compute sha256 hex
|
|
@@ -110790,6 +110892,49 @@
|
|
|
110790
110892
|
function md5Hex(buffer) {
|
|
110791
110893
|
return md5(new Uint8Array(buffer));
|
|
110792
110894
|
}
|
|
110895
|
+
/**
|
|
110896
|
+
* Compute a hash that matches the S3 ETag for the given buffer.
|
|
110897
|
+
*
|
|
110898
|
+
* - Single-part (buffer.byteLength <= partSize): plain MD5 hex string.
|
|
110899
|
+
* This matches the documented S3 ETag behaviour for objects created via
|
|
110900
|
+
* PUT Object with SSE-S3 (AES256) encryption.
|
|
110901
|
+
*
|
|
110902
|
+
* - Multi-part (buffer.byteLength > partSize): the undocumented but stable
|
|
110903
|
+
* composite format md5(concat(md5_raw(part1) … md5_raw(partN)))-N
|
|
110904
|
+
* that S3 returns for objects created via the Multipart Upload API.
|
|
110905
|
+
*/
|
|
110906
|
+
function awsS3Etag(buffer, partSize) {
|
|
110907
|
+
if (buffer.byteLength <= partSize) {
|
|
110908
|
+
return md5Hex(buffer);
|
|
110909
|
+
}
|
|
110910
|
+
return multipartMd5(buffer, partSize);
|
|
110911
|
+
}
|
|
110912
|
+
/**
|
|
110913
|
+
* Reproduce the S3 multipart ETag for a buffer given a known part size.
|
|
110914
|
+
*
|
|
110915
|
+
* Algorithm (empirically stable since ~2006, undocumented by AWS):
|
|
110916
|
+
* 1. Split buffer into ceil(size / partSize) chunks
|
|
110917
|
+
* 2. Compute raw MD5 (16 bytes) of each chunk
|
|
110918
|
+
* 3. Concatenate all raw digests
|
|
110919
|
+
* 4. Compute MD5 of the concatenation → hex
|
|
110920
|
+
* 5. Append "-" + number of parts
|
|
110921
|
+
*/
|
|
110922
|
+
function multipartMd5(buffer, partSize) {
|
|
110923
|
+
const totalSize = buffer.byteLength;
|
|
110924
|
+
const partCount = Math.ceil(totalSize / partSize);
|
|
110925
|
+
const rawDigests = new Uint8Array(partCount * 16);
|
|
110926
|
+
for (let i = 0; i < partCount; i++) {
|
|
110927
|
+
const start = i * partSize;
|
|
110928
|
+
const end = Math.min(start + partSize, totalSize);
|
|
110929
|
+
const partBuffer = buffer.slice(start, end);
|
|
110930
|
+
// md5() returns a 32-char hex string; convert to 16 raw bytes
|
|
110931
|
+
const hex = md5(new Uint8Array(partBuffer));
|
|
110932
|
+
for (let j = 0; j < 16; j++) {
|
|
110933
|
+
rawDigests[i * 16 + j] = parseInt(hex.slice(j * 2, j * 2 + 2), 16);
|
|
110934
|
+
}
|
|
110935
|
+
}
|
|
110936
|
+
return `${md5(rawDigests)}-${partCount}`;
|
|
110937
|
+
}
|
|
110793
110938
|
// helper: compute crc32 hex (use js-crc). Accepts ArrayBuffer and returns
|
|
110794
110939
|
// lowercase, zero-padded 8-character hex string.
|
|
110795
110940
|
// Accept ArrayBuffer, Uint8Array or Node Buffer and always compute the CRC32
|
|
@@ -110878,7 +111023,7 @@
|
|
|
110878
111023
|
const { createRequire } = await Promise.resolve().then(function () { return _polyfillNode_module; });
|
|
110879
111024
|
const req = createRequire((typeof document === 'undefined' && typeof location === 'undefined' ? require('u' + 'rl').pathToFileURL(__filename).href : typeof document === 'undefined' ? location.href : (_documentCurrentScript && _documentCurrentScript.tagName.toUpperCase() === 'SCRIPT' && _documentCurrentScript.src || new URL('dicom-curate.umd.js', document.baseURI).href)));
|
|
110880
111025
|
const mod = req('@aws-sdk/client-s3');
|
|
110881
|
-
cachedS3Client = mod?.default
|
|
111026
|
+
cachedS3Client = mod?.default ?? mod;
|
|
110882
111027
|
}
|
|
110883
111028
|
else {
|
|
110884
111029
|
// browser-friendly dynamic import -> code-split chunk
|
|
@@ -110887,7 +111032,7 @@
|
|
|
110887
111032
|
return cachedS3Client;
|
|
110888
111033
|
}
|
|
110889
111034
|
|
|
110890
|
-
async function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, previousSourceFileInfo, previousMappedFileInfo, }) {
|
|
111035
|
+
async function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, hashPartSize, previousSourceFileInfo, previousMappedFileInfo, }) {
|
|
110891
111036
|
const startTime = performance.now();
|
|
110892
111037
|
let mtime;
|
|
110893
111038
|
// 1) Read the file (from handle or blob)
|
|
@@ -110915,7 +111060,7 @@
|
|
|
110915
111060
|
throw new Error(`Failed to fetch ${fileInfo.url}: ${resp.status} ${resp.statusText}`);
|
|
110916
111061
|
}
|
|
110917
111062
|
file = await resp.blob();
|
|
110918
|
-
const lastModifiedHeader = resp.headers.get('last-modified')
|
|
111063
|
+
const lastModifiedHeader = resp.headers.get('last-modified');
|
|
110919
111064
|
if (lastModifiedHeader) {
|
|
110920
111065
|
mtime = new Date(lastModifiedHeader).toISOString();
|
|
110921
111066
|
}
|
|
@@ -110988,7 +111133,9 @@
|
|
|
110988
111133
|
}
|
|
110989
111134
|
}
|
|
110990
111135
|
// 3) read bytes (needed for deep hash)
|
|
110991
|
-
|
|
111136
|
+
// Use let so we can null the reference after the last use, allowing GC to
|
|
111137
|
+
// reclaim the buffer while the rest of the function (upload, hashing) runs.
|
|
111138
|
+
let fileArrayBuffer = await file.arrayBuffer();
|
|
110992
111139
|
let preMappedHash;
|
|
110993
111140
|
let postMappedHash;
|
|
110994
111141
|
const postMappedHashHeader = 'x-source-file-hash';
|
|
@@ -110996,8 +111143,8 @@
|
|
|
110996
111143
|
let canSkip = false;
|
|
110997
111144
|
if (previousSourceFileInfo?.preMappedHash !== undefined) {
|
|
110998
111145
|
try {
|
|
110999
|
-
// choose hashing algorithm: default to
|
|
111000
|
-
preMappedHash = await hash(fileArrayBuffer, hashMethod
|
|
111146
|
+
// choose hashing algorithm: default to md5 for S3 ETag compatibility
|
|
111147
|
+
preMappedHash = await hash(fileArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
111001
111148
|
}
|
|
111002
111149
|
catch (e) {
|
|
111003
111150
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e);
|
|
@@ -111110,8 +111257,8 @@
|
|
|
111110
111257
|
// If we didn't compute preMappedHash yet, do it now
|
|
111111
111258
|
if (!preMappedHash) {
|
|
111112
111259
|
try {
|
|
111113
|
-
// choose hashing algorithm: default to
|
|
111114
|
-
preMappedHash = await hash(fileArrayBuffer, hashMethod
|
|
111260
|
+
// choose hashing algorithm: default to md5 for S3 ETag compatibility
|
|
111261
|
+
preMappedHash = await hash(fileArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
111115
111262
|
}
|
|
111116
111263
|
catch (e) {
|
|
111117
111264
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e);
|
|
@@ -111128,7 +111275,13 @@
|
|
|
111128
111275
|
allowInvalidVRLength: true,
|
|
111129
111276
|
});
|
|
111130
111277
|
// Always calculate post-mapped hash even if deep compare is not requested
|
|
111131
|
-
postMappedHash = await hash(modifiedArrayBuffer, hashMethod
|
|
111278
|
+
postMappedHash = await hash(modifiedArrayBuffer, hashMethod ?? 'md5', hashPartSize);
|
|
111279
|
+
// Release the original file buffer — the modifiedArrayBuffer is all we
|
|
111280
|
+
// need from this point. In the passthrough case (no header changes),
|
|
111281
|
+
// modifiedArrayBuffer === fileArrayBuffer so the data stays alive through
|
|
111282
|
+
// that reference; in the modified case, fileArrayBuffer is a separate
|
|
111283
|
+
// allocation that can now be GC'd.
|
|
111284
|
+
fileArrayBuffer = null;
|
|
111132
111285
|
const previousPostMappedHash = previousMappedFileInfo
|
|
111133
111286
|
? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash
|
|
111134
111287
|
: undefined;
|
|
@@ -111167,15 +111320,16 @@
|
|
|
111167
111320
|
const fullFilePath = path.join(fullDirPath, fileName);
|
|
111168
111321
|
await fs.writeFile(fullFilePath, new DataView(modifiedArrayBuffer));
|
|
111169
111322
|
}
|
|
111170
|
-
else {
|
|
111323
|
+
else if (!outputTarget?.http && !outputTarget?.s3) {
|
|
111324
|
+
// Only create mappedBlob when there is no output target at all (no
|
|
111325
|
+
// directory, no HTTP endpoint, no S3 bucket). When an upload target is
|
|
111326
|
+
// present the blob has already been consumed and keeping it around
|
|
111327
|
+
// retains the full file content in memory for every processed file,
|
|
111328
|
+
// causing OOM crashes at scale.
|
|
111171
111329
|
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
111172
111330
|
type: 'application/octet-stream',
|
|
111173
111331
|
});
|
|
111174
111332
|
}
|
|
111175
|
-
// If no directory or even if directory present, expose mappedBlob for consumers
|
|
111176
|
-
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
111177
|
-
type: 'application/octet-stream',
|
|
111178
|
-
});
|
|
111179
111333
|
// If upload URL (bucket) is provided, perform an HTTP PUT upload to the server
|
|
111180
111334
|
if (outputTarget?.http) {
|
|
111181
111335
|
try {
|
|
@@ -111188,32 +111342,36 @@
|
|
|
111188
111342
|
const uploadUrl = `${outputTarget.http.url}/${key}`;
|
|
111189
111343
|
// Create headers per helper described by the user
|
|
111190
111344
|
const headers = {
|
|
111191
|
-
'Content-Type':
|
|
111345
|
+
'Content-Type': 'application/octet-stream',
|
|
111192
111346
|
'X-File-Name': fileName,
|
|
111193
|
-
'X-File-Type':
|
|
111347
|
+
'X-File-Type': 'application/octet-stream',
|
|
111194
111348
|
'X-File-Size': String(modifiedArrayBuffer.byteLength),
|
|
111195
|
-
'X-Source-File-Size': String(clonedMapResults.fileInfo?.size
|
|
111196
|
-
'X-Source-File-Modified-Time': mtime
|
|
111197
|
-
'X-Source-File-Hash': preMappedHash
|
|
111349
|
+
'X-Source-File-Size': String(clonedMapResults.fileInfo?.size ?? ''),
|
|
111350
|
+
'X-Source-File-Modified-Time': mtime ?? '',
|
|
111351
|
+
'X-Source-File-Hash': preMappedHash ?? '',
|
|
111198
111352
|
};
|
|
111199
111353
|
if (outputTarget.http.headers) {
|
|
111200
111354
|
Object.assign(headers, outputTarget.http.headers);
|
|
111201
111355
|
}
|
|
111202
111356
|
if (postMappedHashHeader && postMappedHash)
|
|
111203
111357
|
headers[postMappedHashHeader] = postMappedHash;
|
|
111358
|
+
// Send the ArrayBuffer directly instead of wrapping in a Blob first —
|
|
111359
|
+
// avoids an extra copy in memory.
|
|
111204
111360
|
const resp = await fetchWithRetry(uploadUrl, {
|
|
111205
111361
|
method: 'PUT',
|
|
111206
111362
|
headers,
|
|
111207
|
-
body:
|
|
111363
|
+
body: new Blob([modifiedArrayBuffer], {
|
|
111364
|
+
type: 'application/octet-stream',
|
|
111365
|
+
}),
|
|
111208
111366
|
});
|
|
111209
111367
|
if (!resp.ok) {
|
|
111210
111368
|
console.error(`Upload failed for ${uploadUrl}: ${resp.status} ${resp.statusText}`);
|
|
111211
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
111369
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
111212
111370
|
clonedMapResults.errors.push(`Upload failed: ${resp.status} ${resp.statusText}`);
|
|
111213
111371
|
}
|
|
111214
111372
|
else {
|
|
111215
111373
|
// attach upload info if available
|
|
111216
|
-
clonedMapResults.outputUpload = clonedMapResults.outputUpload
|
|
111374
|
+
clonedMapResults.outputUpload = clonedMapResults.outputUpload ?? {
|
|
111217
111375
|
url: uploadUrl,
|
|
111218
111376
|
status: resp.status,
|
|
111219
111377
|
};
|
|
@@ -111221,7 +111379,7 @@
|
|
|
111221
111379
|
}
|
|
111222
111380
|
catch (e) {
|
|
111223
111381
|
console.error('Upload error', e);
|
|
111224
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
111382
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
111225
111383
|
clonedMapResults.errors.push(`Upload error: ${e instanceof Error ? e.message : String(e)}`);
|
|
111226
111384
|
}
|
|
111227
111385
|
}
|
|
@@ -111244,12 +111402,14 @@
|
|
|
111244
111402
|
await client.send(new s3.PutObjectCommand({
|
|
111245
111403
|
Bucket: outputTarget.s3.bucketName,
|
|
111246
111404
|
Key: key,
|
|
111247
|
-
|
|
111248
|
-
|
|
111405
|
+
// Use the ArrayBuffer directly — going through Blob.arrayBuffer()
|
|
111406
|
+
// would create yet another copy of the data in memory.
|
|
111407
|
+
Body: new Uint8Array(modifiedArrayBuffer),
|
|
111408
|
+
ContentType: 'application/octet-stream',
|
|
111249
111409
|
Metadata: {
|
|
111250
|
-
'source-file-size': String(clonedMapResults.fileInfo?.size
|
|
111251
|
-
'source-file-modified-time': mtime
|
|
111252
|
-
'source-file-hash': preMappedHash
|
|
111410
|
+
'source-file-size': String(clonedMapResults.fileInfo?.size ?? ''),
|
|
111411
|
+
'source-file-modified-time': mtime ?? '',
|
|
111412
|
+
'source-file-hash': preMappedHash ?? '',
|
|
111253
111413
|
...(postMappedHash
|
|
111254
111414
|
? { 'source-file-post-mapped-hash': postMappedHash }
|
|
111255
111415
|
: {}),
|
|
@@ -111264,7 +111424,7 @@
|
|
|
111264
111424
|
}
|
|
111265
111425
|
catch (e) {
|
|
111266
111426
|
console.error('S3 Upload error', e);
|
|
111267
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
111427
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
111268
111428
|
clonedMapResults.errors.push(`S3 Upload error: ${e instanceof Error ? e.message : String(e)}`);
|
|
111269
111429
|
}
|
|
111270
111430
|
}
|
|
@@ -126760,16 +126920,40 @@
|
|
|
126760
126920
|
let scanAnomalies = [];
|
|
126761
126921
|
// Callbacks set by curateMany, stored here for use by the dispatch loop.
|
|
126762
126922
|
let progressCallback = () => { };
|
|
126923
|
+
// Callback to resume the scan worker when the processing queue drains below
|
|
126924
|
+
// the low-water mark. Set by curateMany via setScanResumeCallback().
|
|
126925
|
+
let scanResumeCallback = null;
|
|
126926
|
+
let scanPaused = false;
|
|
126927
|
+
/**
|
|
126928
|
+
* Low-water mark for the file processing queue. When the queue size drops
|
|
126929
|
+
* below this threshold after a dispatch, the scan worker is resumed.
|
|
126930
|
+
*/
|
|
126931
|
+
const LOW_WATER_MARK = 50;
|
|
126763
126932
|
// -------------------------------------------------------------------------
|
|
126764
126933
|
// Public API
|
|
126765
126934
|
// -------------------------------------------------------------------------
|
|
126766
126935
|
function setMappingWorkerOptions(opts) {
|
|
126767
126936
|
mappingWorkerOptions = opts;
|
|
126768
126937
|
}
|
|
126938
|
+
/**
|
|
126939
|
+
* Register a callback that resumes the scan worker. Called by curateMany
|
|
126940
|
+
* after the scan worker is created.
|
|
126941
|
+
*/
|
|
126942
|
+
function setScanResumeCallback(cb) {
|
|
126943
|
+
scanResumeCallback = cb;
|
|
126944
|
+
scanPaused = false;
|
|
126945
|
+
}
|
|
126946
|
+
/**
|
|
126947
|
+
* Mark the scan as paused. Called from the scan worker message handler in
|
|
126948
|
+
* index.ts when the queue exceeds the high-water mark.
|
|
126949
|
+
*/
|
|
126950
|
+
function markScanPaused() {
|
|
126951
|
+
scanPaused = true;
|
|
126952
|
+
}
|
|
126769
126953
|
/**
|
|
126770
126954
|
* Initialize the mapping worker pool. Call once per curateMany invocation.
|
|
126771
126955
|
*/
|
|
126772
|
-
async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb) {
|
|
126956
|
+
async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb, workerCount) {
|
|
126773
126957
|
mappingWorkerOptions = {};
|
|
126774
126958
|
workersActive = 0;
|
|
126775
126959
|
mapResultsList = skipCollectingMappings ? undefined : [];
|
|
@@ -126783,8 +126967,8 @@
|
|
|
126783
126967
|
scanAnomalies = [];
|
|
126784
126968
|
if (progressCb)
|
|
126785
126969
|
progressCallback = progressCb;
|
|
126786
|
-
const
|
|
126787
|
-
const workers = await Promise.all(Array.from({ length:
|
|
126970
|
+
const effectiveWorkerCount = workerCount ?? Math.min(await getHardwareConcurrency(), 8);
|
|
126971
|
+
const workers = await Promise.all(Array.from({ length: effectiveWorkerCount }, () => createMappingWorker(fileInfoIndex)));
|
|
126788
126972
|
availableMappingWorkers.push(...workers);
|
|
126789
126973
|
}
|
|
126790
126974
|
/**
|
|
@@ -126799,7 +126983,7 @@
|
|
|
126799
126983
|
// Track which file this worker is processing so we can identify it
|
|
126800
126984
|
// if the worker crashes.
|
|
126801
126985
|
workerCurrentFile.set(mappingWorker, fileInfo);
|
|
126802
|
-
const { outputTarget, hashMethod, ...mappingOptions } =
|
|
126986
|
+
const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } =
|
|
126803
126987
|
// Not partial anymore.
|
|
126804
126988
|
mappingWorkerOptions;
|
|
126805
126989
|
mappingWorker.postMessage({
|
|
@@ -126808,10 +126992,20 @@
|
|
|
126808
126992
|
outputTarget: await getHttpOutputHeaders(outputTarget),
|
|
126809
126993
|
previousFileInfo,
|
|
126810
126994
|
hashMethod,
|
|
126995
|
+
hashPartSize,
|
|
126811
126996
|
serializedMappingOptions: serializeMappingOptions(mappingOptions),
|
|
126812
126997
|
});
|
|
126813
126998
|
workersActive += 1;
|
|
126814
126999
|
}
|
|
127000
|
+
// Backpressure: resume the scan worker when the queue drains below the
|
|
127001
|
+
// low-water mark. This prevents the queue from staying empty while the
|
|
127002
|
+
// scan worker is paused.
|
|
127003
|
+
if (scanPaused &&
|
|
127004
|
+
filesToProcess.length < LOW_WATER_MARK &&
|
|
127005
|
+
scanResumeCallback) {
|
|
127006
|
+
scanPaused = false;
|
|
127007
|
+
scanResumeCallback();
|
|
127008
|
+
}
|
|
126815
127009
|
if (workersActive === 0 &&
|
|
126816
127010
|
pendingReplacements === 0 &&
|
|
126817
127011
|
directoryScanFinished &&
|
|
@@ -126851,6 +127045,18 @@
|
|
|
126851
127045
|
// -------------------------------------------------------------------------
|
|
126852
127046
|
// Internal helpers
|
|
126853
127047
|
// -------------------------------------------------------------------------
|
|
127048
|
+
/**
|
|
127049
|
+
* Return the number of logical CPUs available, working in both browser and
|
|
127050
|
+
* Node.js environments. Falls back to `os.cpus().length` when the global
|
|
127051
|
+
* `navigator` object is not available (Node.js < 21).
|
|
127052
|
+
*/
|
|
127053
|
+
async function getHardwareConcurrency() {
|
|
127054
|
+
if (typeof navigator !== 'undefined' && navigator.hardwareConcurrency) {
|
|
127055
|
+
return navigator.hardwareConcurrency;
|
|
127056
|
+
}
|
|
127057
|
+
const { cpus } = await import('node:os');
|
|
127058
|
+
return cpus().length;
|
|
127059
|
+
}
|
|
126854
127060
|
/**
|
|
126855
127061
|
* Recover from a mapping worker crash. Returns the worker slot, counts the
|
|
126856
127062
|
* in-flight file as a mapping error, and re-dispatches. Called from onerror,
|
|
@@ -127049,10 +127255,14 @@
|
|
|
127049
127255
|
scanAnomalies: [], // Files sent to processing have no scan anomalies
|
|
127050
127256
|
previousFileInfo,
|
|
127051
127257
|
});
|
|
127052
|
-
//
|
|
127053
|
-
//
|
|
127054
|
-
//
|
|
127055
|
-
|
|
127258
|
+
// Backpressure: when the queue grows too large, pause the scan
|
|
127259
|
+
// worker so file handles don't accumulate unboundedly in memory.
|
|
127260
|
+
// The scan worker supports 'stop' and 'resume' commands.
|
|
127261
|
+
const HIGH_WATER_MARK = 100;
|
|
127262
|
+
if (filesToProcess.length > HIGH_WATER_MARK) {
|
|
127263
|
+
fileListWorker.postMessage({ request: 'stop' });
|
|
127264
|
+
markScanPaused();
|
|
127265
|
+
}
|
|
127056
127266
|
dispatchMappingJobs();
|
|
127057
127267
|
break;
|
|
127058
127268
|
}
|
|
@@ -127123,6 +127333,7 @@
|
|
|
127123
127333
|
const skipModifications = organizeOptions.skipModifications ?? false;
|
|
127124
127334
|
const skipValidation = organizeOptions.skipValidation ?? false;
|
|
127125
127335
|
const hashMethod = organizeOptions.hashMethod;
|
|
127336
|
+
const hashPartSize = organizeOptions.hashPartSize;
|
|
127126
127337
|
const dateOffset = organizeOptions.dateOffset;
|
|
127127
127338
|
if (requiresDateOffset(deIdOpts) && !dateOffset?.match(iso8601)) {
|
|
127128
127339
|
throw new Error('When using "Offset" for retainLongitudinalTemporalInformationOptions, an iso8601 compatible dateOffset must be provided.');
|
|
@@ -127136,6 +127347,7 @@
|
|
|
127136
127347
|
skipValidation,
|
|
127137
127348
|
dateOffset,
|
|
127138
127349
|
hashMethod,
|
|
127350
|
+
hashPartSize,
|
|
127139
127351
|
};
|
|
127140
127352
|
}
|
|
127141
127353
|
function queueFilesForMapping(organizeOptions) {
|
|
@@ -127215,7 +127427,7 @@
|
|
|
127215
127427
|
};
|
|
127216
127428
|
try {
|
|
127217
127429
|
// create the mapping workers
|
|
127218
|
-
await initializeMappingWorkers(organizeOptions.skipCollectingMappings, organizeOptions.fileInfoIndex, progressCallback);
|
|
127430
|
+
await initializeMappingWorkers(organizeOptions.skipCollectingMappings, organizeOptions.fileInfoIndex, progressCallback, organizeOptions.workerCount);
|
|
127219
127431
|
// Set global mappingWorkerOptions
|
|
127220
127432
|
setMappingWorkerOptions((await collectMappingOptions(organizeOptions)));
|
|
127221
127433
|
//
|
|
@@ -127228,6 +127440,11 @@
|
|
|
127228
127440
|
organizeOptions.inputType === 'path' ||
|
|
127229
127441
|
organizeOptions.inputType === 's3') {
|
|
127230
127442
|
const fileListWorker = await initializeFileListWorker(rejectCallback);
|
|
127443
|
+
// Wire up backpressure resume: when the dispatch loop drains the
|
|
127444
|
+
// queue below the low-water mark, it calls this to resume scanning.
|
|
127445
|
+
setScanResumeCallback(() => {
|
|
127446
|
+
fileListWorker.postMessage({ request: 'resume' });
|
|
127447
|
+
});
|
|
127231
127448
|
let specExcludedFiletypes;
|
|
127232
127449
|
let noDicomSignatureCheck = false;
|
|
127233
127450
|
let noDefaultExclusions = false;
|