dicom-curate 0.26.2 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/applyMappingsWorker.js +72 -32
- package/dist/esm/collectMappings.js +3 -3
- package/dist/esm/config/dicom/tagConversion.js +1 -1
- package/dist/esm/config/sampleCompositeSpecification.js +1 -1
- package/dist/esm/curateDict.js +3 -3
- package/dist/esm/curateOne.js +71 -32
- package/dist/esm/deidentifyPS315E.js +3 -3
- package/dist/esm/hash.js +28 -4
- package/dist/esm/index.js +113 -38
- package/dist/esm/mappingWorkerPool.js +32 -5
- package/dist/esm/s3Client.js +1 -1
- package/dist/esm/scanDirectoryWorker.js +37 -3
- package/dist/types/applyMappingsWorker.d.ts +1 -0
- package/dist/types/curateOne.d.ts +2 -1
- package/dist/types/hash.d.ts +1 -1
- package/dist/types/mappingWorkerPool.d.ts +12 -1
- package/dist/types/scanDirectoryWorker.d.ts +2 -0
- package/dist/types/types.d.ts +3 -1
- package/dist/umd/dicom-curate.umd.js +300 -83
- package/dist/umd/dicom-curate.umd.js.map +1 -1
- package/dist/umd/dicom-curate.umd.min.js +7 -7
- package/dist/umd/dicom-curate.umd.min.js.map +1 -1
- package/package.json +1 -1
package/dist/esm/index.js
CHANGED
|
@@ -79567,7 +79567,7 @@ function isPrivateTag(tagId) {
|
|
|
79567
79567
|
return false;
|
|
79568
79568
|
}
|
|
79569
79569
|
function convertKeywordToTagId(keyword) {
|
|
79570
|
-
const tagId = isPrivateTag(keyword) ? keyword : dcmjs.data.DicomMetaDictionary.nameMap[keyword]?.tag
|
|
79570
|
+
const tagId = isPrivateTag(keyword) ? keyword : dcmjs.data.DicomMetaDictionary.nameMap[keyword]?.tag ?? keyword;
|
|
79571
79571
|
return tagId.replace(/[(),]/g, "").toLowerCase();
|
|
79572
79572
|
}
|
|
79573
79573
|
function convertKeywordPathToTagIdPath(keywordPath) {
|
|
@@ -80427,7 +80427,7 @@ function getCid7050Codes(options) {
|
|
|
80427
80427
|
var import_lodash = __toESM(require_lodash(), 1);
|
|
80428
80428
|
var nameMap = dcmjs2.data.DicomMetaDictionary.nameMap;
|
|
80429
80429
|
function getVr(keyword) {
|
|
80430
|
-
const element = nameMap[keyword]
|
|
80430
|
+
const element = nameMap[keyword] ?? nameMap[`RETIRED_${keyword}`];
|
|
80431
80431
|
return element?.vr;
|
|
80432
80432
|
}
|
|
80433
80433
|
function temporalVr(vr) {
|
|
@@ -80499,7 +80499,7 @@ function deidentifyPS315E({
|
|
|
80499
80499
|
}
|
|
80500
80500
|
}
|
|
80501
80501
|
}
|
|
80502
|
-
return current2[tagName]
|
|
80502
|
+
return current2[tagName] ?? null;
|
|
80503
80503
|
}
|
|
80504
80504
|
const {
|
|
80505
80505
|
cleanDescriptorsOption,
|
|
@@ -81248,17 +81248,20 @@ async function fetchWithRetry(...args) {
|
|
|
81248
81248
|
// src/hash.ts
|
|
81249
81249
|
var import_md5 = __toESM(require_md5(), 1);
|
|
81250
81250
|
var import_js_crc = __toESM(require_crc(), 1);
|
|
81251
|
-
|
|
81251
|
+
var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
|
|
81252
|
+
async function hash(buffer, hashMethod, hashPartSize) {
|
|
81252
81253
|
switch (hashMethod) {
|
|
81253
81254
|
case "sha256":
|
|
81254
81255
|
return await sha256Hex(buffer);
|
|
81255
81256
|
case "crc32":
|
|
81256
81257
|
return crc32Hex(buffer);
|
|
81257
|
-
case "md5":
|
|
81258
|
-
return md5Hex(buffer);
|
|
81259
81258
|
case "crc64":
|
|
81260
|
-
default:
|
|
81261
81259
|
return crc64Hex(buffer);
|
|
81260
|
+
case "aws-s3-etag-2025":
|
|
81261
|
+
return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
|
|
81262
|
+
case "md5":
|
|
81263
|
+
default:
|
|
81264
|
+
return md5Hex(buffer);
|
|
81262
81265
|
}
|
|
81263
81266
|
}
|
|
81264
81267
|
async function sha256Hex(buffer) {
|
|
@@ -81269,6 +81272,27 @@ async function sha256Hex(buffer) {
|
|
|
81269
81272
|
function md5Hex(buffer) {
|
|
81270
81273
|
return (0, import_md5.default)(new Uint8Array(buffer));
|
|
81271
81274
|
}
|
|
81275
|
+
function awsS3Etag(buffer, partSize) {
|
|
81276
|
+
if (buffer.byteLength <= partSize) {
|
|
81277
|
+
return md5Hex(buffer);
|
|
81278
|
+
}
|
|
81279
|
+
return multipartMd5(buffer, partSize);
|
|
81280
|
+
}
|
|
81281
|
+
function multipartMd5(buffer, partSize) {
|
|
81282
|
+
const totalSize = buffer.byteLength;
|
|
81283
|
+
const partCount = Math.ceil(totalSize / partSize);
|
|
81284
|
+
const rawDigests = new Uint8Array(partCount * 16);
|
|
81285
|
+
for (let i4 = 0; i4 < partCount; i4++) {
|
|
81286
|
+
const start = i4 * partSize;
|
|
81287
|
+
const end = Math.min(start + partSize, totalSize);
|
|
81288
|
+
const partBuffer = buffer.slice(start, end);
|
|
81289
|
+
const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
|
|
81290
|
+
for (let j4 = 0; j4 < 16; j4++) {
|
|
81291
|
+
rawDigests[i4 * 16 + j4] = parseInt(hex.slice(j4 * 2, j4 * 2 + 2), 16);
|
|
81292
|
+
}
|
|
81293
|
+
}
|
|
81294
|
+
return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
|
|
81295
|
+
}
|
|
81272
81296
|
function crc32Hex(input) {
|
|
81273
81297
|
let bytes;
|
|
81274
81298
|
if (input instanceof Uint8Array) {
|
|
@@ -81332,7 +81356,7 @@ async function loadS3Client() {
|
|
|
81332
81356
|
const { createRequire } = await import("module");
|
|
81333
81357
|
const req = createRequire(import.meta.url);
|
|
81334
81358
|
const mod = req("@aws-sdk/client-s3");
|
|
81335
|
-
cachedS3Client = mod?.default
|
|
81359
|
+
cachedS3Client = mod?.default ?? mod;
|
|
81336
81360
|
} else {
|
|
81337
81361
|
cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
|
|
81338
81362
|
}
|
|
@@ -81345,6 +81369,7 @@ async function curateOne({
|
|
|
81345
81369
|
outputTarget,
|
|
81346
81370
|
mappingOptions,
|
|
81347
81371
|
hashMethod,
|
|
81372
|
+
hashPartSize,
|
|
81348
81373
|
previousSourceFileInfo,
|
|
81349
81374
|
previousMappedFileInfo
|
|
81350
81375
|
}) {
|
|
@@ -81371,7 +81396,7 @@ async function curateOne({
|
|
|
81371
81396
|
);
|
|
81372
81397
|
}
|
|
81373
81398
|
file = await resp.blob();
|
|
81374
|
-
const lastModifiedHeader = resp.headers.get("last-modified")
|
|
81399
|
+
const lastModifiedHeader = resp.headers.get("last-modified");
|
|
81375
81400
|
if (lastModifiedHeader) {
|
|
81376
81401
|
mtime = new Date(lastModifiedHeader).toISOString();
|
|
81377
81402
|
}
|
|
@@ -81437,14 +81462,18 @@ async function curateOne({
|
|
|
81437
81462
|
} catch (e4) {
|
|
81438
81463
|
}
|
|
81439
81464
|
}
|
|
81440
|
-
|
|
81465
|
+
let fileArrayBuffer = await file.arrayBuffer();
|
|
81441
81466
|
let preMappedHash;
|
|
81442
81467
|
let postMappedHash;
|
|
81443
81468
|
const postMappedHashHeader = "x-source-file-hash";
|
|
81444
81469
|
let canSkip = false;
|
|
81445
81470
|
if (previousSourceFileInfo?.preMappedHash !== void 0) {
|
|
81446
81471
|
try {
|
|
81447
|
-
preMappedHash = await hash(
|
|
81472
|
+
preMappedHash = await hash(
|
|
81473
|
+
fileArrayBuffer,
|
|
81474
|
+
hashMethod ?? "md5",
|
|
81475
|
+
hashPartSize
|
|
81476
|
+
);
|
|
81448
81477
|
} catch (e4) {
|
|
81449
81478
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
|
|
81450
81479
|
}
|
|
@@ -81549,7 +81578,11 @@ async function curateOne({
|
|
|
81549
81578
|
}
|
|
81550
81579
|
if (!preMappedHash) {
|
|
81551
81580
|
try {
|
|
81552
|
-
preMappedHash = await hash(
|
|
81581
|
+
preMappedHash = await hash(
|
|
81582
|
+
fileArrayBuffer,
|
|
81583
|
+
hashMethod ?? "md5",
|
|
81584
|
+
hashPartSize
|
|
81585
|
+
);
|
|
81553
81586
|
} catch (e4) {
|
|
81554
81587
|
console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
|
|
81555
81588
|
}
|
|
@@ -81560,7 +81593,12 @@ async function curateOne({
|
|
|
81560
81593
|
const modifiedArrayBuffer = mappedDicomData.write({
|
|
81561
81594
|
allowInvalidVRLength: true
|
|
81562
81595
|
});
|
|
81563
|
-
postMappedHash = await hash(
|
|
81596
|
+
postMappedHash = await hash(
|
|
81597
|
+
modifiedArrayBuffer,
|
|
81598
|
+
hashMethod ?? "md5",
|
|
81599
|
+
hashPartSize
|
|
81600
|
+
);
|
|
81601
|
+
fileArrayBuffer = null;
|
|
81564
81602
|
const previousPostMappedHash = previousMappedFileInfo ? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash : void 0;
|
|
81565
81603
|
if (previousPostMappedHash !== void 0 && previousPostMappedHash === postMappedHash) {
|
|
81566
81604
|
return noMapResult(clonedMapResults.outputFilePath);
|
|
@@ -81592,26 +81630,23 @@ async function curateOne({
|
|
|
81592
81630
|
}
|
|
81593
81631
|
const fullFilePath = path.join(fullDirPath, fileName);
|
|
81594
81632
|
await fs.writeFile(fullFilePath, new DataView(modifiedArrayBuffer));
|
|
81595
|
-
} else {
|
|
81633
|
+
} else if (!outputTarget?.http && !outputTarget?.s3) {
|
|
81596
81634
|
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
81597
81635
|
type: "application/octet-stream"
|
|
81598
81636
|
});
|
|
81599
81637
|
}
|
|
81600
|
-
clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
|
|
81601
|
-
type: "application/octet-stream"
|
|
81602
|
-
});
|
|
81603
81638
|
if (outputTarget?.http) {
|
|
81604
81639
|
try {
|
|
81605
81640
|
const key = clonedMapResults.outputFilePath.split("/").map(encodeURIComponent).join("/");
|
|
81606
81641
|
const uploadUrl = `${outputTarget.http.url}/${key}`;
|
|
81607
81642
|
const headers = {
|
|
81608
|
-
"Content-Type":
|
|
81643
|
+
"Content-Type": "application/octet-stream",
|
|
81609
81644
|
"X-File-Name": fileName,
|
|
81610
|
-
"X-File-Type":
|
|
81645
|
+
"X-File-Type": "application/octet-stream",
|
|
81611
81646
|
"X-File-Size": String(modifiedArrayBuffer.byteLength),
|
|
81612
|
-
"X-Source-File-Size": String(clonedMapResults.fileInfo?.size
|
|
81613
|
-
"X-Source-File-Modified-Time": mtime
|
|
81614
|
-
"X-Source-File-Hash": preMappedHash
|
|
81647
|
+
"X-Source-File-Size": String(clonedMapResults.fileInfo?.size ?? ""),
|
|
81648
|
+
"X-Source-File-Modified-Time": mtime ?? "",
|
|
81649
|
+
"X-Source-File-Hash": preMappedHash ?? ""
|
|
81615
81650
|
};
|
|
81616
81651
|
if (outputTarget.http.headers) {
|
|
81617
81652
|
Object.assign(headers, outputTarget.http.headers);
|
|
@@ -81621,25 +81656,27 @@ async function curateOne({
|
|
|
81621
81656
|
const resp = await fetchWithRetry(uploadUrl, {
|
|
81622
81657
|
method: "PUT",
|
|
81623
81658
|
headers,
|
|
81624
|
-
body:
|
|
81659
|
+
body: new Blob([modifiedArrayBuffer], {
|
|
81660
|
+
type: "application/octet-stream"
|
|
81661
|
+
})
|
|
81625
81662
|
});
|
|
81626
81663
|
if (!resp.ok) {
|
|
81627
81664
|
console.error(
|
|
81628
81665
|
`Upload failed for ${uploadUrl}: ${resp.status} ${resp.statusText}`
|
|
81629
81666
|
);
|
|
81630
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
81667
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
81631
81668
|
clonedMapResults.errors.push(
|
|
81632
81669
|
`Upload failed: ${resp.status} ${resp.statusText}`
|
|
81633
81670
|
);
|
|
81634
81671
|
} else {
|
|
81635
|
-
clonedMapResults.outputUpload = clonedMapResults.outputUpload
|
|
81672
|
+
clonedMapResults.outputUpload = clonedMapResults.outputUpload ?? {
|
|
81636
81673
|
url: uploadUrl,
|
|
81637
81674
|
status: resp.status
|
|
81638
81675
|
};
|
|
81639
81676
|
}
|
|
81640
81677
|
} catch (e4) {
|
|
81641
81678
|
console.error("Upload error", e4);
|
|
81642
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
81679
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
81643
81680
|
clonedMapResults.errors.push(
|
|
81644
81681
|
`Upload error: ${e4 instanceof Error ? e4.message : String(e4)}`
|
|
81645
81682
|
);
|
|
@@ -81659,12 +81696,14 @@ async function curateOne({
|
|
|
81659
81696
|
new s32.PutObjectCommand({
|
|
81660
81697
|
Bucket: outputTarget.s3.bucketName,
|
|
81661
81698
|
Key: key,
|
|
81662
|
-
|
|
81663
|
-
|
|
81699
|
+
// Use the ArrayBuffer directly — going through Blob.arrayBuffer()
|
|
81700
|
+
// would create yet another copy of the data in memory.
|
|
81701
|
+
Body: new Uint8Array(modifiedArrayBuffer),
|
|
81702
|
+
ContentType: "application/octet-stream",
|
|
81664
81703
|
Metadata: {
|
|
81665
|
-
"source-file-size": String(clonedMapResults.fileInfo?.size
|
|
81666
|
-
"source-file-modified-time": mtime
|
|
81667
|
-
"source-file-hash": preMappedHash
|
|
81704
|
+
"source-file-size": String(clonedMapResults.fileInfo?.size ?? ""),
|
|
81705
|
+
"source-file-modified-time": mtime ?? "",
|
|
81706
|
+
"source-file-hash": preMappedHash ?? "",
|
|
81668
81707
|
...postMappedHash ? { "source-file-post-mapped-hash": postMappedHash } : {}
|
|
81669
81708
|
}
|
|
81670
81709
|
})
|
|
@@ -81676,7 +81715,7 @@ async function curateOne({
|
|
|
81676
81715
|
};
|
|
81677
81716
|
} catch (e4) {
|
|
81678
81717
|
console.error("S3 Upload error", e4);
|
|
81679
|
-
clonedMapResults.errors = clonedMapResults.errors
|
|
81718
|
+
clonedMapResults.errors = clonedMapResults.errors ?? [];
|
|
81680
81719
|
clonedMapResults.errors.push(
|
|
81681
81720
|
`S3 Upload error: ${e4 instanceof Error ? e4.message : String(e4)}`
|
|
81682
81721
|
);
|
|
@@ -87444,10 +87483,20 @@ function setDirectoryScanFinished(value) {
|
|
|
87444
87483
|
var scanAnomalies = [];
|
|
87445
87484
|
var progressCallback = () => {
|
|
87446
87485
|
};
|
|
87486
|
+
var scanResumeCallback = null;
|
|
87487
|
+
var scanPaused = false;
|
|
87488
|
+
var LOW_WATER_MARK = 50;
|
|
87447
87489
|
function setMappingWorkerOptions(opts) {
|
|
87448
87490
|
mappingWorkerOptions = opts;
|
|
87449
87491
|
}
|
|
87450
|
-
|
|
87492
|
+
function setScanResumeCallback(cb) {
|
|
87493
|
+
scanResumeCallback = cb;
|
|
87494
|
+
scanPaused = false;
|
|
87495
|
+
}
|
|
87496
|
+
function markScanPaused() {
|
|
87497
|
+
scanPaused = true;
|
|
87498
|
+
}
|
|
87499
|
+
async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb, workerCount) {
|
|
87451
87500
|
mappingWorkerOptions = {};
|
|
87452
87501
|
workersActive = 0;
|
|
87453
87502
|
mapResultsList = skipCollectingMappings ? void 0 : [];
|
|
@@ -87461,9 +87510,12 @@ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, p
|
|
|
87461
87510
|
scanAnomalies = [];
|
|
87462
87511
|
if (progressCb)
|
|
87463
87512
|
progressCallback = progressCb;
|
|
87464
|
-
const
|
|
87513
|
+
const effectiveWorkerCount = workerCount ?? Math.min(await getHardwareConcurrency(), 8);
|
|
87465
87514
|
const workers = await Promise.all(
|
|
87466
|
-
Array.from(
|
|
87515
|
+
Array.from(
|
|
87516
|
+
{ length: effectiveWorkerCount },
|
|
87517
|
+
() => createMappingWorker(fileInfoIndex)
|
|
87518
|
+
)
|
|
87467
87519
|
);
|
|
87468
87520
|
availableMappingWorkers.push(...workers);
|
|
87469
87521
|
}
|
|
@@ -87472,7 +87524,7 @@ async function dispatchMappingJobs() {
|
|
|
87472
87524
|
const { fileInfo, previousFileInfo } = filesToProcess.pop();
|
|
87473
87525
|
const mappingWorker = availableMappingWorkers.pop();
|
|
87474
87526
|
workerCurrentFile.set(mappingWorker, fileInfo);
|
|
87475
|
-
const { outputTarget, hashMethod, ...mappingOptions } = (
|
|
87527
|
+
const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
|
|
87476
87528
|
// Not partial anymore.
|
|
87477
87529
|
mappingWorkerOptions
|
|
87478
87530
|
);
|
|
@@ -87482,10 +87534,15 @@ async function dispatchMappingJobs() {
|
|
|
87482
87534
|
outputTarget: await getHttpOutputHeaders(outputTarget),
|
|
87483
87535
|
previousFileInfo,
|
|
87484
87536
|
hashMethod,
|
|
87537
|
+
hashPartSize,
|
|
87485
87538
|
serializedMappingOptions: serializeMappingOptions(mappingOptions)
|
|
87486
87539
|
});
|
|
87487
87540
|
workersActive += 1;
|
|
87488
87541
|
}
|
|
87542
|
+
if (scanPaused && filesToProcess.length < LOW_WATER_MARK && scanResumeCallback) {
|
|
87543
|
+
scanPaused = false;
|
|
87544
|
+
scanResumeCallback();
|
|
87545
|
+
}
|
|
87489
87546
|
if (workersActive === 0 && pendingReplacements === 0 && directoryScanFinished && filesToProcess.length === 0) {
|
|
87490
87547
|
while (availableMappingWorkers.length) {
|
|
87491
87548
|
availableMappingWorkers.pop().terminate();
|
|
@@ -87517,6 +87574,13 @@ async function dispatchMappingJobs() {
|
|
|
87517
87574
|
});
|
|
87518
87575
|
}
|
|
87519
87576
|
}
|
|
87577
|
+
async function getHardwareConcurrency() {
|
|
87578
|
+
if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
|
|
87579
|
+
return navigator.hardwareConcurrency;
|
|
87580
|
+
}
|
|
87581
|
+
const { cpus } = await import("os");
|
|
87582
|
+
return cpus().length;
|
|
87583
|
+
}
|
|
87520
87584
|
function recoverCrashedWorker(mappingWorker, errorMessage) {
|
|
87521
87585
|
if (!workerCurrentFile.has(mappingWorker)) {
|
|
87522
87586
|
return;
|
|
@@ -87686,6 +87750,11 @@ async function initializeFileListWorker(rejectCallback) {
|
|
|
87686
87750
|
// Files sent to processing have no scan anomalies
|
|
87687
87751
|
previousFileInfo
|
|
87688
87752
|
});
|
|
87753
|
+
const HIGH_WATER_MARK = 100;
|
|
87754
|
+
if (filesToProcess.length > HIGH_WATER_MARK) {
|
|
87755
|
+
fileListWorker.postMessage({ request: "stop" });
|
|
87756
|
+
markScanPaused();
|
|
87757
|
+
}
|
|
87689
87758
|
dispatchMappingJobs();
|
|
87690
87759
|
break;
|
|
87691
87760
|
}
|
|
@@ -87746,6 +87815,7 @@ async function collectMappingOptions(organizeOptions) {
|
|
|
87746
87815
|
const skipModifications = organizeOptions.skipModifications ?? false;
|
|
87747
87816
|
const skipValidation = organizeOptions.skipValidation ?? false;
|
|
87748
87817
|
const hashMethod = organizeOptions.hashMethod;
|
|
87818
|
+
const hashPartSize = organizeOptions.hashPartSize;
|
|
87749
87819
|
const dateOffset = organizeOptions.dateOffset;
|
|
87750
87820
|
if (requiresDateOffset(deIdOpts) && !dateOffset?.match(iso8601)) {
|
|
87751
87821
|
throw new Error(
|
|
@@ -87760,7 +87830,8 @@ async function collectMappingOptions(organizeOptions) {
|
|
|
87760
87830
|
skipModifications,
|
|
87761
87831
|
skipValidation,
|
|
87762
87832
|
dateOffset,
|
|
87763
|
-
hashMethod
|
|
87833
|
+
hashMethod,
|
|
87834
|
+
hashPartSize
|
|
87764
87835
|
};
|
|
87765
87836
|
}
|
|
87766
87837
|
function queueFilesForMapping(organizeOptions) {
|
|
@@ -87830,13 +87901,17 @@ async function curateMany(organizeOptions, onProgress) {
|
|
|
87830
87901
|
await initializeMappingWorkers(
|
|
87831
87902
|
organizeOptions.skipCollectingMappings,
|
|
87832
87903
|
organizeOptions.fileInfoIndex,
|
|
87833
|
-
progressCallback2
|
|
87904
|
+
progressCallback2,
|
|
87905
|
+
organizeOptions.workerCount
|
|
87834
87906
|
);
|
|
87835
87907
|
setMappingWorkerOptions(
|
|
87836
87908
|
await collectMappingOptions(organizeOptions)
|
|
87837
87909
|
);
|
|
87838
87910
|
if (organizeOptions.inputType === "directory" || organizeOptions.inputType === "path" || organizeOptions.inputType === "s3") {
|
|
87839
87911
|
const fileListWorker = await initializeFileListWorker(rejectCallback);
|
|
87912
|
+
setScanResumeCallback(() => {
|
|
87913
|
+
fileListWorker.postMessage({ request: "resume" });
|
|
87914
|
+
});
|
|
87840
87915
|
let specExcludedFiletypes;
|
|
87841
87916
|
let noDicomSignatureCheck = false;
|
|
87842
87917
|
let noDefaultExclusions = false;
|
|
@@ -12061,10 +12061,20 @@ function setDirectoryScanFinished(value) {
|
|
|
12061
12061
|
var scanAnomalies = [];
|
|
12062
12062
|
var progressCallback = () => {
|
|
12063
12063
|
};
|
|
12064
|
+
var scanResumeCallback = null;
|
|
12065
|
+
var scanPaused = false;
|
|
12066
|
+
var LOW_WATER_MARK = 50;
|
|
12064
12067
|
function setMappingWorkerOptions(opts) {
|
|
12065
12068
|
mappingWorkerOptions = opts;
|
|
12066
12069
|
}
|
|
12067
|
-
|
|
12070
|
+
function setScanResumeCallback(cb) {
|
|
12071
|
+
scanResumeCallback = cb;
|
|
12072
|
+
scanPaused = false;
|
|
12073
|
+
}
|
|
12074
|
+
function markScanPaused() {
|
|
12075
|
+
scanPaused = true;
|
|
12076
|
+
}
|
|
12077
|
+
async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb, workerCount) {
|
|
12068
12078
|
mappingWorkerOptions = {};
|
|
12069
12079
|
workersActive = 0;
|
|
12070
12080
|
mapResultsList = skipCollectingMappings ? void 0 : [];
|
|
@@ -12078,9 +12088,12 @@ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, p
|
|
|
12078
12088
|
scanAnomalies = [];
|
|
12079
12089
|
if (progressCb)
|
|
12080
12090
|
progressCallback = progressCb;
|
|
12081
|
-
const
|
|
12091
|
+
const effectiveWorkerCount = workerCount ?? Math.min(await getHardwareConcurrency(), 8);
|
|
12082
12092
|
const workers = await Promise.all(
|
|
12083
|
-
Array.from(
|
|
12093
|
+
Array.from(
|
|
12094
|
+
{ length: effectiveWorkerCount },
|
|
12095
|
+
() => createMappingWorker(fileInfoIndex)
|
|
12096
|
+
)
|
|
12084
12097
|
);
|
|
12085
12098
|
availableMappingWorkers.push(...workers);
|
|
12086
12099
|
}
|
|
@@ -12089,7 +12102,7 @@ async function dispatchMappingJobs() {
|
|
|
12089
12102
|
const { fileInfo, previousFileInfo } = filesToProcess.pop();
|
|
12090
12103
|
const mappingWorker = availableMappingWorkers.pop();
|
|
12091
12104
|
workerCurrentFile.set(mappingWorker, fileInfo);
|
|
12092
|
-
const { outputTarget, hashMethod, ...mappingOptions } = (
|
|
12105
|
+
const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
|
|
12093
12106
|
// Not partial anymore.
|
|
12094
12107
|
mappingWorkerOptions
|
|
12095
12108
|
);
|
|
@@ -12099,10 +12112,15 @@ async function dispatchMappingJobs() {
|
|
|
12099
12112
|
outputTarget: await getHttpOutputHeaders(outputTarget),
|
|
12100
12113
|
previousFileInfo,
|
|
12101
12114
|
hashMethod,
|
|
12115
|
+
hashPartSize,
|
|
12102
12116
|
serializedMappingOptions: serializeMappingOptions(mappingOptions)
|
|
12103
12117
|
});
|
|
12104
12118
|
workersActive += 1;
|
|
12105
12119
|
}
|
|
12120
|
+
if (scanPaused && filesToProcess.length < LOW_WATER_MARK && scanResumeCallback) {
|
|
12121
|
+
scanPaused = false;
|
|
12122
|
+
scanResumeCallback();
|
|
12123
|
+
}
|
|
12106
12124
|
if (workersActive === 0 && pendingReplacements === 0 && directoryScanFinished && filesToProcess.length === 0) {
|
|
12107
12125
|
while (availableMappingWorkers.length) {
|
|
12108
12126
|
availableMappingWorkers.pop().terminate();
|
|
@@ -12134,6 +12152,13 @@ async function dispatchMappingJobs() {
|
|
|
12134
12152
|
});
|
|
12135
12153
|
}
|
|
12136
12154
|
}
|
|
12155
|
+
async function getHardwareConcurrency() {
|
|
12156
|
+
if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
|
|
12157
|
+
return navigator.hardwareConcurrency;
|
|
12158
|
+
}
|
|
12159
|
+
const { cpus } = await import("os");
|
|
12160
|
+
return cpus().length;
|
|
12161
|
+
}
|
|
12137
12162
|
function recoverCrashedWorker(mappingWorker, errorMessage) {
|
|
12138
12163
|
if (!workerCurrentFile.has(mappingWorker)) {
|
|
12139
12164
|
return;
|
|
@@ -12276,7 +12301,9 @@ export {
|
|
|
12276
12301
|
getWorkerCurrentFile,
|
|
12277
12302
|
getWorkersActive,
|
|
12278
12303
|
initializeMappingWorkers,
|
|
12304
|
+
markScanPaused,
|
|
12279
12305
|
scanAnomalies,
|
|
12280
12306
|
setDirectoryScanFinished,
|
|
12281
|
-
setMappingWorkerOptions
|
|
12307
|
+
setMappingWorkerOptions,
|
|
12308
|
+
setScanResumeCallback
|
|
12282
12309
|
};
|
package/dist/esm/s3Client.js
CHANGED
|
@@ -37097,7 +37097,7 @@ async function loadS3Client() {
|
|
|
37097
37097
|
const { createRequire } = await import("module");
|
|
37098
37098
|
const req = createRequire(import.meta.url);
|
|
37099
37099
|
const mod = req("@aws-sdk/client-s3");
|
|
37100
|
-
cachedS3Client = mod?.default
|
|
37100
|
+
cachedS3Client = mod?.default ?? mod;
|
|
37101
37101
|
} else {
|
|
37102
37102
|
cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
|
|
37103
37103
|
}
|
|
@@ -37097,7 +37097,7 @@ async function loadS3Client() {
|
|
|
37097
37097
|
const { createRequire } = await import("module");
|
|
37098
37098
|
const req = createRequire(import.meta.url);
|
|
37099
37099
|
const mod = req("@aws-sdk/client-s3");
|
|
37100
|
-
cachedS3Client = mod?.default
|
|
37100
|
+
cachedS3Client = mod?.default ?? mod;
|
|
37101
37101
|
} else {
|
|
37102
37102
|
cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
|
|
37103
37103
|
}
|
|
@@ -37137,6 +37137,28 @@ var DEFAULT_EXCLUDED_FILETYPES = [
|
|
|
37137
37137
|
".ds_store"
|
|
37138
37138
|
];
|
|
37139
37139
|
var keepScanning = true;
|
|
37140
|
+
var pauseResolve = null;
|
|
37141
|
+
var pausePromise = null;
|
|
37142
|
+
function pauseScanning() {
|
|
37143
|
+
if (!pausePromise) {
|
|
37144
|
+
pausePromise = new Promise((resolve) => {
|
|
37145
|
+
pauseResolve = resolve;
|
|
37146
|
+
});
|
|
37147
|
+
}
|
|
37148
|
+
}
|
|
37149
|
+
function resumeScanning() {
|
|
37150
|
+
if (pauseResolve) {
|
|
37151
|
+
pauseResolve();
|
|
37152
|
+
pauseResolve = null;
|
|
37153
|
+
pausePromise = null;
|
|
37154
|
+
}
|
|
37155
|
+
}
|
|
37156
|
+
async function waitIfPaused() {
|
|
37157
|
+
if (pausePromise) {
|
|
37158
|
+
await pausePromise;
|
|
37159
|
+
}
|
|
37160
|
+
return keepScanning;
|
|
37161
|
+
}
|
|
37140
37162
|
var excludedFiletypes = [];
|
|
37141
37163
|
var excludedPathRegexes = [];
|
|
37142
37164
|
var noDicomSignatureCheck = false;
|
|
@@ -37294,7 +37316,11 @@ fixupNodeWorkerEnvironment().then(() => {
|
|
|
37294
37316
|
break;
|
|
37295
37317
|
}
|
|
37296
37318
|
case "stop": {
|
|
37297
|
-
|
|
37319
|
+
pauseScanning();
|
|
37320
|
+
break;
|
|
37321
|
+
}
|
|
37322
|
+
case "resume": {
|
|
37323
|
+
resumeScanning();
|
|
37298
37324
|
break;
|
|
37299
37325
|
}
|
|
37300
37326
|
default:
|
|
@@ -37371,10 +37397,16 @@ async function scanDirectory(dir) {
|
|
|
37371
37397
|
for await (const entry of dir2.values()) {
|
|
37372
37398
|
if (!keepScanning)
|
|
37373
37399
|
return;
|
|
37400
|
+
if (!await waitIfPaused())
|
|
37401
|
+
return;
|
|
37374
37402
|
if (entry.kind === "file") {
|
|
37375
37403
|
const file = await entry.getFile();
|
|
37376
37404
|
const fileAnomalies = [];
|
|
37377
|
-
if (await shouldProcessFile(
|
|
37405
|
+
if (await shouldProcessFile(
|
|
37406
|
+
file,
|
|
37407
|
+
fileAnomalies,
|
|
37408
|
+
`${prefix}/${entry.name}`
|
|
37409
|
+
)) {
|
|
37378
37410
|
const key = `${prefix}/${entry.name}`;
|
|
37379
37411
|
const prev = previousIndex ? previousIndex[key] : void 0;
|
|
37380
37412
|
globalThis.postMessage({
|
|
@@ -37434,6 +37466,8 @@ async function scanDirectoryNode(dirPath) {
|
|
|
37434
37466
|
for (const entry of entries) {
|
|
37435
37467
|
if (!keepScanning)
|
|
37436
37468
|
return;
|
|
37469
|
+
if (!await waitIfPaused())
|
|
37470
|
+
return;
|
|
37437
37471
|
if (entry.isFile()) {
|
|
37438
37472
|
const filePath = path.join(currentPath, entry.name);
|
|
37439
37473
|
const stats = await fs.stat(filePath);
|
|
@@ -4,6 +4,7 @@ export type TCurateOneArgs = {
|
|
|
4
4
|
outputTarget: TOutputTarget;
|
|
5
5
|
mappingOptions: TMappingOptions;
|
|
6
6
|
hashMethod?: THashMethod;
|
|
7
|
+
hashPartSize?: number;
|
|
7
8
|
previousSourceFileInfo?: {
|
|
8
9
|
size?: number;
|
|
9
10
|
mtime?: string;
|
|
@@ -13,6 +14,6 @@ export type TCurateOneArgs = {
|
|
|
13
14
|
postMappedHash?: string;
|
|
14
15
|
} | undefined;
|
|
15
16
|
};
|
|
16
|
-
export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
|
|
17
|
+
export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, hashPartSize, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
|
|
17
18
|
anomalies: TMapResults['anomalies'];
|
|
18
19
|
}>;
|
package/dist/types/hash.d.ts
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
import { THashMethod } from './types';
|
|
2
|
-
export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod): Promise<string>;
|
|
2
|
+
export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod, hashPartSize?: number): Promise<string>;
|
|
@@ -9,6 +9,7 @@ import type { TMappingOptions, TFileInfo, TProgressMessage, TOutputTarget, TFile
|
|
|
9
9
|
export type TMappingWorkerOptions = TMappingOptions & {
|
|
10
10
|
outputTarget?: TOutputTarget;
|
|
11
11
|
hashMethod?: THashMethod;
|
|
12
|
+
hashPartSize?: number;
|
|
12
13
|
};
|
|
13
14
|
export type ProgressCallback = (message: TProgressMessage) => void;
|
|
14
15
|
export declare const availableMappingWorkers: Worker[];
|
|
@@ -28,10 +29,20 @@ export declare let scanAnomalies: {
|
|
|
28
29
|
anomalies: string[];
|
|
29
30
|
}[];
|
|
30
31
|
export declare function setMappingWorkerOptions(opts: TMappingWorkerOptions): void;
|
|
32
|
+
/**
|
|
33
|
+
* Register a callback that resumes the scan worker. Called by curateMany
|
|
34
|
+
* after the scan worker is created.
|
|
35
|
+
*/
|
|
36
|
+
export declare function setScanResumeCallback(cb: (() => void) | null): void;
|
|
37
|
+
/**
|
|
38
|
+
* Mark the scan as paused. Called from the scan worker message handler in
|
|
39
|
+
* index.ts when the queue exceeds the high-water mark.
|
|
40
|
+
*/
|
|
41
|
+
export declare function markScanPaused(): void;
|
|
31
42
|
/**
|
|
32
43
|
* Initialize the mapping worker pool. Call once per curateMany invocation.
|
|
33
44
|
*/
|
|
34
|
-
export declare function initializeMappingWorkers(skipCollectingMappings?: boolean, fileInfoIndex?: TFileInfoIndex, progressCb?: ProgressCallback): Promise<void>;
|
|
45
|
+
export declare function initializeMappingWorkers(skipCollectingMappings?: boolean, fileInfoIndex?: TFileInfoIndex, progressCb?: ProgressCallback, workerCount?: number): Promise<void>;
|
|
35
46
|
/**
|
|
36
47
|
* Dispatch queued files to available mapping workers.
|
|
37
48
|
* Also checks the termination condition (all files processed, no pending
|
package/dist/types/types.d.ts
CHANGED
|
@@ -29,8 +29,10 @@ export type OrganizeOptions = {
|
|
|
29
29
|
dateOffset?: Iso8601Duration;
|
|
30
30
|
skipCollectingMappings?: boolean;
|
|
31
31
|
hashMethod?: THashMethod;
|
|
32
|
+
hashPartSize?: number;
|
|
32
33
|
fileInfoIndex?: TFileInfoIndex;
|
|
33
34
|
excludedPathGlobs?: string[];
|
|
35
|
+
workerCount?: number;
|
|
34
36
|
} & ({
|
|
35
37
|
inputType: 'directory';
|
|
36
38
|
inputDirectory: FileSystemDirectoryHandle;
|
|
@@ -48,7 +50,7 @@ export type OrganizeOptions = {
|
|
|
48
50
|
inputType: 's3';
|
|
49
51
|
inputS3Bucket: TS3BucketOptions;
|
|
50
52
|
});
|
|
51
|
-
export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5';
|
|
53
|
+
export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5' | 'aws-s3-etag-2025';
|
|
52
54
|
export type THTTPHeaderProvider = () => Promise<Record<string, string>> | Record<string, string>;
|
|
53
55
|
export type THTTPOptions = {
|
|
54
56
|
url: string;
|