dicom-curate 0.26.2 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/index.js CHANGED
@@ -79567,7 +79567,7 @@ function isPrivateTag(tagId) {
79567
79567
  return false;
79568
79568
  }
79569
79569
  function convertKeywordToTagId(keyword) {
79570
- const tagId = isPrivateTag(keyword) ? keyword : dcmjs.data.DicomMetaDictionary.nameMap[keyword]?.tag || keyword;
79570
+ const tagId = isPrivateTag(keyword) ? keyword : dcmjs.data.DicomMetaDictionary.nameMap[keyword]?.tag ?? keyword;
79571
79571
  return tagId.replace(/[(),]/g, "").toLowerCase();
79572
79572
  }
79573
79573
  function convertKeywordPathToTagIdPath(keywordPath) {
@@ -80427,7 +80427,7 @@ function getCid7050Codes(options) {
80427
80427
  var import_lodash = __toESM(require_lodash(), 1);
80428
80428
  var nameMap = dcmjs2.data.DicomMetaDictionary.nameMap;
80429
80429
  function getVr(keyword) {
80430
- const element = nameMap[keyword] || nameMap[`RETIRED_${keyword}`];
80430
+ const element = nameMap[keyword] ?? nameMap[`RETIRED_${keyword}`];
80431
80431
  return element?.vr;
80432
80432
  }
80433
80433
  function temporalVr(vr) {
@@ -80499,7 +80499,7 @@ function deidentifyPS315E({
80499
80499
  }
80500
80500
  }
80501
80501
  }
80502
- return current2[tagName] || null;
80502
+ return current2[tagName] ?? null;
80503
80503
  }
80504
80504
  const {
80505
80505
  cleanDescriptorsOption,
@@ -81248,17 +81248,20 @@ async function fetchWithRetry(...args) {
81248
81248
  // src/hash.ts
81249
81249
  var import_md5 = __toESM(require_md5(), 1);
81250
81250
  var import_js_crc = __toESM(require_crc(), 1);
81251
- async function hash(buffer, hashMethod) {
81251
+ var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
81252
+ async function hash(buffer, hashMethod, hashPartSize) {
81252
81253
  switch (hashMethod) {
81253
81254
  case "sha256":
81254
81255
  return await sha256Hex(buffer);
81255
81256
  case "crc32":
81256
81257
  return crc32Hex(buffer);
81257
- case "md5":
81258
- return md5Hex(buffer);
81259
81258
  case "crc64":
81260
- default:
81261
81259
  return crc64Hex(buffer);
81260
+ case "aws-s3-etag-2025":
81261
+ return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
81262
+ case "md5":
81263
+ default:
81264
+ return md5Hex(buffer);
81262
81265
  }
81263
81266
  }
81264
81267
  async function sha256Hex(buffer) {
@@ -81269,6 +81272,27 @@ async function sha256Hex(buffer) {
81269
81272
  function md5Hex(buffer) {
81270
81273
  return (0, import_md5.default)(new Uint8Array(buffer));
81271
81274
  }
81275
+ function awsS3Etag(buffer, partSize) {
81276
+ if (buffer.byteLength <= partSize) {
81277
+ return md5Hex(buffer);
81278
+ }
81279
+ return multipartMd5(buffer, partSize);
81280
+ }
81281
+ function multipartMd5(buffer, partSize) {
81282
+ const totalSize = buffer.byteLength;
81283
+ const partCount = Math.ceil(totalSize / partSize);
81284
+ const rawDigests = new Uint8Array(partCount * 16);
81285
+ for (let i4 = 0; i4 < partCount; i4++) {
81286
+ const start = i4 * partSize;
81287
+ const end = Math.min(start + partSize, totalSize);
81288
+ const partBuffer = buffer.slice(start, end);
81289
+ const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
81290
+ for (let j4 = 0; j4 < 16; j4++) {
81291
+ rawDigests[i4 * 16 + j4] = parseInt(hex.slice(j4 * 2, j4 * 2 + 2), 16);
81292
+ }
81293
+ }
81294
+ return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
81295
+ }
81272
81296
  function crc32Hex(input) {
81273
81297
  let bytes;
81274
81298
  if (input instanceof Uint8Array) {
@@ -81332,7 +81356,7 @@ async function loadS3Client() {
81332
81356
  const { createRequire } = await import("module");
81333
81357
  const req = createRequire(import.meta.url);
81334
81358
  const mod = req("@aws-sdk/client-s3");
81335
- cachedS3Client = mod?.default || mod;
81359
+ cachedS3Client = mod?.default ?? mod;
81336
81360
  } else {
81337
81361
  cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
81338
81362
  }
@@ -81345,6 +81369,7 @@ async function curateOne({
81345
81369
  outputTarget,
81346
81370
  mappingOptions,
81347
81371
  hashMethod,
81372
+ hashPartSize,
81348
81373
  previousSourceFileInfo,
81349
81374
  previousMappedFileInfo
81350
81375
  }) {
@@ -81371,7 +81396,7 @@ async function curateOne({
81371
81396
  );
81372
81397
  }
81373
81398
  file = await resp.blob();
81374
- const lastModifiedHeader = resp.headers.get("last-modified") || void 0;
81399
+ const lastModifiedHeader = resp.headers.get("last-modified");
81375
81400
  if (lastModifiedHeader) {
81376
81401
  mtime = new Date(lastModifiedHeader).toISOString();
81377
81402
  }
@@ -81437,14 +81462,18 @@ async function curateOne({
81437
81462
  } catch (e4) {
81438
81463
  }
81439
81464
  }
81440
- const fileArrayBuffer = await file.arrayBuffer();
81465
+ let fileArrayBuffer = await file.arrayBuffer();
81441
81466
  let preMappedHash;
81442
81467
  let postMappedHash;
81443
81468
  const postMappedHashHeader = "x-source-file-hash";
81444
81469
  let canSkip = false;
81445
81470
  if (previousSourceFileInfo?.preMappedHash !== void 0) {
81446
81471
  try {
81447
- preMappedHash = await hash(fileArrayBuffer, hashMethod || "crc64");
81472
+ preMappedHash = await hash(
81473
+ fileArrayBuffer,
81474
+ hashMethod ?? "md5",
81475
+ hashPartSize
81476
+ );
81448
81477
  } catch (e4) {
81449
81478
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
81450
81479
  }
@@ -81549,7 +81578,11 @@ async function curateOne({
81549
81578
  }
81550
81579
  if (!preMappedHash) {
81551
81580
  try {
81552
- preMappedHash = await hash(fileArrayBuffer, hashMethod || "crc64");
81581
+ preMappedHash = await hash(
81582
+ fileArrayBuffer,
81583
+ hashMethod ?? "md5",
81584
+ hashPartSize
81585
+ );
81553
81586
  } catch (e4) {
81554
81587
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
81555
81588
  }
@@ -81560,7 +81593,12 @@ async function curateOne({
81560
81593
  const modifiedArrayBuffer = mappedDicomData.write({
81561
81594
  allowInvalidVRLength: true
81562
81595
  });
81563
- postMappedHash = await hash(modifiedArrayBuffer, hashMethod || "crc64");
81596
+ postMappedHash = await hash(
81597
+ modifiedArrayBuffer,
81598
+ hashMethod ?? "md5",
81599
+ hashPartSize
81600
+ );
81601
+ fileArrayBuffer = null;
81564
81602
  const previousPostMappedHash = previousMappedFileInfo ? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash : void 0;
81565
81603
  if (previousPostMappedHash !== void 0 && previousPostMappedHash === postMappedHash) {
81566
81604
  return noMapResult(clonedMapResults.outputFilePath);
@@ -81592,26 +81630,23 @@ async function curateOne({
81592
81630
  }
81593
81631
  const fullFilePath = path.join(fullDirPath, fileName);
81594
81632
  await fs.writeFile(fullFilePath, new DataView(modifiedArrayBuffer));
81595
- } else {
81633
+ } else if (!outputTarget?.http && !outputTarget?.s3) {
81596
81634
  clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
81597
81635
  type: "application/octet-stream"
81598
81636
  });
81599
81637
  }
81600
- clonedMapResults.mappedBlob = new Blob([modifiedArrayBuffer], {
81601
- type: "application/octet-stream"
81602
- });
81603
81638
  if (outputTarget?.http) {
81604
81639
  try {
81605
81640
  const key = clonedMapResults.outputFilePath.split("/").map(encodeURIComponent).join("/");
81606
81641
  const uploadUrl = `${outputTarget.http.url}/${key}`;
81607
81642
  const headers = {
81608
- "Content-Type": clonedMapResults.mappedBlob.type || "application/octet-stream",
81643
+ "Content-Type": "application/octet-stream",
81609
81644
  "X-File-Name": fileName,
81610
- "X-File-Type": clonedMapResults.mappedBlob.type || "application/octet-stream",
81645
+ "X-File-Type": "application/octet-stream",
81611
81646
  "X-File-Size": String(modifiedArrayBuffer.byteLength),
81612
- "X-Source-File-Size": String(clonedMapResults.fileInfo?.size || ""),
81613
- "X-Source-File-Modified-Time": mtime || "",
81614
- "X-Source-File-Hash": preMappedHash || ""
81647
+ "X-Source-File-Size": String(clonedMapResults.fileInfo?.size ?? ""),
81648
+ "X-Source-File-Modified-Time": mtime ?? "",
81649
+ "X-Source-File-Hash": preMappedHash ?? ""
81615
81650
  };
81616
81651
  if (outputTarget.http.headers) {
81617
81652
  Object.assign(headers, outputTarget.http.headers);
@@ -81621,25 +81656,27 @@ async function curateOne({
81621
81656
  const resp = await fetchWithRetry(uploadUrl, {
81622
81657
  method: "PUT",
81623
81658
  headers,
81624
- body: clonedMapResults.mappedBlob
81659
+ body: new Blob([modifiedArrayBuffer], {
81660
+ type: "application/octet-stream"
81661
+ })
81625
81662
  });
81626
81663
  if (!resp.ok) {
81627
81664
  console.error(
81628
81665
  `Upload failed for ${uploadUrl}: ${resp.status} ${resp.statusText}`
81629
81666
  );
81630
- clonedMapResults.errors = clonedMapResults.errors || [];
81667
+ clonedMapResults.errors = clonedMapResults.errors ?? [];
81631
81668
  clonedMapResults.errors.push(
81632
81669
  `Upload failed: ${resp.status} ${resp.statusText}`
81633
81670
  );
81634
81671
  } else {
81635
- clonedMapResults.outputUpload = clonedMapResults.outputUpload || {
81672
+ clonedMapResults.outputUpload = clonedMapResults.outputUpload ?? {
81636
81673
  url: uploadUrl,
81637
81674
  status: resp.status
81638
81675
  };
81639
81676
  }
81640
81677
  } catch (e4) {
81641
81678
  console.error("Upload error", e4);
81642
- clonedMapResults.errors = clonedMapResults.errors || [];
81679
+ clonedMapResults.errors = clonedMapResults.errors ?? [];
81643
81680
  clonedMapResults.errors.push(
81644
81681
  `Upload error: ${e4 instanceof Error ? e4.message : String(e4)}`
81645
81682
  );
@@ -81659,12 +81696,14 @@ async function curateOne({
81659
81696
  new s32.PutObjectCommand({
81660
81697
  Bucket: outputTarget.s3.bucketName,
81661
81698
  Key: key,
81662
- Body: await clonedMapResults.mappedBlob.arrayBuffer(),
81663
- ContentType: clonedMapResults.mappedBlob.type || "application/octet-stream",
81699
+ // Use the ArrayBuffer directly — going through Blob.arrayBuffer()
81700
+ // would create yet another copy of the data in memory.
81701
+ Body: new Uint8Array(modifiedArrayBuffer),
81702
+ ContentType: "application/octet-stream",
81664
81703
  Metadata: {
81665
- "source-file-size": String(clonedMapResults.fileInfo?.size || ""),
81666
- "source-file-modified-time": mtime || "",
81667
- "source-file-hash": preMappedHash || "",
81704
+ "source-file-size": String(clonedMapResults.fileInfo?.size ?? ""),
81705
+ "source-file-modified-time": mtime ?? "",
81706
+ "source-file-hash": preMappedHash ?? "",
81668
81707
  ...postMappedHash ? { "source-file-post-mapped-hash": postMappedHash } : {}
81669
81708
  }
81670
81709
  })
@@ -81676,7 +81715,7 @@ async function curateOne({
81676
81715
  };
81677
81716
  } catch (e4) {
81678
81717
  console.error("S3 Upload error", e4);
81679
- clonedMapResults.errors = clonedMapResults.errors || [];
81718
+ clonedMapResults.errors = clonedMapResults.errors ?? [];
81680
81719
  clonedMapResults.errors.push(
81681
81720
  `S3 Upload error: ${e4 instanceof Error ? e4.message : String(e4)}`
81682
81721
  );
@@ -87444,10 +87483,20 @@ function setDirectoryScanFinished(value) {
87444
87483
  var scanAnomalies = [];
87445
87484
  var progressCallback = () => {
87446
87485
  };
87486
+ var scanResumeCallback = null;
87487
+ var scanPaused = false;
87488
+ var LOW_WATER_MARK = 50;
87447
87489
  function setMappingWorkerOptions(opts) {
87448
87490
  mappingWorkerOptions = opts;
87449
87491
  }
87450
- async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb) {
87492
+ function setScanResumeCallback(cb) {
87493
+ scanResumeCallback = cb;
87494
+ scanPaused = false;
87495
+ }
87496
+ function markScanPaused() {
87497
+ scanPaused = true;
87498
+ }
87499
+ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb, workerCount) {
87451
87500
  mappingWorkerOptions = {};
87452
87501
  workersActive = 0;
87453
87502
  mapResultsList = skipCollectingMappings ? void 0 : [];
@@ -87461,9 +87510,12 @@ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, p
87461
87510
  scanAnomalies = [];
87462
87511
  if (progressCb)
87463
87512
  progressCallback = progressCb;
87464
- const workerCount = navigator.hardwareConcurrency;
87513
+ const effectiveWorkerCount = workerCount ?? Math.min(await getHardwareConcurrency(), 8);
87465
87514
  const workers = await Promise.all(
87466
- Array.from({ length: workerCount }, () => createMappingWorker(fileInfoIndex))
87515
+ Array.from(
87516
+ { length: effectiveWorkerCount },
87517
+ () => createMappingWorker(fileInfoIndex)
87518
+ )
87467
87519
  );
87468
87520
  availableMappingWorkers.push(...workers);
87469
87521
  }
@@ -87472,7 +87524,7 @@ async function dispatchMappingJobs() {
87472
87524
  const { fileInfo, previousFileInfo } = filesToProcess.pop();
87473
87525
  const mappingWorker = availableMappingWorkers.pop();
87474
87526
  workerCurrentFile.set(mappingWorker, fileInfo);
87475
- const { outputTarget, hashMethod, ...mappingOptions } = (
87527
+ const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
87476
87528
  // Not partial anymore.
87477
87529
  mappingWorkerOptions
87478
87530
  );
@@ -87482,10 +87534,15 @@ async function dispatchMappingJobs() {
87482
87534
  outputTarget: await getHttpOutputHeaders(outputTarget),
87483
87535
  previousFileInfo,
87484
87536
  hashMethod,
87537
+ hashPartSize,
87485
87538
  serializedMappingOptions: serializeMappingOptions(mappingOptions)
87486
87539
  });
87487
87540
  workersActive += 1;
87488
87541
  }
87542
+ if (scanPaused && filesToProcess.length < LOW_WATER_MARK && scanResumeCallback) {
87543
+ scanPaused = false;
87544
+ scanResumeCallback();
87545
+ }
87489
87546
  if (workersActive === 0 && pendingReplacements === 0 && directoryScanFinished && filesToProcess.length === 0) {
87490
87547
  while (availableMappingWorkers.length) {
87491
87548
  availableMappingWorkers.pop().terminate();
@@ -87517,6 +87574,13 @@ async function dispatchMappingJobs() {
87517
87574
  });
87518
87575
  }
87519
87576
  }
87577
+ async function getHardwareConcurrency() {
87578
+ if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
87579
+ return navigator.hardwareConcurrency;
87580
+ }
87581
+ const { cpus } = await import("os");
87582
+ return cpus().length;
87583
+ }
87520
87584
  function recoverCrashedWorker(mappingWorker, errorMessage) {
87521
87585
  if (!workerCurrentFile.has(mappingWorker)) {
87522
87586
  return;
@@ -87686,6 +87750,11 @@ async function initializeFileListWorker(rejectCallback) {
87686
87750
  // Files sent to processing have no scan anomalies
87687
87751
  previousFileInfo
87688
87752
  });
87753
+ const HIGH_WATER_MARK = 100;
87754
+ if (filesToProcess.length > HIGH_WATER_MARK) {
87755
+ fileListWorker.postMessage({ request: "stop" });
87756
+ markScanPaused();
87757
+ }
87689
87758
  dispatchMappingJobs();
87690
87759
  break;
87691
87760
  }
@@ -87746,6 +87815,7 @@ async function collectMappingOptions(organizeOptions) {
87746
87815
  const skipModifications = organizeOptions.skipModifications ?? false;
87747
87816
  const skipValidation = organizeOptions.skipValidation ?? false;
87748
87817
  const hashMethod = organizeOptions.hashMethod;
87818
+ const hashPartSize = organizeOptions.hashPartSize;
87749
87819
  const dateOffset = organizeOptions.dateOffset;
87750
87820
  if (requiresDateOffset(deIdOpts) && !dateOffset?.match(iso8601)) {
87751
87821
  throw new Error(
@@ -87760,7 +87830,8 @@ async function collectMappingOptions(organizeOptions) {
87760
87830
  skipModifications,
87761
87831
  skipValidation,
87762
87832
  dateOffset,
87763
- hashMethod
87833
+ hashMethod,
87834
+ hashPartSize
87764
87835
  };
87765
87836
  }
87766
87837
  function queueFilesForMapping(organizeOptions) {
@@ -87830,13 +87901,17 @@ async function curateMany(organizeOptions, onProgress) {
87830
87901
  await initializeMappingWorkers(
87831
87902
  organizeOptions.skipCollectingMappings,
87832
87903
  organizeOptions.fileInfoIndex,
87833
- progressCallback2
87904
+ progressCallback2,
87905
+ organizeOptions.workerCount
87834
87906
  );
87835
87907
  setMappingWorkerOptions(
87836
87908
  await collectMappingOptions(organizeOptions)
87837
87909
  );
87838
87910
  if (organizeOptions.inputType === "directory" || organizeOptions.inputType === "path" || organizeOptions.inputType === "s3") {
87839
87911
  const fileListWorker = await initializeFileListWorker(rejectCallback);
87912
+ setScanResumeCallback(() => {
87913
+ fileListWorker.postMessage({ request: "resume" });
87914
+ });
87840
87915
  let specExcludedFiletypes;
87841
87916
  let noDicomSignatureCheck = false;
87842
87917
  let noDefaultExclusions = false;
@@ -12061,10 +12061,20 @@ function setDirectoryScanFinished(value) {
12061
12061
  var scanAnomalies = [];
12062
12062
  var progressCallback = () => {
12063
12063
  };
12064
+ var scanResumeCallback = null;
12065
+ var scanPaused = false;
12066
+ var LOW_WATER_MARK = 50;
12064
12067
  function setMappingWorkerOptions(opts) {
12065
12068
  mappingWorkerOptions = opts;
12066
12069
  }
12067
- async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb) {
12070
+ function setScanResumeCallback(cb) {
12071
+ scanResumeCallback = cb;
12072
+ scanPaused = false;
12073
+ }
12074
+ function markScanPaused() {
12075
+ scanPaused = true;
12076
+ }
12077
+ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, progressCb, workerCount) {
12068
12078
  mappingWorkerOptions = {};
12069
12079
  workersActive = 0;
12070
12080
  mapResultsList = skipCollectingMappings ? void 0 : [];
@@ -12078,9 +12088,12 @@ async function initializeMappingWorkers(skipCollectingMappings, fileInfoIndex, p
12078
12088
  scanAnomalies = [];
12079
12089
  if (progressCb)
12080
12090
  progressCallback = progressCb;
12081
- const workerCount = navigator.hardwareConcurrency;
12091
+ const effectiveWorkerCount = workerCount ?? Math.min(await getHardwareConcurrency(), 8);
12082
12092
  const workers = await Promise.all(
12083
- Array.from({ length: workerCount }, () => createMappingWorker(fileInfoIndex))
12093
+ Array.from(
12094
+ { length: effectiveWorkerCount },
12095
+ () => createMappingWorker(fileInfoIndex)
12096
+ )
12084
12097
  );
12085
12098
  availableMappingWorkers.push(...workers);
12086
12099
  }
@@ -12089,7 +12102,7 @@ async function dispatchMappingJobs() {
12089
12102
  const { fileInfo, previousFileInfo } = filesToProcess.pop();
12090
12103
  const mappingWorker = availableMappingWorkers.pop();
12091
12104
  workerCurrentFile.set(mappingWorker, fileInfo);
12092
- const { outputTarget, hashMethod, ...mappingOptions } = (
12105
+ const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
12093
12106
  // Not partial anymore.
12094
12107
  mappingWorkerOptions
12095
12108
  );
@@ -12099,10 +12112,15 @@ async function dispatchMappingJobs() {
12099
12112
  outputTarget: await getHttpOutputHeaders(outputTarget),
12100
12113
  previousFileInfo,
12101
12114
  hashMethod,
12115
+ hashPartSize,
12102
12116
  serializedMappingOptions: serializeMappingOptions(mappingOptions)
12103
12117
  });
12104
12118
  workersActive += 1;
12105
12119
  }
12120
+ if (scanPaused && filesToProcess.length < LOW_WATER_MARK && scanResumeCallback) {
12121
+ scanPaused = false;
12122
+ scanResumeCallback();
12123
+ }
12106
12124
  if (workersActive === 0 && pendingReplacements === 0 && directoryScanFinished && filesToProcess.length === 0) {
12107
12125
  while (availableMappingWorkers.length) {
12108
12126
  availableMappingWorkers.pop().terminate();
@@ -12134,6 +12152,13 @@ async function dispatchMappingJobs() {
12134
12152
  });
12135
12153
  }
12136
12154
  }
12155
+ async function getHardwareConcurrency() {
12156
+ if (typeof navigator !== "undefined" && navigator.hardwareConcurrency) {
12157
+ return navigator.hardwareConcurrency;
12158
+ }
12159
+ const { cpus } = await import("os");
12160
+ return cpus().length;
12161
+ }
12137
12162
  function recoverCrashedWorker(mappingWorker, errorMessage) {
12138
12163
  if (!workerCurrentFile.has(mappingWorker)) {
12139
12164
  return;
@@ -12276,7 +12301,9 @@ export {
12276
12301
  getWorkerCurrentFile,
12277
12302
  getWorkersActive,
12278
12303
  initializeMappingWorkers,
12304
+ markScanPaused,
12279
12305
  scanAnomalies,
12280
12306
  setDirectoryScanFinished,
12281
- setMappingWorkerOptions
12307
+ setMappingWorkerOptions,
12308
+ setScanResumeCallback
12282
12309
  };
@@ -37097,7 +37097,7 @@ async function loadS3Client() {
37097
37097
  const { createRequire } = await import("module");
37098
37098
  const req = createRequire(import.meta.url);
37099
37099
  const mod = req("@aws-sdk/client-s3");
37100
- cachedS3Client = mod?.default || mod;
37100
+ cachedS3Client = mod?.default ?? mod;
37101
37101
  } else {
37102
37102
  cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
37103
37103
  }
@@ -37097,7 +37097,7 @@ async function loadS3Client() {
37097
37097
  const { createRequire } = await import("module");
37098
37098
  const req = createRequire(import.meta.url);
37099
37099
  const mod = req("@aws-sdk/client-s3");
37100
- cachedS3Client = mod?.default || mod;
37100
+ cachedS3Client = mod?.default ?? mod;
37101
37101
  } else {
37102
37102
  cachedS3Client = await Promise.resolve().then(() => __toESM(require_dist_cjs71(), 1));
37103
37103
  }
@@ -37137,6 +37137,28 @@ var DEFAULT_EXCLUDED_FILETYPES = [
37137
37137
  ".ds_store"
37138
37138
  ];
37139
37139
  var keepScanning = true;
37140
+ var pauseResolve = null;
37141
+ var pausePromise = null;
37142
+ function pauseScanning() {
37143
+ if (!pausePromise) {
37144
+ pausePromise = new Promise((resolve) => {
37145
+ pauseResolve = resolve;
37146
+ });
37147
+ }
37148
+ }
37149
+ function resumeScanning() {
37150
+ if (pauseResolve) {
37151
+ pauseResolve();
37152
+ pauseResolve = null;
37153
+ pausePromise = null;
37154
+ }
37155
+ }
37156
+ async function waitIfPaused() {
37157
+ if (pausePromise) {
37158
+ await pausePromise;
37159
+ }
37160
+ return keepScanning;
37161
+ }
37140
37162
  var excludedFiletypes = [];
37141
37163
  var excludedPathRegexes = [];
37142
37164
  var noDicomSignatureCheck = false;
@@ -37294,7 +37316,11 @@ fixupNodeWorkerEnvironment().then(() => {
37294
37316
  break;
37295
37317
  }
37296
37318
  case "stop": {
37297
- keepScanning = false;
37319
+ pauseScanning();
37320
+ break;
37321
+ }
37322
+ case "resume": {
37323
+ resumeScanning();
37298
37324
  break;
37299
37325
  }
37300
37326
  default:
@@ -37371,10 +37397,16 @@ async function scanDirectory(dir) {
37371
37397
  for await (const entry of dir2.values()) {
37372
37398
  if (!keepScanning)
37373
37399
  return;
37400
+ if (!await waitIfPaused())
37401
+ return;
37374
37402
  if (entry.kind === "file") {
37375
37403
  const file = await entry.getFile();
37376
37404
  const fileAnomalies = [];
37377
- if (await shouldProcessFile(file, fileAnomalies, `${prefix}/${entry.name}`)) {
37405
+ if (await shouldProcessFile(
37406
+ file,
37407
+ fileAnomalies,
37408
+ `${prefix}/${entry.name}`
37409
+ )) {
37378
37410
  const key = `${prefix}/${entry.name}`;
37379
37411
  const prev = previousIndex ? previousIndex[key] : void 0;
37380
37412
  globalThis.postMessage({
@@ -37434,6 +37466,8 @@ async function scanDirectoryNode(dirPath) {
37434
37466
  for (const entry of entries) {
37435
37467
  if (!keepScanning)
37436
37468
  return;
37469
+ if (!await waitIfPaused())
37470
+ return;
37437
37471
  if (entry.isFile()) {
37438
37472
  const filePath = path.join(currentPath, entry.name);
37439
37473
  const stats = await fs.stat(filePath);
@@ -9,6 +9,7 @@ export type MappingRequest = {
9
9
  preMappedHash?: string;
10
10
  };
11
11
  hashMethod?: THashMethod;
12
+ hashPartSize?: number;
12
13
  serializedMappingOptions: TSerializedMappingOptions;
13
14
  } | {
14
15
  request: 'fileInfoIndex';
@@ -4,6 +4,7 @@ export type TCurateOneArgs = {
4
4
  outputTarget: TOutputTarget;
5
5
  mappingOptions: TMappingOptions;
6
6
  hashMethod?: THashMethod;
7
+ hashPartSize?: number;
7
8
  previousSourceFileInfo?: {
8
9
  size?: number;
9
10
  mtime?: string;
@@ -13,6 +14,6 @@ export type TCurateOneArgs = {
13
14
  postMappedHash?: string;
14
15
  } | undefined;
15
16
  };
16
- export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
17
+ export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, hashPartSize, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
17
18
  anomalies: TMapResults['anomalies'];
18
19
  }>;
@@ -1,2 +1,2 @@
1
1
  import { THashMethod } from './types';
2
- export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod): Promise<string>;
2
+ export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod, hashPartSize?: number): Promise<string>;
@@ -9,6 +9,7 @@ import type { TMappingOptions, TFileInfo, TProgressMessage, TOutputTarget, TFile
9
9
  export type TMappingWorkerOptions = TMappingOptions & {
10
10
  outputTarget?: TOutputTarget;
11
11
  hashMethod?: THashMethod;
12
+ hashPartSize?: number;
12
13
  };
13
14
  export type ProgressCallback = (message: TProgressMessage) => void;
14
15
  export declare const availableMappingWorkers: Worker[];
@@ -28,10 +29,20 @@ export declare let scanAnomalies: {
28
29
  anomalies: string[];
29
30
  }[];
30
31
  export declare function setMappingWorkerOptions(opts: TMappingWorkerOptions): void;
32
+ /**
33
+ * Register a callback that resumes the scan worker. Called by curateMany
34
+ * after the scan worker is created.
35
+ */
36
+ export declare function setScanResumeCallback(cb: (() => void) | null): void;
37
+ /**
38
+ * Mark the scan as paused. Called from the scan worker message handler in
39
+ * index.ts when the queue exceeds the high-water mark.
40
+ */
41
+ export declare function markScanPaused(): void;
31
42
  /**
32
43
  * Initialize the mapping worker pool. Call once per curateMany invocation.
33
44
  */
34
- export declare function initializeMappingWorkers(skipCollectingMappings?: boolean, fileInfoIndex?: TFileInfoIndex, progressCb?: ProgressCallback): Promise<void>;
45
+ export declare function initializeMappingWorkers(skipCollectingMappings?: boolean, fileInfoIndex?: TFileInfoIndex, progressCb?: ProgressCallback, workerCount?: number): Promise<void>;
35
46
  /**
36
47
  * Dispatch queued files to available mapping workers.
37
48
  * Also checks the termination condition (all files processed, no pending
@@ -41,4 +41,6 @@ export type FileScanRequest = ({
41
41
  bucketOptions: TS3BucketOptions;
42
42
  } & CommonFileScanRequestFields) | {
43
43
  request: 'stop';
44
+ } | {
45
+ request: 'resume';
44
46
  };
@@ -29,8 +29,10 @@ export type OrganizeOptions = {
29
29
  dateOffset?: Iso8601Duration;
30
30
  skipCollectingMappings?: boolean;
31
31
  hashMethod?: THashMethod;
32
+ hashPartSize?: number;
32
33
  fileInfoIndex?: TFileInfoIndex;
33
34
  excludedPathGlobs?: string[];
35
+ workerCount?: number;
34
36
  } & ({
35
37
  inputType: 'directory';
36
38
  inputDirectory: FileSystemDirectoryHandle;
@@ -48,7 +50,7 @@ export type OrganizeOptions = {
48
50
  inputType: 's3';
49
51
  inputS3Bucket: TS3BucketOptions;
50
52
  });
51
- export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5';
53
+ export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5' | 'aws-s3-etag-2025';
52
54
  export type THTTPHeaderProvider = () => Promise<Record<string, string>> | Record<string, string>;
53
55
  export type THTTPOptions = {
54
56
  url: string;