dicom-curate 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -79688,17 +79688,20 @@ async function fetchWithRetry(...args) {
79688
79688
  // src/hash.ts
79689
79689
  var import_md5 = __toESM(require_md5(), 1);
79690
79690
  var import_js_crc = __toESM(require_crc(), 1);
79691
- async function hash(buffer, hashMethod) {
79691
+ var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
79692
+ async function hash(buffer, hashMethod, hashPartSize) {
79692
79693
  switch (hashMethod) {
79693
79694
  case "sha256":
79694
79695
  return await sha256Hex(buffer);
79695
79696
  case "crc32":
79696
79697
  return crc32Hex(buffer);
79698
+ case "crc64":
79699
+ return crc64Hex(buffer);
79700
+ case "aws-s3-etag-2025":
79701
+ return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
79697
79702
  case "md5":
79698
79703
  default:
79699
79704
  return md5Hex(buffer);
79700
- case "crc64":
79701
- return crc64Hex(buffer);
79702
79705
  }
79703
79706
  }
79704
79707
  async function sha256Hex(buffer) {
@@ -79709,6 +79712,27 @@ async function sha256Hex(buffer) {
79709
79712
  function md5Hex(buffer) {
79710
79713
  return (0, import_md5.default)(new Uint8Array(buffer));
79711
79714
  }
79715
+ function awsS3Etag(buffer, partSize) {
79716
+ if (buffer.byteLength <= partSize) {
79717
+ return md5Hex(buffer);
79718
+ }
79719
+ return multipartMd5(buffer, partSize);
79720
+ }
79721
+ function multipartMd5(buffer, partSize) {
79722
+ const totalSize = buffer.byteLength;
79723
+ const partCount = Math.ceil(totalSize / partSize);
79724
+ const rawDigests = new Uint8Array(partCount * 16);
79725
+ for (let i4 = 0; i4 < partCount; i4++) {
79726
+ const start = i4 * partSize;
79727
+ const end = Math.min(start + partSize, totalSize);
79728
+ const partBuffer = buffer.slice(start, end);
79729
+ const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
79730
+ for (let j4 = 0; j4 < 16; j4++) {
79731
+ rawDigests[i4 * 16 + j4] = parseInt(hex.slice(j4 * 2, j4 * 2 + 2), 16);
79732
+ }
79733
+ }
79734
+ return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
79735
+ }
79712
79736
  function crc32Hex(input) {
79713
79737
  let bytes;
79714
79738
  if (input instanceof Uint8Array) {
@@ -79785,6 +79809,7 @@ async function curateOne({
79785
79809
  outputTarget,
79786
79810
  mappingOptions,
79787
79811
  hashMethod,
79812
+ hashPartSize,
79788
79813
  previousSourceFileInfo,
79789
79814
  previousMappedFileInfo
79790
79815
  }) {
@@ -79884,7 +79909,11 @@ async function curateOne({
79884
79909
  let canSkip = false;
79885
79910
  if (previousSourceFileInfo?.preMappedHash !== void 0) {
79886
79911
  try {
79887
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
79912
+ preMappedHash = await hash(
79913
+ fileArrayBuffer,
79914
+ hashMethod ?? "md5",
79915
+ hashPartSize
79916
+ );
79888
79917
  } catch (e4) {
79889
79918
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
79890
79919
  }
@@ -79989,7 +80018,11 @@ async function curateOne({
79989
80018
  }
79990
80019
  if (!preMappedHash) {
79991
80020
  try {
79992
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
80021
+ preMappedHash = await hash(
80022
+ fileArrayBuffer,
80023
+ hashMethod ?? "md5",
80024
+ hashPartSize
80025
+ );
79993
80026
  } catch (e4) {
79994
80027
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
79995
80028
  }
@@ -80000,7 +80033,11 @@ async function curateOne({
80000
80033
  const modifiedArrayBuffer = mappedDicomData.write({
80001
80034
  allowInvalidVRLength: true
80002
80035
  });
80003
- postMappedHash = await hash(modifiedArrayBuffer, hashMethod ?? "md5");
80036
+ postMappedHash = await hash(
80037
+ modifiedArrayBuffer,
80038
+ hashMethod ?? "md5",
80039
+ hashPartSize
80040
+ );
80004
80041
  fileArrayBuffer = null;
80005
80042
  const previousPostMappedHash = previousMappedFileInfo ? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash : void 0;
80006
80043
  if (previousPostMappedHash !== void 0 && previousPostMappedHash === postMappedHash) {
@@ -85832,6 +85869,7 @@ fixupNodeWorkerEnvironment().then(() => {
85832
85869
  fileInfo,
85833
85870
  outputTarget: event.data.outputTarget ?? {},
85834
85871
  hashMethod: event.data.hashMethod,
85872
+ hashPartSize: event.data.hashPartSize,
85835
85873
  mappingOptions,
85836
85874
  previousSourceFileInfo: event.data.previousFileInfo,
85837
85875
  previousMappedFileInfo: (targetName) => {
@@ -73397,17 +73397,20 @@ async function fetchWithRetry(...args) {
73397
73397
  // src/hash.ts
73398
73398
  var import_md5 = __toESM(require_md5(), 1);
73399
73399
  var import_js_crc = __toESM(require_crc(), 1);
73400
- async function hash(buffer, hashMethod) {
73400
+ var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
73401
+ async function hash(buffer, hashMethod, hashPartSize) {
73401
73402
  switch (hashMethod) {
73402
73403
  case "sha256":
73403
73404
  return await sha256Hex(buffer);
73404
73405
  case "crc32":
73405
73406
  return crc32Hex(buffer);
73407
+ case "crc64":
73408
+ return crc64Hex(buffer);
73409
+ case "aws-s3-etag-2025":
73410
+ return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
73406
73411
  case "md5":
73407
73412
  default:
73408
73413
  return md5Hex(buffer);
73409
- case "crc64":
73410
- return crc64Hex(buffer);
73411
73414
  }
73412
73415
  }
73413
73416
  async function sha256Hex(buffer) {
@@ -73418,6 +73421,27 @@ async function sha256Hex(buffer) {
73418
73421
  function md5Hex(buffer) {
73419
73422
  return (0, import_md5.default)(new Uint8Array(buffer));
73420
73423
  }
73424
+ function awsS3Etag(buffer, partSize) {
73425
+ if (buffer.byteLength <= partSize) {
73426
+ return md5Hex(buffer);
73427
+ }
73428
+ return multipartMd5(buffer, partSize);
73429
+ }
73430
+ function multipartMd5(buffer, partSize) {
73431
+ const totalSize = buffer.byteLength;
73432
+ const partCount = Math.ceil(totalSize / partSize);
73433
+ const rawDigests = new Uint8Array(partCount * 16);
73434
+ for (let i4 = 0; i4 < partCount; i4++) {
73435
+ const start = i4 * partSize;
73436
+ const end = Math.min(start + partSize, totalSize);
73437
+ const partBuffer = buffer.slice(start, end);
73438
+ const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
73439
+ for (let j4 = 0; j4 < 16; j4++) {
73440
+ rawDigests[i4 * 16 + j4] = parseInt(hex.slice(j4 * 2, j4 * 2 + 2), 16);
73441
+ }
73442
+ }
73443
+ return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
73444
+ }
73421
73445
  function crc32Hex(input) {
73422
73446
  let bytes;
73423
73447
  if (input instanceof Uint8Array) {
@@ -73494,6 +73518,7 @@ async function curateOne({
73494
73518
  outputTarget,
73495
73519
  mappingOptions,
73496
73520
  hashMethod,
73521
+ hashPartSize,
73497
73522
  previousSourceFileInfo,
73498
73523
  previousMappedFileInfo
73499
73524
  }) {
@@ -73593,7 +73618,11 @@ async function curateOne({
73593
73618
  let canSkip = false;
73594
73619
  if (previousSourceFileInfo?.preMappedHash !== void 0) {
73595
73620
  try {
73596
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
73621
+ preMappedHash = await hash(
73622
+ fileArrayBuffer,
73623
+ hashMethod ?? "md5",
73624
+ hashPartSize
73625
+ );
73597
73626
  } catch (e4) {
73598
73627
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
73599
73628
  }
@@ -73698,7 +73727,11 @@ async function curateOne({
73698
73727
  }
73699
73728
  if (!preMappedHash) {
73700
73729
  try {
73701
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
73730
+ preMappedHash = await hash(
73731
+ fileArrayBuffer,
73732
+ hashMethod ?? "md5",
73733
+ hashPartSize
73734
+ );
73702
73735
  } catch (e4) {
73703
73736
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
73704
73737
  }
@@ -73709,7 +73742,11 @@ async function curateOne({
73709
73742
  const modifiedArrayBuffer = mappedDicomData.write({
73710
73743
  allowInvalidVRLength: true
73711
73744
  });
73712
- postMappedHash = await hash(modifiedArrayBuffer, hashMethod ?? "md5");
73745
+ postMappedHash = await hash(
73746
+ modifiedArrayBuffer,
73747
+ hashMethod ?? "md5",
73748
+ hashPartSize
73749
+ );
73713
73750
  fileArrayBuffer = null;
73714
73751
  const previousPostMappedHash = previousMappedFileInfo ? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash : void 0;
73715
73752
  if (previousPostMappedHash !== void 0 && previousPostMappedHash === postMappedHash) {
package/dist/esm/hash.js CHANGED
@@ -640,17 +640,20 @@ var require_crc = __commonJS({
640
640
  // src/hash.ts
641
641
  var import_md5 = __toESM(require_md5(), 1);
642
642
  var import_js_crc = __toESM(require_crc(), 1);
643
- async function hash(buffer, hashMethod) {
643
+ var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
644
+ async function hash(buffer, hashMethod, hashPartSize) {
644
645
  switch (hashMethod) {
645
646
  case "sha256":
646
647
  return await sha256Hex(buffer);
647
648
  case "crc32":
648
649
  return crc32Hex(buffer);
650
+ case "crc64":
651
+ return crc64Hex(buffer);
652
+ case "aws-s3-etag-2025":
653
+ return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
649
654
  case "md5":
650
655
  default:
651
656
  return md5Hex(buffer);
652
- case "crc64":
653
- return crc64Hex(buffer);
654
657
  }
655
658
  }
656
659
  async function sha256Hex(buffer) {
@@ -661,6 +664,27 @@ async function sha256Hex(buffer) {
661
664
  function md5Hex(buffer) {
662
665
  return (0, import_md5.default)(new Uint8Array(buffer));
663
666
  }
667
+ function awsS3Etag(buffer, partSize) {
668
+ if (buffer.byteLength <= partSize) {
669
+ return md5Hex(buffer);
670
+ }
671
+ return multipartMd5(buffer, partSize);
672
+ }
673
+ function multipartMd5(buffer, partSize) {
674
+ const totalSize = buffer.byteLength;
675
+ const partCount = Math.ceil(totalSize / partSize);
676
+ const rawDigests = new Uint8Array(partCount * 16);
677
+ for (let i = 0; i < partCount; i++) {
678
+ const start = i * partSize;
679
+ const end = Math.min(start + partSize, totalSize);
680
+ const partBuffer = buffer.slice(start, end);
681
+ const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
682
+ for (let j = 0; j < 16; j++) {
683
+ rawDigests[i * 16 + j] = parseInt(hex.slice(j * 2, j * 2 + 2), 16);
684
+ }
685
+ }
686
+ return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
687
+ }
664
688
  function crc32Hex(input) {
665
689
  let bytes;
666
690
  if (input instanceof Uint8Array) {
package/dist/esm/index.js CHANGED
@@ -81248,17 +81248,20 @@ async function fetchWithRetry(...args) {
81248
81248
  // src/hash.ts
81249
81249
  var import_md5 = __toESM(require_md5(), 1);
81250
81250
  var import_js_crc = __toESM(require_crc(), 1);
81251
- async function hash(buffer, hashMethod) {
81251
+ var DEFAULT_HASH_PART_SIZE = 5 * 1024 * 1024;
81252
+ async function hash(buffer, hashMethod, hashPartSize) {
81252
81253
  switch (hashMethod) {
81253
81254
  case "sha256":
81254
81255
  return await sha256Hex(buffer);
81255
81256
  case "crc32":
81256
81257
  return crc32Hex(buffer);
81258
+ case "crc64":
81259
+ return crc64Hex(buffer);
81260
+ case "aws-s3-etag-2025":
81261
+ return awsS3Etag(buffer, hashPartSize ?? DEFAULT_HASH_PART_SIZE);
81257
81262
  case "md5":
81258
81263
  default:
81259
81264
  return md5Hex(buffer);
81260
- case "crc64":
81261
- return crc64Hex(buffer);
81262
81265
  }
81263
81266
  }
81264
81267
  async function sha256Hex(buffer) {
@@ -81269,6 +81272,27 @@ async function sha256Hex(buffer) {
81269
81272
  function md5Hex(buffer) {
81270
81273
  return (0, import_md5.default)(new Uint8Array(buffer));
81271
81274
  }
81275
+ function awsS3Etag(buffer, partSize) {
81276
+ if (buffer.byteLength <= partSize) {
81277
+ return md5Hex(buffer);
81278
+ }
81279
+ return multipartMd5(buffer, partSize);
81280
+ }
81281
+ function multipartMd5(buffer, partSize) {
81282
+ const totalSize = buffer.byteLength;
81283
+ const partCount = Math.ceil(totalSize / partSize);
81284
+ const rawDigests = new Uint8Array(partCount * 16);
81285
+ for (let i4 = 0; i4 < partCount; i4++) {
81286
+ const start = i4 * partSize;
81287
+ const end = Math.min(start + partSize, totalSize);
81288
+ const partBuffer = buffer.slice(start, end);
81289
+ const hex = (0, import_md5.default)(new Uint8Array(partBuffer));
81290
+ for (let j4 = 0; j4 < 16; j4++) {
81291
+ rawDigests[i4 * 16 + j4] = parseInt(hex.slice(j4 * 2, j4 * 2 + 2), 16);
81292
+ }
81293
+ }
81294
+ return `${(0, import_md5.default)(rawDigests)}-${partCount}`;
81295
+ }
81272
81296
  function crc32Hex(input) {
81273
81297
  let bytes;
81274
81298
  if (input instanceof Uint8Array) {
@@ -81345,6 +81369,7 @@ async function curateOne({
81345
81369
  outputTarget,
81346
81370
  mappingOptions,
81347
81371
  hashMethod,
81372
+ hashPartSize,
81348
81373
  previousSourceFileInfo,
81349
81374
  previousMappedFileInfo
81350
81375
  }) {
@@ -81444,7 +81469,11 @@ async function curateOne({
81444
81469
  let canSkip = false;
81445
81470
  if (previousSourceFileInfo?.preMappedHash !== void 0) {
81446
81471
  try {
81447
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
81472
+ preMappedHash = await hash(
81473
+ fileArrayBuffer,
81474
+ hashMethod ?? "md5",
81475
+ hashPartSize
81476
+ );
81448
81477
  } catch (e4) {
81449
81478
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
81450
81479
  }
@@ -81549,7 +81578,11 @@ async function curateOne({
81549
81578
  }
81550
81579
  if (!preMappedHash) {
81551
81580
  try {
81552
- preMappedHash = await hash(fileArrayBuffer, hashMethod ?? "md5");
81581
+ preMappedHash = await hash(
81582
+ fileArrayBuffer,
81583
+ hashMethod ?? "md5",
81584
+ hashPartSize
81585
+ );
81553
81586
  } catch (e4) {
81554
81587
  console.warn(`Failed to compute preMappedHash for ${fileInfo.name}`, e4);
81555
81588
  }
@@ -81560,7 +81593,11 @@ async function curateOne({
81560
81593
  const modifiedArrayBuffer = mappedDicomData.write({
81561
81594
  allowInvalidVRLength: true
81562
81595
  });
81563
- postMappedHash = await hash(modifiedArrayBuffer, hashMethod ?? "md5");
81596
+ postMappedHash = await hash(
81597
+ modifiedArrayBuffer,
81598
+ hashMethod ?? "md5",
81599
+ hashPartSize
81600
+ );
81564
81601
  fileArrayBuffer = null;
81565
81602
  const previousPostMappedHash = previousMappedFileInfo ? previousMappedFileInfo(clonedMapResults.outputFilePath)?.postMappedHash : void 0;
81566
81603
  if (previousPostMappedHash !== void 0 && previousPostMappedHash === postMappedHash) {
@@ -87487,7 +87524,7 @@ async function dispatchMappingJobs() {
87487
87524
  const { fileInfo, previousFileInfo } = filesToProcess.pop();
87488
87525
  const mappingWorker = availableMappingWorkers.pop();
87489
87526
  workerCurrentFile.set(mappingWorker, fileInfo);
87490
- const { outputTarget, hashMethod, ...mappingOptions } = (
87527
+ const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
87491
87528
  // Not partial anymore.
87492
87529
  mappingWorkerOptions
87493
87530
  );
@@ -87497,6 +87534,7 @@ async function dispatchMappingJobs() {
87497
87534
  outputTarget: await getHttpOutputHeaders(outputTarget),
87498
87535
  previousFileInfo,
87499
87536
  hashMethod,
87537
+ hashPartSize,
87500
87538
  serializedMappingOptions: serializeMappingOptions(mappingOptions)
87501
87539
  });
87502
87540
  workersActive += 1;
@@ -87777,6 +87815,7 @@ async function collectMappingOptions(organizeOptions) {
87777
87815
  const skipModifications = organizeOptions.skipModifications ?? false;
87778
87816
  const skipValidation = organizeOptions.skipValidation ?? false;
87779
87817
  const hashMethod = organizeOptions.hashMethod;
87818
+ const hashPartSize = organizeOptions.hashPartSize;
87780
87819
  const dateOffset = organizeOptions.dateOffset;
87781
87820
  if (requiresDateOffset(deIdOpts) && !dateOffset?.match(iso8601)) {
87782
87821
  throw new Error(
@@ -87791,7 +87830,8 @@ async function collectMappingOptions(organizeOptions) {
87791
87830
  skipModifications,
87792
87831
  skipValidation,
87793
87832
  dateOffset,
87794
- hashMethod
87833
+ hashMethod,
87834
+ hashPartSize
87795
87835
  };
87796
87836
  }
87797
87837
  function queueFilesForMapping(organizeOptions) {
@@ -12102,7 +12102,7 @@ async function dispatchMappingJobs() {
12102
12102
  const { fileInfo, previousFileInfo } = filesToProcess.pop();
12103
12103
  const mappingWorker = availableMappingWorkers.pop();
12104
12104
  workerCurrentFile.set(mappingWorker, fileInfo);
12105
- const { outputTarget, hashMethod, ...mappingOptions } = (
12105
+ const { outputTarget, hashMethod, hashPartSize, ...mappingOptions } = (
12106
12106
  // Not partial anymore.
12107
12107
  mappingWorkerOptions
12108
12108
  );
@@ -12112,6 +12112,7 @@ async function dispatchMappingJobs() {
12112
12112
  outputTarget: await getHttpOutputHeaders(outputTarget),
12113
12113
  previousFileInfo,
12114
12114
  hashMethod,
12115
+ hashPartSize,
12115
12116
  serializedMappingOptions: serializeMappingOptions(mappingOptions)
12116
12117
  });
12117
12118
  workersActive += 1;
@@ -9,6 +9,7 @@ export type MappingRequest = {
9
9
  preMappedHash?: string;
10
10
  };
11
11
  hashMethod?: THashMethod;
12
+ hashPartSize?: number;
12
13
  serializedMappingOptions: TSerializedMappingOptions;
13
14
  } | {
14
15
  request: 'fileInfoIndex';
@@ -4,6 +4,7 @@ export type TCurateOneArgs = {
4
4
  outputTarget: TOutputTarget;
5
5
  mappingOptions: TMappingOptions;
6
6
  hashMethod?: THashMethod;
7
+ hashPartSize?: number;
7
8
  previousSourceFileInfo?: {
8
9
  size?: number;
9
10
  mtime?: string;
@@ -13,6 +14,6 @@ export type TCurateOneArgs = {
13
14
  postMappedHash?: string;
14
15
  } | undefined;
15
16
  };
16
- export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
17
+ export declare function curateOne({ fileInfo, outputTarget, mappingOptions, hashMethod, hashPartSize, previousSourceFileInfo, previousMappedFileInfo, }: TCurateOneArgs): Promise<Omit<Partial<TMapResults>, 'anomalies'> & {
17
18
  anomalies: TMapResults['anomalies'];
18
19
  }>;
@@ -1,2 +1,2 @@
1
1
  import { THashMethod } from './types';
2
- export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod): Promise<string>;
2
+ export declare function hash(buffer: ArrayBuffer, hashMethod: THashMethod, hashPartSize?: number): Promise<string>;
@@ -9,6 +9,7 @@ import type { TMappingOptions, TFileInfo, TProgressMessage, TOutputTarget, TFile
9
9
  export type TMappingWorkerOptions = TMappingOptions & {
10
10
  outputTarget?: TOutputTarget;
11
11
  hashMethod?: THashMethod;
12
+ hashPartSize?: number;
12
13
  };
13
14
  export type ProgressCallback = (message: TProgressMessage) => void;
14
15
  export declare const availableMappingWorkers: Worker[];
@@ -29,6 +29,7 @@ export type OrganizeOptions = {
29
29
  dateOffset?: Iso8601Duration;
30
30
  skipCollectingMappings?: boolean;
31
31
  hashMethod?: THashMethod;
32
+ hashPartSize?: number;
32
33
  fileInfoIndex?: TFileInfoIndex;
33
34
  excludedPathGlobs?: string[];
34
35
  workerCount?: number;
@@ -49,7 +50,7 @@ export type OrganizeOptions = {
49
50
  inputType: 's3';
50
51
  inputS3Bucket: TS3BucketOptions;
51
52
  });
52
- export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5';
53
+ export type THashMethod = 'crc64' | 'crc32' | 'sha256' | 'md5' | 'aws-s3-etag-2025';
53
54
  export type THTTPHeaderProvider = () => Promise<Record<string, string>> | Record<string, string>;
54
55
  export type THTTPOptions = {
55
56
  url: string;