dicom-curate 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,8 +31,20 @@ export function curateOne(_a) {
31
31
  dicomData = dcmjs.data.DicomMessage.readFile(fileArrayBuffer);
32
32
  }
33
33
  catch (error) {
34
+ console.warn(`[dicom-curate] Could not parse ${fileInfo.name} as DICOM data:`, error);
35
+ // Create a more informative error result
34
36
  const mapResults = {
35
- anomalies: [`Could not parse ${fileInfo.name} as dicom data`],
37
+ anomalies: [`Could not parse ${fileInfo.name} as DICOM data`],
38
+ errors: [`File ${fileInfo.name} is not a valid DICOM file or is corrupted`],
39
+ sourceInstanceUID: `invalid_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
40
+ outputFilePath: `${fileInfo.path}/${fileInfo.name}`,
41
+ // Add metadata about the failed file
42
+ fileInfo: {
43
+ name: fileInfo.name,
44
+ size: fileInfo.size,
45
+ path: fileInfo.path,
46
+ parseError: error instanceof Error ? error.message : String(error)
47
+ }
36
48
  };
37
49
  return mapResults;
38
50
  }
package/dist/esm/index.js CHANGED
@@ -27,8 +27,11 @@ export { specVersion } from './config/specVersion.js';
27
27
  export { sample2PassCurationSpecification as sampleSpecification } from './config/sample2PassCurationSpecification.js';
28
28
  export { csvTextToRows } from './csvMapping.js';
29
29
  const mappingWorkerCount = navigator.hardwareConcurrency;
30
+ // Queued entries have the shape { fileIndex, fileInfo, scanAnomalies }
30
31
  let filesToProcess = [];
31
32
  let directoryScanFinished = false;
33
+ // Track scan anomalies separately since they don't go through the processing pipeline
34
+ let scanAnomalies = [];
32
35
  function requiresDateOffset(deIdOpts) {
33
36
  return (deIdOpts !== 'Off' &&
34
37
  deIdOpts.retainLongitudinalTemporalInformationOptions === 'Offset');
@@ -47,24 +50,34 @@ function requiresDateOffset(deIdOpts) {
47
50
  function initializeFileListWorker() {
48
51
  filesToProcess = [];
49
52
  directoryScanFinished = false;
53
+ scanAnomalies = [];
50
54
  const fileListWorker = new Worker(new URL('./scanDirectoryWorker.js', import.meta.url), { type: 'module' });
51
55
  fileListWorker.addEventListener('message', (event) => {
52
56
  switch (event.data.response) {
53
57
  case 'file':
54
58
  const { fileIndex, fileInfo } = event.data;
55
- filesToProcess.push({ fileIndex, fileInfo });
59
+ filesToProcess.push({
60
+ fileIndex,
61
+ fileInfo,
62
+ scanAnomalies: [], // Files sent to processing have no scan anomalies
63
+ });
56
64
  // Could do some throttling:
57
65
  // if (filesToProcess.length > 10) {
58
66
  // fileListWorker.postMessage({ request: 'stop' })
59
67
  // }
60
68
  dispatchMappingJobs();
61
69
  break;
70
+ case 'scanAnomalies':
71
+ // Handle scan anomalies separately - they don't go to processing
72
+ const { fileInfo: anomalyFileInfo, anomalies } = event.data;
73
+ scanAnomalies.push({ fileInfo: anomalyFileInfo, anomalies });
74
+ break;
62
75
  case 'done':
63
76
  console.log('directoryScanFinished');
64
77
  directoryScanFinished = true;
65
78
  break;
66
79
  default:
67
- //@ts-expect-error: response is string here, not never
80
+ // @ts-expect-error: response is string here, not never
68
81
  console.error(`Unknown response from worker ${event.data.response}`);
69
82
  }
70
83
  dispatchMappingJobs();
@@ -143,6 +156,22 @@ function dispatchMappingJobs() {
143
156
  clearCaches();
144
157
  console.log(`Finished mapping ${mapResultsList.length} files`);
145
158
  console.log('job is finished');
159
+ // Create individual mapResults entries for each scan anomaly
160
+ // Only do this during actual processing (not first pass)
161
+ if (!mappingWorkerOptions.skipWrite) {
162
+ scanAnomalies.forEach(({ fileInfo, anomalies }) => {
163
+ const scanAnomalyResult = {
164
+ sourceInstanceUID: `scan_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
165
+ outputFilePath: `${fileInfo.path}/${fileInfo.name}`, // Use the actual file path
166
+ mappings: {},
167
+ anomalies: anomalies, // Keep the original anomalies array
168
+ errors: [],
169
+ quarantine: {},
170
+ };
171
+ // Add each scan anomaly result to the final results
172
+ mapResultsList.push(scanAnomalyResult);
173
+ });
174
+ }
146
175
  progressCallback({
147
176
  response: 'done',
148
177
  mapResultsList: mapResultsList,
@@ -197,9 +226,14 @@ function queueFilesForMapping(organizeOptions) {
197
226
  kind: 'blob',
198
227
  blob: inputFile,
199
228
  };
200
- filesToProcess.push({ fileInfo, fileIndex });
201
- dispatchMappingJobs();
229
+ filesToProcess.push({
230
+ fileInfo,
231
+ fileIndex,
232
+ scanAnomalies: [],
233
+ });
202
234
  });
235
+ // Dispatch jobs once after all files are queued to prevent race conditions
236
+ dispatchMappingJobs();
203
237
  }
204
238
  let progressCallback;
205
239
  function curateMany(organizeOptions, onProgress) {
@@ -225,9 +259,12 @@ function curateMany(organizeOptions, onProgress) {
225
259
  //
226
260
  if (organizeOptions.inputType === 'directory') {
227
261
  const fileListWorker = initializeFileListWorker();
262
+ const curationSpec = organizeOptions.curationSpec();
263
+ const specExcludedFiletypes = curationSpec.excludedFiletypes;
228
264
  fileListWorker.postMessage({
229
265
  request: 'scan',
230
266
  directoryHandle: organizeOptions.inputDirectory,
267
+ excludedFiletypes: specExcludedFiletypes,
231
268
  });
232
269
  }
233
270
  else if (organizeOptions.inputType === 'files') {
@@ -14,11 +14,59 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
14
14
  function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
15
15
  function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
16
16
  };
17
+ // File names (compared case-insensitively against the whole name, not just the extension) to ALWAYS exclude from processing
18
+ const DEFAULT_EXCLUDED_FILETYPES = ['dicomdir', 'dicomdir.dir', 'dicomdir.dat', 'dicomdir.bak', 'thumbs.db', '.ds_store'];
17
19
  let keepScanning = true;
20
+ let excludedFiletypes = [];
21
+ /**
22
+ * Check if a file should be processed based on filtering rules
23
+ * @param file - The file to check
24
+ * @param fileAnomalies - Array to collect anomalies for this specific file e.g. excluded files
25
+ * @returns Promise<boolean> - True if the file should be processed
26
+ */
27
+ function shouldProcessFile(file, fileAnomalies) {
28
+ return __awaiter(this, void 0, void 0, function* () {
29
+ const allExcludedFiletypes = [
30
+ ...DEFAULT_EXCLUDED_FILETYPES,
31
+ ...excludedFiletypes
32
+ ];
33
+ try {
34
+ // Check if the file is in the list of excluded files
35
+ if (allExcludedFiletypes.some(excluded => file.name.toLowerCase() === excluded.toLowerCase())) {
36
+ fileAnomalies.push(`Skipped excluded file: ${file.name}`);
37
+ return false;
38
+ }
39
+ // Check filesize - (valid) DICOM files are at least 132 bytes (128-byte preamble + 4-byte signature)
40
+ if (file.size < 132) {
41
+ fileAnomalies.push(`Skipped very small file: ${file.name} (${file.size} bytes)`);
42
+ return false;
43
+ }
44
+ // Check for DICOM signature "DICM" at offset 128
45
+ const headerBytes = yield file.slice(128, 132).arrayBuffer();
46
+ const headerView = new Uint8Array(headerBytes);
47
+ const dicomSignature = String.fromCharCode(headerView[0], headerView[1], headerView[2], headerView[3]);
48
+ if (dicomSignature === 'DICM') {
49
+ return true;
50
+ }
51
+ // Don't parse file without DICOM signature
52
+ fileAnomalies.push(`Skipped file without DICOM signature: ${file.name}`);
53
+ return false;
54
+ }
55
+ catch (error) {
56
+ fileAnomalies.push(`Unable to determine file validity - processing anyway: ${file.name} - ${error}`);
57
+ // If vetting process fails, let the parser decide
58
+ return true;
59
+ }
60
+ });
61
+ }
18
62
  self.addEventListener('message', (event) => {
19
63
  switch (event.data.request) {
20
64
  case 'scan':
21
65
  console.log(`Starting directory scan of ${event.data.directoryHandle.name}`);
66
+ // Update excluded filetypes if provided
67
+ if (event.data.excludedFiletypes) {
68
+ excludedFiletypes = event.data.excludedFiletypes;
69
+ }
22
70
  keepScanning = true;
23
71
  scanDirectory(event.data.directoryHandle);
24
72
  break;
@@ -57,17 +105,36 @@ function scanDirectory(dir) {
57
105
  for (const entry of entries) {
58
106
  if (entry.kind === 'file' && keepScanning) {
59
107
  const file = yield entry.getFile();
60
- self.postMessage({
61
- response: 'file',
62
- fileIndex: fileIndex++,
63
- fileInfo: {
64
- path: prefix,
65
- name: entry.name,
66
- size: file.size,
67
- kind: 'handle',
68
- fileHandle: entry,
69
- },
70
- });
108
+ const fileAnomalies = [];
109
+ if (yield shouldProcessFile(file, fileAnomalies)) {
110
+ // Send file to processing pipeline
111
+ self.postMessage({
112
+ response: 'file',
113
+ fileIndex: fileIndex++,
114
+ fileInfo: {
115
+ path: prefix,
116
+ name: entry.name,
117
+ size: file.size,
118
+ kind: 'handle',
119
+ fileHandle: entry,
120
+ },
121
+ });
122
+ }
123
+ else if (fileAnomalies.length > 0) {
124
+ // Send scan anomalies as a separate message so they are not sent to processing (curate)
125
+ self.postMessage({
126
+ response: 'scanAnomalies',
127
+ fileIndex: fileIndex++,
128
+ fileInfo: {
129
+ path: prefix,
130
+ name: entry.name,
131
+ size: file.size,
132
+ kind: 'handle',
133
+ fileHandle: entry,
134
+ },
135
+ anomalies: fileAnomalies,
136
+ });
137
+ }
71
138
  }
72
139
  else if (entry.kind === 'directory' && keepScanning) {
73
140
  yield traverse(entry, prefix + '/' + entry.name);
@@ -4,6 +4,11 @@ export type FileScanMsg = {
4
4
  response: 'file';
5
5
  fileIndex: number;
6
6
  fileInfo: TFileInfo;
7
+ } | {
8
+ response: 'scanAnomalies';
9
+ fileIndex: number;
10
+ fileInfo: TFileInfo;
11
+ anomalies: string[];
7
12
  } | {
8
13
  response: 'done';
9
14
  };
@@ -137,6 +137,7 @@ export type TCurationSpecification = {
137
137
  additionalData?: {
138
138
  mapping: TMappedValues;
139
139
  } & (TMappingInputDirect | TMappingInputTwoPass);
140
+ excludedFiletypes?: string[];
140
141
  };
141
142
  type TProgressMessageBase = {
142
143
  totalFiles?: number;