dicom-curate 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/curateOne.js +13 -1
- package/dist/esm/index.js +41 -4
- package/dist/esm/scanDirectoryWorker.js +78 -11
- package/dist/types/scanDirectoryWorker.d.ts +5 -0
- package/dist/types/types.d.ts +1 -0
- package/dist/umd/dicom-curate.umd.js +275 -59
- package/dist/umd/dicom-curate.umd.js.map +1 -1
- package/dist/umd/dicom-curate.umd.min.js +3 -3
- package/dist/umd/dicom-curate.umd.min.js.map +1 -1
- package/package.json +1 -1
package/dist/esm/curateOne.js
CHANGED
|
@@ -31,8 +31,20 @@ export function curateOne(_a) {
|
|
|
31
31
|
dicomData = dcmjs.data.DicomMessage.readFile(fileArrayBuffer);
|
|
32
32
|
}
|
|
33
33
|
catch (error) {
|
|
34
|
+
console.warn(`[dicom-curate] Could not parse ${fileInfo.name} as DICOM data:`, error);
|
|
35
|
+
// Create a more informative error result
|
|
34
36
|
const mapResults = {
|
|
35
|
-
anomalies: [`Could not parse ${fileInfo.name} as
|
|
37
|
+
anomalies: [`Could not parse ${fileInfo.name} as DICOM data`],
|
|
38
|
+
errors: [`File ${fileInfo.name} is not a valid DICOM file or is corrupted`],
|
|
39
|
+
sourceInstanceUID: `invalid_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
|
40
|
+
outputFilePath: `${fileInfo.path}/${fileInfo.name}`,
|
|
41
|
+
// Add metadata about the failed file
|
|
42
|
+
fileInfo: {
|
|
43
|
+
name: fileInfo.name,
|
|
44
|
+
size: fileInfo.size,
|
|
45
|
+
path: fileInfo.path,
|
|
46
|
+
parseError: error instanceof Error ? error.message : String(error)
|
|
47
|
+
}
|
|
36
48
|
};
|
|
37
49
|
return mapResults;
|
|
38
50
|
}
|
package/dist/esm/index.js
CHANGED
|
@@ -27,8 +27,11 @@ export { specVersion } from './config/specVersion.js';
|
|
|
27
27
|
export { sample2PassCurationSpecification as sampleSpecification } from './config/sample2PassCurationSpecification.js';
|
|
28
28
|
export { csvTextToRows } from './csvMapping.js';
|
|
29
29
|
const mappingWorkerCount = navigator.hardwareConcurrency;
|
|
30
|
+
// Update the type to include scan anomalies
|
|
30
31
|
let filesToProcess = [];
|
|
31
32
|
let directoryScanFinished = false;
|
|
33
|
+
// Track scan anomalies separately since they don't go through the processing pipeline
|
|
34
|
+
let scanAnomalies = [];
|
|
32
35
|
function requiresDateOffset(deIdOpts) {
|
|
33
36
|
return (deIdOpts !== 'Off' &&
|
|
34
37
|
deIdOpts.retainLongitudinalTemporalInformationOptions === 'Offset');
|
|
@@ -47,24 +50,34 @@ function requiresDateOffset(deIdOpts) {
|
|
|
47
50
|
function initializeFileListWorker() {
|
|
48
51
|
filesToProcess = [];
|
|
49
52
|
directoryScanFinished = false;
|
|
53
|
+
scanAnomalies = [];
|
|
50
54
|
const fileListWorker = new Worker(new URL('./scanDirectoryWorker.js', import.meta.url), { type: 'module' });
|
|
51
55
|
fileListWorker.addEventListener('message', (event) => {
|
|
52
56
|
switch (event.data.response) {
|
|
53
57
|
case 'file':
|
|
54
58
|
const { fileIndex, fileInfo } = event.data;
|
|
55
|
-
filesToProcess.push({
|
|
59
|
+
filesToProcess.push({
|
|
60
|
+
fileIndex,
|
|
61
|
+
fileInfo,
|
|
62
|
+
scanAnomalies: [], // Files sent to processing have no scan anomalies
|
|
63
|
+
});
|
|
56
64
|
// Could do some throttling:
|
|
57
65
|
// if (filesToProcess.length > 10) {
|
|
58
66
|
// fileListWorker.postMessage({ request: 'stop' })
|
|
59
67
|
// }
|
|
60
68
|
dispatchMappingJobs();
|
|
61
69
|
break;
|
|
70
|
+
case 'scanAnomalies':
|
|
71
|
+
// Handle scan anomalies separately - they don't go to processing
|
|
72
|
+
const { fileInfo: anomalyFileInfo, anomalies } = event.data;
|
|
73
|
+
scanAnomalies.push({ fileInfo: anomalyFileInfo, anomalies });
|
|
74
|
+
break;
|
|
62
75
|
case 'done':
|
|
63
76
|
console.log('directoryScanFinished');
|
|
64
77
|
directoryScanFinished = true;
|
|
65
78
|
break;
|
|
66
79
|
default:
|
|
67
|
-
//
|
|
80
|
+
// @ts-expect-error: response is string here, not never
|
|
68
81
|
console.error(`Unknown response from worker ${event.data.response}`);
|
|
69
82
|
}
|
|
70
83
|
dispatchMappingJobs();
|
|
@@ -143,6 +156,22 @@ function dispatchMappingJobs() {
|
|
|
143
156
|
clearCaches();
|
|
144
157
|
console.log(`Finished mapping ${mapResultsList.length} files`);
|
|
145
158
|
console.log('job is finished');
|
|
159
|
+
// Create individual mapResults entries for each scan anomaly
|
|
160
|
+
// Only do this during actual processing (not first pass)
|
|
161
|
+
if (!mappingWorkerOptions.skipWrite) {
|
|
162
|
+
scanAnomalies.forEach(({ fileInfo, anomalies }) => {
|
|
163
|
+
const scanAnomalyResult = {
|
|
164
|
+
sourceInstanceUID: `scan_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
|
165
|
+
outputFilePath: `${fileInfo.path}/${fileInfo.name}`, // Use the actual file path
|
|
166
|
+
mappings: {},
|
|
167
|
+
anomalies: anomalies, // Keep the original anomalies array
|
|
168
|
+
errors: [],
|
|
169
|
+
quarantine: {},
|
|
170
|
+
};
|
|
171
|
+
// Add each scan anomaly result to the final results
|
|
172
|
+
mapResultsList.push(scanAnomalyResult);
|
|
173
|
+
});
|
|
174
|
+
}
|
|
146
175
|
progressCallback({
|
|
147
176
|
response: 'done',
|
|
148
177
|
mapResultsList: mapResultsList,
|
|
@@ -197,9 +226,14 @@ function queueFilesForMapping(organizeOptions) {
|
|
|
197
226
|
kind: 'blob',
|
|
198
227
|
blob: inputFile,
|
|
199
228
|
};
|
|
200
|
-
filesToProcess.push({
|
|
201
|
-
|
|
229
|
+
filesToProcess.push({
|
|
230
|
+
fileInfo,
|
|
231
|
+
fileIndex,
|
|
232
|
+
scanAnomalies: [],
|
|
233
|
+
});
|
|
202
234
|
});
|
|
235
|
+
// Dispatch jobs once after all files are queued to prevent race conditions
|
|
236
|
+
dispatchMappingJobs();
|
|
203
237
|
}
|
|
204
238
|
let progressCallback;
|
|
205
239
|
function curateMany(organizeOptions, onProgress) {
|
|
@@ -225,9 +259,12 @@ function curateMany(organizeOptions, onProgress) {
|
|
|
225
259
|
//
|
|
226
260
|
if (organizeOptions.inputType === 'directory') {
|
|
227
261
|
const fileListWorker = initializeFileListWorker();
|
|
262
|
+
const curationSpec = organizeOptions.curationSpec();
|
|
263
|
+
const specExcludedFiletypes = curationSpec.excludedFiletypes;
|
|
228
264
|
fileListWorker.postMessage({
|
|
229
265
|
request: 'scan',
|
|
230
266
|
directoryHandle: organizeOptions.inputDirectory,
|
|
267
|
+
excludedFiletypes: specExcludedFiletypes,
|
|
231
268
|
});
|
|
232
269
|
}
|
|
233
270
|
else if (organizeOptions.inputType === 'files') {
|
package/dist/esm/scanDirectoryWorker.js
CHANGED
|
@@ -14,11 +14,59 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
|
14
14
|
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
15
15
|
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
16
16
|
};
|
|
17
|
+
// Case-insensitive filetypes to ALWAYS exclude from processing
|
|
18
|
+
const DEFAULT_EXCLUDED_FILETYPES = ['dicomdir', 'dicomdir.dir', 'dicomdir.dat', 'dicomdir.bak', 'thumbs.db', '.ds_store'];
|
|
17
19
|
let keepScanning = true;
|
|
20
|
+
let excludedFiletypes = [];
|
|
21
|
+
/**
|
|
22
|
+
* Check if a file should be processed based on filtering rules
|
|
23
|
+
* @param file - The file to check
|
|
24
|
+
* @param fileAnomalies - Array to collect anomalies for this specific file e.g. excluded files
|
|
25
|
+
* @returns Promise<boolean> - True if the file should be processed
|
|
26
|
+
*/
|
|
27
|
+
function shouldProcessFile(file, fileAnomalies) {
|
|
28
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
29
|
+
const allExcludedFiletypes = [
|
|
30
|
+
...DEFAULT_EXCLUDED_FILETYPES,
|
|
31
|
+
...excludedFiletypes
|
|
32
|
+
];
|
|
33
|
+
try {
|
|
34
|
+
// Check if the file is in the list of excluded files
|
|
35
|
+
if (allExcludedFiletypes.some(excluded => file.name.toLowerCase() === excluded.toLowerCase())) {
|
|
36
|
+
fileAnomalies.push(`Skipped excluded file: ${file.name}`);
|
|
37
|
+
return false;
|
|
38
|
+
}
|
|
39
|
+
// Check filesize - (valid) DICOM files are at least 132 bytes (128-byte preamble + 4-byte signature)
|
|
40
|
+
if (file.size < 132) {
|
|
41
|
+
fileAnomalies.push(`Skipped very small file: ${file.name} (${file.size} bytes)`);
|
|
42
|
+
return false;
|
|
43
|
+
}
|
|
44
|
+
// Check for DICOM signature "DICM" at offset 128
|
|
45
|
+
const headerBytes = yield file.slice(128, 132).arrayBuffer();
|
|
46
|
+
const headerView = new Uint8Array(headerBytes);
|
|
47
|
+
const dicomSignature = String.fromCharCode(headerView[0], headerView[1], headerView[2], headerView[3]);
|
|
48
|
+
if (dicomSignature === 'DICM') {
|
|
49
|
+
return true;
|
|
50
|
+
}
|
|
51
|
+
// Don't parse file without DICOM signature
|
|
52
|
+
fileAnomalies.push(`Skipped file without DICOM signature: ${file.name}`);
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
catch (error) {
|
|
56
|
+
fileAnomalies.push(`Unable to determine file validity - processing anyway: ${file.name} - ${error}`);
|
|
57
|
+
// If vetting process fails, let the parser decide
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
}
|
|
18
62
|
self.addEventListener('message', (event) => {
|
|
19
63
|
switch (event.data.request) {
|
|
20
64
|
case 'scan':
|
|
21
65
|
console.log(`Starting directory scan of ${event.data.directoryHandle.name}`);
|
|
66
|
+
// Update excluded filetypes if provided
|
|
67
|
+
if (event.data.excludedFiletypes) {
|
|
68
|
+
excludedFiletypes = event.data.excludedFiletypes;
|
|
69
|
+
}
|
|
22
70
|
keepScanning = true;
|
|
23
71
|
scanDirectory(event.data.directoryHandle);
|
|
24
72
|
break;
|
|
@@ -57,17 +105,36 @@ function scanDirectory(dir) {
|
|
|
57
105
|
for (const entry of entries) {
|
|
58
106
|
if (entry.kind === 'file' && keepScanning) {
|
|
59
107
|
const file = yield entry.getFile();
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
108
|
+
const fileAnomalies = [];
|
|
109
|
+
if (yield shouldProcessFile(file, fileAnomalies)) {
|
|
110
|
+
// Send file to processing pipeline
|
|
111
|
+
self.postMessage({
|
|
112
|
+
response: 'file',
|
|
113
|
+
fileIndex: fileIndex++,
|
|
114
|
+
fileInfo: {
|
|
115
|
+
path: prefix,
|
|
116
|
+
name: entry.name,
|
|
117
|
+
size: file.size,
|
|
118
|
+
kind: 'handle',
|
|
119
|
+
fileHandle: entry,
|
|
120
|
+
},
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
else if (fileAnomalies.length > 0) {
|
|
124
|
+
// Send scan anomalies as separate message so they are not sent to processing (curate)
|
|
125
|
+
self.postMessage({
|
|
126
|
+
response: 'scanAnomalies',
|
|
127
|
+
fileIndex: fileIndex++,
|
|
128
|
+
fileInfo: {
|
|
129
|
+
path: prefix,
|
|
130
|
+
name: entry.name,
|
|
131
|
+
size: file.size,
|
|
132
|
+
kind: 'handle',
|
|
133
|
+
fileHandle: entry,
|
|
134
|
+
},
|
|
135
|
+
anomalies: fileAnomalies,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
71
138
|
}
|
|
72
139
|
else if (entry.kind === 'directory' && keepScanning) {
|
|
73
140
|
yield traverse(entry, prefix + '/' + entry.name);
|
package/dist/types/types.d.ts
CHANGED