dicom-curate 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/esm/collectMappings.js +1 -0
- package/dist/esm/config/dicom/tagConversion.js +44 -0
- package/dist/esm/curateDict.js +65 -0
- package/dist/esm/curateOne.js +13 -1
- package/dist/esm/deidentifyPS315E.js +50 -2
- package/dist/esm/index.js +41 -4
- package/dist/esm/offsetDateTime.js +5 -3
- package/dist/esm/scanDirectoryWorker.js +78 -11
- package/dist/types/config/dicom/tagConversion.d.ts +12 -0
- package/dist/types/deidentifyPS315E.d.ts +2 -1
- package/dist/types/scanDirectoryWorker.d.ts +5 -0
- package/dist/types/types.d.ts +1 -0
- package/dist/umd/dicom-curate.umd.js +438 -64
- package/dist/umd/dicom-curate.umd.js.map +1 -1
- package/dist/umd/dicom-curate.umd.min.js +3 -3
- package/dist/umd/dicom-curate.umd.min.js.map +1 -1
- package/package.json +1 -1
|
@@ -99,6 +99,7 @@ export default function collectMappings(inputFilePath, inputFileIndex, dicomData
|
|
|
99
99
|
dicomPS315EOptions: finalSpec.dicomPS315EOptions,
|
|
100
100
|
dateOffset: mappingOptions.dateOffset,
|
|
101
101
|
mapResults,
|
|
102
|
+
originalDicomDict: dicomData.dict,
|
|
102
103
|
});
|
|
103
104
|
}
|
|
104
105
|
// Moving this after collectMappingsInData as this should take precedence.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import * as dcmjs from 'dcmjs';
|
|
2
|
+
/**
 * Report whether a tag identifier denotes a private DICOM tag.
 *
 * Only identifiers written as exactly eight hexadecimal digits are inspected.
 * For those, the tag is private when its group number (the first four digits)
 * is odd. Any other input, such as a keyword, is reported as not private.
 *
 * @param tagId - Tag identifier or keyword to classify.
 * @returns True when `tagId` is an 8-hex-digit tag ID with an odd group.
 */
export function isPrivateTag(tagId) {
    const looksLikeTagId = /^[0-9A-Fa-f]{8}$/.test(tagId);
    if (!looksLikeTagId) {
        // Keywords (and anything else that is not a bare tag ID) are never private.
        return false;
    }
    const groupNumber = Number.parseInt(tagId.slice(0, 4), 16);
    // Odd group number <=> lowest bit set.
    return (groupNumber & 1) === 1;
}
|
|
14
|
+
/**
|
|
15
|
+
* Convert a DICOM keyword to its corresponding tag ID
|
|
16
|
+
*/
|
|
17
|
+
export function convertKeywordToTagId(keyword) {
|
|
18
|
+
var _a;
|
|
19
|
+
// Use dcmjs built-in conversion for standard DICOM keywords
|
|
20
|
+
// For private tags (which don't have keywords), keep as-is
|
|
21
|
+
const tagId = isPrivateTag(keyword) ? keyword :
|
|
22
|
+
((_a = dcmjs.data.DicomMetaDictionary.nameMap[keyword]) === null || _a === void 0 ? void 0 : _a.tag) || keyword;
|
|
23
|
+
// Remove parentheses and commas, convert to the format used in dictionary keys
|
|
24
|
+
return tagId.replace(/[(),]/g, '').toLowerCase();
|
|
25
|
+
}
|
|
26
|
+
/**
|
|
27
|
+
* Convert a keyword path to tag ID path for nested DICOM elements
|
|
28
|
+
*/
|
|
29
|
+
export function convertKeywordPathToTagIdPath(keywordPath) {
|
|
30
|
+
// Handle nested paths like "GeneralMatchingSequence[0].00510014"
|
|
31
|
+
const parts = keywordPath.split('.');
|
|
32
|
+
const convertedParts = parts.map(part => {
|
|
33
|
+
const arrayMatch = part.match(/^(.+)\[(\d+)\]$/);
|
|
34
|
+
if (arrayMatch) {
|
|
35
|
+
const [, keyword, index] = arrayMatch;
|
|
36
|
+
const tagId = convertKeywordToTagId(keyword);
|
|
37
|
+
return `${tagId}[${index}]`;
|
|
38
|
+
}
|
|
39
|
+
else {
|
|
40
|
+
return convertKeywordToTagId(part);
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
return convertedParts.join('.');
|
|
44
|
+
}
|
package/dist/esm/curateDict.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import * as dcmjs from 'dcmjs';
|
|
2
2
|
import collectMappings from './collectMappings.js';
|
|
3
3
|
import mapMetaheader from './mapMetaheader.js';
|
|
4
|
+
import { convertKeywordPathToTagIdPath } from './config/dicom/tagConversion.js';
|
|
4
5
|
import { set as _set, unset as _unset, cloneDeep as _cloneDeep } from 'lodash';
|
|
5
6
|
export default function curateDict(inputFilePath, inputFileIndex, dicomData, mappingOptions) {
|
|
6
7
|
//
|
|
@@ -27,5 +28,69 @@ export default function curateDict(inputFilePath, inputFileIndex, dicomData, map
|
|
|
27
28
|
mapMetaheader(dicomData.meta, naturalData.SOPInstanceUID));
|
|
28
29
|
mappedDicomData.dict =
|
|
29
30
|
dcmjs.data.DicomMetaDictionary.denaturalizeDataset(naturalData);
|
|
31
|
+
// Restore quarantined private tags directly to the final DICOM dict
|
|
32
|
+
// This must be done after denaturalization since private tags aren't in the dictionary
|
|
33
|
+
for (let tagPath in mapResults.quarantine) {
|
|
34
|
+
const quarantinedElement = mapResults.quarantine[tagPath];
|
|
35
|
+
if (!quarantinedElement)
|
|
36
|
+
continue;
|
|
37
|
+
// Convert keyword paths to tag ID paths for restoration
|
|
38
|
+
const tagIdPath = convertKeywordPathToTagIdPath(tagPath);
|
|
39
|
+
// If the quarantined element has DICOM structure (vr and Value), restore it directly
|
|
40
|
+
if (quarantinedElement && typeof quarantinedElement === 'object' && 'Value' in quarantinedElement) {
|
|
41
|
+
// Handle nested paths like "00080413[0].00510014"
|
|
42
|
+
const pathParts = tagIdPath.split('.');
|
|
43
|
+
if (pathParts.length === 2 && pathParts[0].includes('[')) {
|
|
44
|
+
// This is a nested path, handle it specially
|
|
45
|
+
const [sequenceWithIndex, privateTagId] = pathParts;
|
|
46
|
+
const arrayMatch = sequenceWithIndex.match(/^(.+)\[(\d+)\]$/);
|
|
47
|
+
if (arrayMatch) {
|
|
48
|
+
const [, sequenceTagId, index] = arrayMatch;
|
|
49
|
+
let sequence = mappedDicomData.dict[sequenceTagId];
|
|
50
|
+
// If the sequence doesn't exist, we need to create it
|
|
51
|
+
if (!sequence) {
|
|
52
|
+
// Create the sequence with the private tag already included
|
|
53
|
+
const sequenceItemWithPrivateTag = { [privateTagId]: quarantinedElement };
|
|
54
|
+
sequence = {
|
|
55
|
+
vr: 'SQ',
|
|
56
|
+
Value: [sequenceItemWithPrivateTag]
|
|
57
|
+
};
|
|
58
|
+
mappedDicomData.dict[sequenceTagId] = sequence;
|
|
59
|
+
}
|
|
60
|
+
else {
|
|
61
|
+
// Ensure the sequence has a Value array
|
|
62
|
+
if (!sequence.Value) {
|
|
63
|
+
sequence.Value = [];
|
|
64
|
+
}
|
|
65
|
+
// Ensure we have enough items in the sequence
|
|
66
|
+
while (sequence.Value.length <= parseInt(index)) {
|
|
67
|
+
sequence.Value.push({});
|
|
68
|
+
}
|
|
69
|
+
if (sequence && sequence.Value && sequence.Value[parseInt(index)]) {
|
|
70
|
+
// Ensure the sequence item is properly structured
|
|
71
|
+
const sequenceItem = sequence.Value[parseInt(index)];
|
|
72
|
+
if (typeof sequenceItem === 'object' && sequenceItem !== null) {
|
|
73
|
+
// Create a new object with the private tag included
|
|
74
|
+
const newSequenceItem = Object.assign(Object.assign({}, sequenceItem), { [privateTagId]: quarantinedElement });
|
|
75
|
+
sequence.Value[parseInt(index)] = newSequenceItem;
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
// Top-level private tag
|
|
83
|
+
_set(mappedDicomData.dict, tagIdPath, quarantinedElement);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
// For raw values, we need to create a proper DICOM element structure
|
|
88
|
+
// This is a fallback - ideally all quarantined elements should have proper structure
|
|
89
|
+
_set(mappedDicomData.dict, tagIdPath, {
|
|
90
|
+
vr: 'UN', // Unknown VR for private tags
|
|
91
|
+
Value: Array.isArray(quarantinedElement) ? quarantinedElement : [quarantinedElement]
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
30
95
|
return { dicomData: mappedDicomData, mapResults: _cloneDeep(mapResults) };
|
|
31
96
|
}
|
package/dist/esm/curateOne.js
CHANGED
|
@@ -31,8 +31,20 @@ export function curateOne(_a) {
|
|
|
31
31
|
dicomData = dcmjs.data.DicomMessage.readFile(fileArrayBuffer);
|
|
32
32
|
}
|
|
33
33
|
catch (error) {
|
|
34
|
+
console.warn(`[dicom-curate] Could not parse ${fileInfo.name} as DICOM data:`, error);
|
|
35
|
+
// Create a more informative error result
|
|
34
36
|
const mapResults = {
|
|
35
|
-
anomalies: [`Could not parse ${fileInfo.name} as
|
|
37
|
+
anomalies: [`Could not parse ${fileInfo.name} as DICOM data`],
|
|
38
|
+
errors: [`File ${fileInfo.name} is not a valid DICOM file or is corrupted`],
|
|
39
|
+
sourceInstanceUID: `invalid_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
|
40
|
+
outputFilePath: `${fileInfo.path}/${fileInfo.name}`,
|
|
41
|
+
// Add metadata about the failed file
|
|
42
|
+
fileInfo: {
|
|
43
|
+
name: fileInfo.name,
|
|
44
|
+
size: fileInfo.size,
|
|
45
|
+
path: fileInfo.path,
|
|
46
|
+
parseError: error instanceof Error ? error.message : String(error)
|
|
47
|
+
}
|
|
36
48
|
};
|
|
37
49
|
return mapResults;
|
|
38
50
|
}
|
|
@@ -15,6 +15,7 @@ import hashUid from './hashUid.js';
|
|
|
15
15
|
import replaceUid from './replaceUid.js';
|
|
16
16
|
import { elementNamesToAlwaysKeep } from './config/dicom/elementNamesToAlwaysKeep.js';
|
|
17
17
|
import { ps315EElements as rawPs315EElements } from './config/dicom/ps315EElements.js';
|
|
18
|
+
import { convertKeywordToTagId } from './config/dicom/tagConversion.js';
|
|
18
19
|
import { offsetDateTime } from './offsetDateTime.js';
|
|
19
20
|
import { retainAdditionalIds } from './config/dicom/retainAdditionalIds.js';
|
|
20
21
|
import { uidRegistryPS3_06_A1 } from './config/dicom/uidRegistryPS3_06_A1.js';
|
|
@@ -55,7 +56,44 @@ const ps315EElements = rawPs315EElements.map((elm) => {
|
|
|
55
56
|
return elm;
|
|
56
57
|
}
|
|
57
58
|
});
|
|
58
|
-
export default function deidentifyPS315E({ naturalData, dicomPS315EOptions, dateOffset, mapResults, }) {
|
|
59
|
+
export default function deidentifyPS315E({ naturalData, dicomPS315EOptions, dateOffset, mapResults, originalDicomDict, }) {
|
|
60
|
+
// Helper function to get original DICOM element from nested path
|
|
61
|
+
function getOriginalDicomElement(path, tagName) {
|
|
62
|
+
if (!originalDicomDict)
|
|
63
|
+
return null;
|
|
64
|
+
if (!path) {
|
|
65
|
+
// Top-level element
|
|
66
|
+
return originalDicomDict[tagName];
|
|
67
|
+
}
|
|
68
|
+
// Parse nested path like "GeneralMatchingSequence[0]."
|
|
69
|
+
const pathParts = path.split('.');
|
|
70
|
+
let current = originalDicomDict;
|
|
71
|
+
for (const part of pathParts) {
|
|
72
|
+
if (!part)
|
|
73
|
+
continue; // Skip empty parts from trailing dots
|
|
74
|
+
const arrayMatch = part.match(/^(.+)\[(\d+)\]$/);
|
|
75
|
+
if (arrayMatch) {
|
|
76
|
+
const [, sequenceName, index] = arrayMatch;
|
|
77
|
+
const tagId = convertKeywordToTagId(sequenceName);
|
|
78
|
+
if (current[tagId] && current[tagId].Value && current[tagId].Value[parseInt(index)]) {
|
|
79
|
+
current = current[tagId].Value[parseInt(index)];
|
|
80
|
+
}
|
|
81
|
+
else {
|
|
82
|
+
return null;
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
const tagId = convertKeywordToTagId(part);
|
|
87
|
+
if (current[tagId]) {
|
|
88
|
+
current = current[tagId];
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
return null;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
return current[tagName] || null;
|
|
96
|
+
}
|
|
59
97
|
const { cleanDescriptorsOption, cleanDescriptorsExceptions, retainLongitudinalTemporalInformationOptions, retainPatientCharacteristicsOption, retainDeviceIdentityOption, retainUIDsOption, retainSafePrivateOption, retainInstitutionIdentityOption, } = dicomPS315EOptions;
|
|
60
98
|
const taggedps315EEls = ps315EElements.reduce((acc, item) => {
|
|
61
99
|
acc.push(item);
|
|
@@ -327,7 +365,17 @@ export default function deidentifyPS315E({ naturalData, dicomPS315EOptions, date
|
|
|
327
365
|
}
|
|
328
366
|
else {
|
|
329
367
|
// We keep the private tag but register its value for checking.
|
|
330
|
-
|
|
368
|
+
// Store the full DICOM element structure, not just the value
|
|
369
|
+
// For private tags, we need to preserve the original structure
|
|
370
|
+
const originalElement = getOriginalDicomElement(path, name);
|
|
371
|
+
if (originalElement) {
|
|
372
|
+
// Store the original DICOM element structure (with vr and Value)
|
|
373
|
+
mapResults.quarantine[attrPath] = originalElement;
|
|
374
|
+
}
|
|
375
|
+
else {
|
|
376
|
+
// Fallback to the naturalized value if original not available
|
|
377
|
+
mapResults.quarantine[attrPath] = data[name];
|
|
378
|
+
}
|
|
331
379
|
}
|
|
332
380
|
}
|
|
333
381
|
else {
|
package/dist/esm/index.js
CHANGED
|
@@ -27,8 +27,11 @@ export { specVersion } from './config/specVersion.js';
|
|
|
27
27
|
export { sample2PassCurationSpecification as sampleSpecification } from './config/sample2PassCurationSpecification.js';
|
|
28
28
|
export { csvTextToRows } from './csvMapping.js';
|
|
29
29
|
const mappingWorkerCount = navigator.hardwareConcurrency;
|
|
30
|
+
// Update the type to include scan anomalies
|
|
30
31
|
let filesToProcess = [];
|
|
31
32
|
let directoryScanFinished = false;
|
|
33
|
+
// Track scan anomalies separately since they don't go through the processing pipeline
|
|
34
|
+
let scanAnomalies = [];
|
|
32
35
|
function requiresDateOffset(deIdOpts) {
|
|
33
36
|
return (deIdOpts !== 'Off' &&
|
|
34
37
|
deIdOpts.retainLongitudinalTemporalInformationOptions === 'Offset');
|
|
@@ -47,24 +50,34 @@ function requiresDateOffset(deIdOpts) {
|
|
|
47
50
|
function initializeFileListWorker() {
|
|
48
51
|
filesToProcess = [];
|
|
49
52
|
directoryScanFinished = false;
|
|
53
|
+
scanAnomalies = [];
|
|
50
54
|
const fileListWorker = new Worker(new URL('./scanDirectoryWorker.js', import.meta.url), { type: 'module' });
|
|
51
55
|
fileListWorker.addEventListener('message', (event) => {
|
|
52
56
|
switch (event.data.response) {
|
|
53
57
|
case 'file':
|
|
54
58
|
const { fileIndex, fileInfo } = event.data;
|
|
55
|
-
filesToProcess.push({
|
|
59
|
+
filesToProcess.push({
|
|
60
|
+
fileIndex,
|
|
61
|
+
fileInfo,
|
|
62
|
+
scanAnomalies: [], // Files sent to processing have no scan anomalies
|
|
63
|
+
});
|
|
56
64
|
// Could do some throttling:
|
|
57
65
|
// if (filesToProcess.length > 10) {
|
|
58
66
|
// fileListWorker.postMessage({ request: 'stop' })
|
|
59
67
|
// }
|
|
60
68
|
dispatchMappingJobs();
|
|
61
69
|
break;
|
|
70
|
+
case 'scanAnomalies':
|
|
71
|
+
// Handle scan anomalies separately - they don't go to processing
|
|
72
|
+
const { fileInfo: anomalyFileInfo, anomalies } = event.data;
|
|
73
|
+
scanAnomalies.push({ fileInfo: anomalyFileInfo, anomalies });
|
|
74
|
+
break;
|
|
62
75
|
case 'done':
|
|
63
76
|
console.log('directoryScanFinished');
|
|
64
77
|
directoryScanFinished = true;
|
|
65
78
|
break;
|
|
66
79
|
default:
|
|
67
|
-
//
|
|
80
|
+
// @ts-expect-error: response is string here, not never
|
|
68
81
|
console.error(`Unknown response from worker ${event.data.response}`);
|
|
69
82
|
}
|
|
70
83
|
dispatchMappingJobs();
|
|
@@ -143,6 +156,22 @@ function dispatchMappingJobs() {
|
|
|
143
156
|
clearCaches();
|
|
144
157
|
console.log(`Finished mapping ${mapResultsList.length} files`);
|
|
145
158
|
console.log('job is finished');
|
|
159
|
+
// Create individual mapResults entries for each scan anomaly
|
|
160
|
+
// Only do this during actual processing (not first pass)
|
|
161
|
+
if (!mappingWorkerOptions.skipWrite) {
|
|
162
|
+
scanAnomalies.forEach(({ fileInfo, anomalies }) => {
|
|
163
|
+
const scanAnomalyResult = {
|
|
164
|
+
sourceInstanceUID: `scan_${fileInfo.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
|
165
|
+
outputFilePath: `${fileInfo.path}/${fileInfo.name}`, // Use the actual file path
|
|
166
|
+
mappings: {},
|
|
167
|
+
anomalies: anomalies, // Keep the original anomalies array
|
|
168
|
+
errors: [],
|
|
169
|
+
quarantine: {},
|
|
170
|
+
};
|
|
171
|
+
// Add each scan anomaly result to the final results
|
|
172
|
+
mapResultsList.push(scanAnomalyResult);
|
|
173
|
+
});
|
|
174
|
+
}
|
|
146
175
|
progressCallback({
|
|
147
176
|
response: 'done',
|
|
148
177
|
mapResultsList: mapResultsList,
|
|
@@ -197,9 +226,14 @@ function queueFilesForMapping(organizeOptions) {
|
|
|
197
226
|
kind: 'blob',
|
|
198
227
|
blob: inputFile,
|
|
199
228
|
};
|
|
200
|
-
filesToProcess.push({
|
|
201
|
-
|
|
229
|
+
filesToProcess.push({
|
|
230
|
+
fileInfo,
|
|
231
|
+
fileIndex,
|
|
232
|
+
scanAnomalies: [],
|
|
233
|
+
});
|
|
202
234
|
});
|
|
235
|
+
// Dispatch jobs once after all files are queued to prevent race conditions
|
|
236
|
+
dispatchMappingJobs();
|
|
203
237
|
}
|
|
204
238
|
let progressCallback;
|
|
205
239
|
function curateMany(organizeOptions, onProgress) {
|
|
@@ -225,9 +259,12 @@ function curateMany(organizeOptions, onProgress) {
|
|
|
225
259
|
//
|
|
226
260
|
if (organizeOptions.inputType === 'directory') {
|
|
227
261
|
const fileListWorker = initializeFileListWorker();
|
|
262
|
+
const curationSpec = organizeOptions.curationSpec();
|
|
263
|
+
const specExcludedFiletypes = curationSpec.excludedFiletypes;
|
|
228
264
|
fileListWorker.postMessage({
|
|
229
265
|
request: 'scan',
|
|
230
266
|
directoryHandle: organizeOptions.inputDirectory,
|
|
267
|
+
excludedFiletypes: specExcludedFiletypes,
|
|
231
268
|
});
|
|
232
269
|
}
|
|
233
270
|
else if (organizeOptions.inputType === 'files') {
|
|
@@ -197,7 +197,9 @@ function getDurationFractionMicroseconds(iso8601Duration) {
|
|
|
197
197
|
* @returns The offset DICOM string, formatted like the original.
|
|
198
198
|
*/
|
|
199
199
|
export function offsetDateTime(dicomValue, iso8601Duration) {
|
|
200
|
-
// Step 0:
|
|
200
|
+
// Step 0: Trim leading/trailing spaces from the DICOM value
|
|
201
|
+
const trimmedDicomValue = dicomValue.trim();
|
|
202
|
+
// Step 0.5: Detect and handle a leading minus sign.
|
|
201
203
|
let sign = 1;
|
|
202
204
|
let durationStr = iso8601Duration;
|
|
203
205
|
if (iso8601Duration.startsWith('-')) {
|
|
@@ -205,7 +207,7 @@ export function offsetDateTime(dicomValue, iso8601Duration) {
|
|
|
205
207
|
durationStr = iso8601Duration.slice(1);
|
|
206
208
|
}
|
|
207
209
|
// Step 1: Convert the original DICOM string to a canonical DT string.
|
|
208
|
-
const canonical = dicomToCanonicalDT(
|
|
210
|
+
const canonical = dicomToCanonicalDT(trimmedDicomValue); // Format: "YYYYMMDDHHMMSS.FFFFFF"
|
|
209
211
|
// Step 2: Split the canonical DT string.
|
|
210
212
|
const base = canonical.slice(0, 14); // 14-digit base: YYYYMMDDHHMMSS
|
|
211
213
|
const fractionStr = canonical.slice(15, 21); // 6-digit fractional part: FFFFFF
|
|
@@ -238,5 +240,5 @@ export function offsetDateTime(dicomValue, iso8601Duration) {
|
|
|
238
240
|
const newFractionStr = newFractionMicro.toString().padStart(6, '0');
|
|
239
241
|
const newCanonical = newBase + '.' + newFractionStr;
|
|
240
242
|
// Step 9: Convert the canonical DT back to the original DICOM format.
|
|
241
|
-
return canonicalDTToDicom(newCanonical,
|
|
243
|
+
return canonicalDTToDicom(newCanonical, trimmedDicomValue);
|
|
242
244
|
}
|
|
@@ -14,11 +14,59 @@ var __asyncValues = (this && this.__asyncValues) || function (o) {
|
|
|
14
14
|
function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }
|
|
15
15
|
function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }
|
|
16
16
|
};
|
|
17
|
+
// Case-insensitive filetypes to ALWAYS exclude from processing
|
|
18
|
+
const DEFAULT_EXCLUDED_FILETYPES = ['dicomdir', 'dicomdir.dir', 'dicomdir.dat', 'dicomdir.bak', 'thumbs.db', '.ds_store'];
|
|
17
19
|
let keepScanning = true;
|
|
20
|
+
let excludedFiletypes = [];
|
|
21
|
+
/**
 * Decide whether a scanned file should be handed to the processing pipeline.
 *
 * Checks, in order:
 *  1. The file name (compared case-insensitively) against
 *     DEFAULT_EXCLUDED_FILETYPES plus the module-level `excludedFiletypes`.
 *  2. The file size: (valid) DICOM files are at least 132 bytes
 *     (128-byte preamble + 4-byte signature).
 *  3. The 4 bytes at offset 128, which must read "DICM".
 *
 * Every rejection appends an explanatory message to `fileAnomalies`. If a
 * check throws, a message is appended as well but the file is still accepted,
 * leaving the final verdict to the parser.
 *
 * @param file - The file to check (its name, size and slice() are used)
 * @param fileAnomalies - Array that receives anomaly messages for this file
 * @returns Promise<boolean> - True if the file should be processed
 */
function shouldProcessFile(file, fileAnomalies) {
    return __awaiter(this, void 0, void 0, function* () {
        const skipList = [...DEFAULT_EXCLUDED_FILETYPES, ...excludedFiletypes];
        try {
            // Reject files whose name is on the exclusion list.
            const lowerCaseName = file.name.toLowerCase();
            for (const candidate of skipList) {
                if (lowerCaseName === candidate.toLowerCase()) {
                    fileAnomalies.push(`Skipped excluded file: ${file.name}`);
                    return false;
                }
            }
            // Too small to hold the preamble and signature.
            if (file.size < 132) {
                fileAnomalies.push(`Skipped very small file: ${file.name} (${file.size} bytes)`);
                return false;
            }
            // Read the signature that follows the 128-byte preamble.
            const magic = new Uint8Array(yield file.slice(128, 132).arrayBuffer());
            const signature = String.fromCharCode(...magic.subarray(0, 4));
            if (signature !== 'DICM') {
                // Don't parse file without DICOM signature
                fileAnomalies.push(`Skipped file without DICOM signature: ${file.name}`);
                return false;
            }
            return true;
        }
        catch (error) {
            // If vetting process fails, let the parser decide
            fileAnomalies.push(`Unable to determine file validity - processing anyway: ${file.name} - ${error}`);
            return true;
        }
    });
}
|
|
18
62
|
self.addEventListener('message', (event) => {
|
|
19
63
|
switch (event.data.request) {
|
|
20
64
|
case 'scan':
|
|
21
65
|
console.log(`Starting directory scan of ${event.data.directoryHandle.name}`);
|
|
66
|
+
// Update excluded filetypes if provided
|
|
67
|
+
if (event.data.excludedFiletypes) {
|
|
68
|
+
excludedFiletypes = event.data.excludedFiletypes;
|
|
69
|
+
}
|
|
22
70
|
keepScanning = true;
|
|
23
71
|
scanDirectory(event.data.directoryHandle);
|
|
24
72
|
break;
|
|
@@ -57,17 +105,36 @@ function scanDirectory(dir) {
|
|
|
57
105
|
for (const entry of entries) {
|
|
58
106
|
if (entry.kind === 'file' && keepScanning) {
|
|
59
107
|
const file = yield entry.getFile();
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
108
|
+
const fileAnomalies = [];
|
|
109
|
+
if (yield shouldProcessFile(file, fileAnomalies)) {
|
|
110
|
+
// Send file to processing pipeline
|
|
111
|
+
self.postMessage({
|
|
112
|
+
response: 'file',
|
|
113
|
+
fileIndex: fileIndex++,
|
|
114
|
+
fileInfo: {
|
|
115
|
+
path: prefix,
|
|
116
|
+
name: entry.name,
|
|
117
|
+
size: file.size,
|
|
118
|
+
kind: 'handle',
|
|
119
|
+
fileHandle: entry,
|
|
120
|
+
},
|
|
121
|
+
});
|
|
122
|
+
}
|
|
123
|
+
else if (fileAnomalies.length > 0) {
|
|
124
|
+
// Send scan anomalies as a separate message so they are not sent to processing (curate)
|
|
125
|
+
self.postMessage({
|
|
126
|
+
response: 'scanAnomalies',
|
|
127
|
+
fileIndex: fileIndex++,
|
|
128
|
+
fileInfo: {
|
|
129
|
+
path: prefix,
|
|
130
|
+
name: entry.name,
|
|
131
|
+
size: file.size,
|
|
132
|
+
kind: 'handle',
|
|
133
|
+
fileHandle: entry,
|
|
134
|
+
},
|
|
135
|
+
anomalies: fileAnomalies,
|
|
136
|
+
});
|
|
137
|
+
}
|
|
71
138
|
}
|
|
72
139
|
else if (entry.kind === 'directory' && keepScanning) {
|
|
73
140
|
yield traverse(entry, prefix + '/' + entry.name);
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Check if a tag identifier is a private tag
|
|
3
|
+
*/
|
|
4
|
+
export declare function isPrivateTag(tagId: string): boolean;
|
|
5
|
+
/**
|
|
6
|
+
* Convert a DICOM keyword to its corresponding tag ID
|
|
7
|
+
*/
|
|
8
|
+
export declare function convertKeywordToTagId(keyword: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Convert a keyword path to tag ID path for nested DICOM elements
|
|
11
|
+
*/
|
|
12
|
+
export declare function convertKeywordPathToTagIdPath(keywordPath: string): string;
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import type { TNaturalData } from 'dcmjs';
|
|
2
2
|
import type { Iso8601Duration, TPs315Options, TMapResults } from './types';
|
|
3
3
|
export declare function protectUid(uid: string, retainUIDsOption: string): string;
|
|
4
|
-
export default function deidentifyPS315E({ naturalData, dicomPS315EOptions, dateOffset, mapResults, }: {
|
|
4
|
+
export default function deidentifyPS315E({ naturalData, dicomPS315EOptions, dateOffset, mapResults, originalDicomDict, }: {
|
|
5
5
|
naturalData: TNaturalData;
|
|
6
6
|
dicomPS315EOptions: TPs315Options;
|
|
7
7
|
dateOffset?: Iso8601Duration;
|
|
8
8
|
mapResults: TMapResults;
|
|
9
|
+
originalDicomDict?: Record<string, any>;
|
|
9
10
|
}): void;
|
|
10
11
|
export declare const defaultPs315Options: TPs315Options;
|
package/dist/types/types.d.ts
CHANGED