dicom-curate 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -0
- package/package.json +82 -0
package/README.md
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# dicom-curate
|
|
2
|
+
|
|
3
|
+
Organize and de-identify DICOM header values and file hierarchies based on a provided configuration object.
|
|
4
|
+
|
|
5
|
+
## ⚠️ Disclaimer
|
|
6
|
+
|
|
7
|
+
This project is currently in a pre-1.0.0 state. APIs and behavior may change at any time without notice.
|
|
8
|
+
|
|
9
|
+
You're welcome to open issues, but please only do so if you're also willing to contribute a pull request.
|
|
10
|
+
|
|
11
|
+
## Why
|
|
12
|
+
|
|
13
|
+
This provides an open configuration language and a ready-to-use library for modifying DICOM headers for the purpose of de-identification and organization.
|
|
14
|
+
|
|
15
|
+
The library can be used in a toolkit-agnostic way, because it provides access to functionality to modify decoded DICOM headers in "DICOM json" format.
|
|
16
|
+
|
|
17
|
+
## Usage
|
|
18
|
+
|
|
19
|
+
Converting a nested input folder structure containing DICOM files to a cleaned output folder destination (note: this uses a browser API only supported in Chrome and Edge browsers):
|
|
20
|
+
|
|
21
|
+
```ts
|
|
22
|
+
import { curateMany, OrganizeOptions } from 'dicom-curate'
|
|
23
|
+
|
|
24
|
+
const options: OrganizeOptions = {
|
|
25
|
+
inputType: 'directory',
|
|
26
|
+
inputDirectory, // input folder directory handle
|
|
27
|
+
outputDirectory, // output folder directory handle
|
|
28
|
+
curationSpec, // DICOM curation specification
|
|
29
|
+
columnMappings, // csv file handle to add csv-based mapping
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Read input, map headers, write to well-structured output.
|
|
33
|
+
curateMany(options, onProgressCallback)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Alternatively, a list of `File`s is accepted:
|
|
37
|
+
|
|
38
|
+
```ts
|
|
39
|
+
const options: OrganizeOptions = {
|
|
40
|
+
inputType: 'files',
|
|
41
|
+
inputFiles, // list of `File` objects
|
|
42
|
+
outputDirectory, // output folder directory handle
|
|
43
|
+
curationSpec, // DICOM curation specification
|
|
44
|
+
columnMappings, // csv file handle to add csv-based mapping
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
If `outputDirectory` is omitted, output `Blob`s will be passed to the `onProgressCallback` function instead.
|
|
49
|
+
|
|
50
|
+
You can also call `curateOne` directly and receive a promise with the mapped blob:
|
|
51
|
+
|
|
52
|
+
```ts
|
|
53
|
+
import { curateOne, extractColumnMappings, clearCaches } from 'dicom-curate'
|
|
54
|
+
|
|
55
|
+
// Data prep responsibility for optional table is with caller
|
|
56
|
+
const columnMappings = extractColumnMappings([
|
|
57
|
+
{ subjectID: 'SubjectID1', blindedID: 'BlindedID1' },
|
|
58
|
+
{ subjectID: 'SubjectID2', blindedID: 'BlindedID2' },
|
|
59
|
+
])
|
|
60
|
+
|
|
61
|
+
curateOne(
|
|
62
|
+
fileInfo, // path, name, size, kind, blob
|
|
63
|
+
undefined,
|
|
64
|
+
{ curationSpec, columnMappings },
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
// Cache clean-up responsibility, e.g. for consistent UID mapping in `retainUIDsOption: 'Off'` is with caller
|
|
68
|
+
clearCaches()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
An example DICOM curation function:
|
|
72
|
+
|
|
73
|
+
<!-- Snippet auto-generated from src/config/sampleBatchCurationSpecification.ts -->
|
|
74
|
+
|
|
75
|
+
```ts
|
|
76
|
+
import type { TCurationSpecification } from 'dicom-curate'
|
|
77
|
+
|
|
78
|
+
/*
|
|
79
|
+
* Curation specification for batch-curating DICOM files.
|
|
80
|
+
*/
|
|
81
|
+
export function sampleBatchCurationSpecification(): TCurationSpecification {
|
|
82
|
+
// Confirm allowed identifiers for this transfer.
|
|
83
|
+
const identifiers = {
|
|
84
|
+
protocolNumber: 'Sample_Protocol_Number',
|
|
85
|
+
activityProviderName: 'Sample_CRO',
|
|
86
|
+
centerSubjectId: /^[A-Z]{2}\d{2}-\d{3}$/,
|
|
87
|
+
timepointNames: ['Visit 1', 'Visit 2', 'Visit 3'],
|
|
88
|
+
// Folder "scan": the trial-specific/provider-assigned series name
|
|
89
|
+
scanNames: ['3DT1 Sagittal', 'PET-Abdomen'],
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
return {
|
|
93
|
+
// Review the required input folder structure (all DICOM files need minimally this folder depth)
|
|
94
|
+
// This configuration depends on correct centerSubjectId, timepoint, scan folder names.
|
|
95
|
+
inputPathPattern:
|
|
96
|
+
'protocolNumber/activityProvider/centerSubjectId/timepoint/scan',
|
|
97
|
+
|
|
98
|
+
additionalData: {
|
|
99
|
+
// collect from a csv file. A client can use regex to validate the input.
|
|
100
|
+
type: 'load',
|
|
101
|
+
collect: {
|
|
102
|
+
CURR_ID: identifiers.centerSubjectId,
|
|
103
|
+
StudyDescription: identifiers.timepointNames,
|
|
104
|
+
MAPPED_ID: /BLIND_\d+/,
|
|
105
|
+
},
|
|
106
|
+
// With this, can refer to mappings as parser.getMapping('blindedId')
|
|
107
|
+
mapping: {
|
|
108
|
+
// Using the CSV
|
|
109
|
+
blindedId: {
|
|
110
|
+
value: (parser) => parser.getDicom('PatientID'),
|
|
111
|
+
lookup: (row) => row['CURR_ID'],
|
|
112
|
+
replace: (row) => row['MAPPED_ID'],
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
},
|
|
116
|
+
|
|
117
|
+
version: '2.0',
|
|
118
|
+
identifiers,
|
|
119
|
+
|
|
120
|
+
// This specifies the standardized DICOM de-identification
|
|
121
|
+
dicomPS315EOptions: {
|
|
122
|
+
cleanDescriptorsOption: true,
|
|
123
|
+
cleanDescriptorsExceptions: ['SeriesDescription'],
|
|
124
|
+
retainLongitudinalTemporalInformationOptions: 'Full',
|
|
125
|
+
retainPatientCharacteristicsOption: [
|
|
126
|
+
'PatientsWeight',
|
|
127
|
+
'PatientsSize',
|
|
128
|
+
'PatientsAge',
|
|
129
|
+
'PatientsSex',
|
|
130
|
+
'SelectorASValue',
|
|
131
|
+
],
|
|
132
|
+
retainDeviceIdentityOption: true,
|
|
133
|
+
retainUIDsOption: 'Hashed',
|
|
134
|
+
retainSafePrivateOption: 'Quarantine',
|
|
135
|
+
retainInstitutionIdentityOption: true,
|
|
136
|
+
},
|
|
137
|
+
|
|
138
|
+
// This section defines the output folder structure and alignment of DICOM headers
|
|
139
|
+
modifications(parser) {
|
|
140
|
+
const scan = parser.getFilePathComp('scan')
|
|
141
|
+
const centerSubjectId = parser.getFilePathComp('centerSubjectId')
|
|
142
|
+
|
|
143
|
+
return {
|
|
144
|
+
dicomHeader: {
|
|
145
|
+
// Align the PatientID DICOM header with the centerSubjectId folder name.
|
|
146
|
+
PatientID: centerSubjectId,
|
|
147
|
+
// This example maps PatientIDs based on the mapping CSV file.
|
|
148
|
+
// PatientID: parser.getMapping('blindedId'),
|
|
149
|
+
PatientName: centerSubjectId,
|
|
150
|
+
// Align the StudyDescription DICOM header with the timepoint folder name.
|
|
151
|
+
StudyDescription: parser.getFilePathComp('timepoint'),
|
|
152
|
+
// The party responsible for assigning a standard ClinicalTrialSeriesDescription
|
|
153
|
+
ClinicalTrialCoordinatingCenterName: identifiers.activityProviderName,
|
|
154
|
+
// Align the ClinicalTrialSeriesDescription DICOM header with the scan folder name.
|
|
155
|
+
ClinicalTrialSeriesDescription: scan,
|
|
156
|
+
},
|
|
157
|
+
|
|
158
|
+
// This defines the output folder structure.
|
|
159
|
+
outputFilePathComponents: [
|
|
160
|
+
parser.getFilePathComp('protocolNumber'),
|
|
161
|
+
parser.getFilePathComp('activityProvider'),
|
|
162
|
+
centerSubjectId,
|
|
163
|
+
parser.getFilePathComp('timepoint'),
|
|
164
|
+
parser.getFilePathComp('scan') +
|
|
165
|
+
'=' +
|
|
166
|
+
parser.getDicom('SeriesNumber'),
|
|
167
|
+
parser.getFilePathComp(parser.FILEBASENAME) + '.dcm',
|
|
168
|
+
],
|
|
169
|
+
}
|
|
170
|
+
},
|
|
171
|
+
|
|
172
|
+
// This section defines the validation rules for the input DICOMs.
|
|
173
|
+
// The processing continues on errors, but errors will have to be fixed
|
|
174
|
+
// or reviewed between the parties.
|
|
175
|
+
validation(parser) {
|
|
176
|
+
const modality = parser.getDicom('Modality')
|
|
177
|
+
const filename = parser.getFilePathComp(parser.FILEBASENAME)
|
|
178
|
+
const seriesUid = parser.getDicom('SeriesInstanceUID')
|
|
179
|
+
|
|
180
|
+
return {
|
|
181
|
+
errors: [
|
|
182
|
+
// File path
|
|
183
|
+
[
|
|
184
|
+
'Invalid study folder name',
|
|
185
|
+
parser.getFilePathComp('protocolNumber') !==
|
|
186
|
+
identifiers.protocolNumber,
|
|
187
|
+
],
|
|
188
|
+
// DICOM header
|
|
189
|
+
['Missing Modality', parser.missingDicom('Modality')],
|
|
190
|
+
],
|
|
191
|
+
}
|
|
192
|
+
},
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## DICOM Conformance Notes
|
|
198
|
+
|
|
199
|
+
dicom-curate
|
|
200
|
+
|
|
201
|
+
- does not use an Encrypted Attributes Sequence
|
|
202
|
+
- does not anonymize burnt-in information or modify PixelData
|
|
203
|
+
- populates the `PatientIdentityRemoved` attribute with `YES`
|
|
204
|
+
- populates the `LongitudinalTemporalInformationModified` attribute per DICOM PS3.15E
|
|
205
|
+
- populates the `DeidentificationMethod` attribute with information about this README
|
|
206
|
+
- populates the `DeidentificationMethodCodeSequence` with the CID7050 codes of provided options, per PS3.15E
|
|
207
|
+
- keeps only the following in File Meta Information:
|
|
208
|
+
'FileMetaInformationVersion', 'ImplementationClassUID', 'ImplementationVersionName',
|
|
209
|
+
'MediaStorageSOPClassUID', as well as setting the 'TransferSyntaxUID' to 'Explicit VR Little Endian', and 'MediaStorageSOPInstanceUID' to the correct SOP instance UID.
|
|
210
|
+
- cleans sequences ('SQ') by recursively applying the de-identification rules to each Dataset in each Item of the Sequence.
|
|
211
|
+
- uses an allow-list approach, by removing everything not defined in PS3.06 or handled in PS3.15E1.1.
|
|
212
|
+
- identifies and removes additional ID attributes beyond PS3.15E1.1 by parsing PS3.06 and finding all attributes whose names end in "ID(s)" (excluding those ending in "UID(s)") and that are not defined in PS3.15E. This ID list is defined in "src/config/dicom/retainAdditionalIds.ts", and a few of them are manually annotated to be retained if the "retain device identifier option" is activated.
|
|
213
|
+
- keeps the 'EncapsulatedDocument' attribute if modality is "DOC", unless overridden
|
|
214
|
+
- keeps the 'VerifyingObserverSequence' if modality is SR, unless overridden
|
|
215
|
+
- allows the users to describe all cleaning configurations in the curationSpec file
|
|
216
|
+
- implements the following PS3.15E options:
|
|
217
|
+
- 'retainDeviceIdentityOption': Keeps the attributes marked as 'K' and performs the default action on all other attributes
|
|
218
|
+
- 'cleanDescriptorsOption' by removing all description and comment Attributes except those comment attributes explicitly listed in the `cleanDescriptorsExceptions` list.
|
|
219
|
+
- 'retainLongitudinalTemporalInformationOptions': this considers all temporal attributes (DA, TM, DT), as described as a possible approach in PS3.15E.
|
|
220
|
+
Possible values are 'Full' (keep all temporal info intact), 'Off' (remove all temporal attributes or add defaults per PS3.15E), or 'Offset' (move all temporal attributes by a duration. An ISO-8601 compliant duration `dateOffset` parameter must be passed).
|
|
221
|
+
- 'retainDeviceIdentityOption': true or false. If true, overrides `retainLongitudinalTemporalInformationOptions` for the respective attributes to keep.
|
|
222
|
+
- 'retainUIDsOption': 'On', 'Off', or 'Hashed'.
|
|
223
|
+
- If 'On', maintain all UIDs.
|
|
224
|
+
- If 'Off', replaces instance UIDs with arbitrary new UIDs, maintaining referential integrity within a single run.
|
|
225
|
+
- maximum protection
|
|
226
|
+
- only maintains referential integrity within a run
|
|
227
|
+
- do not use for de-identifying data in multiple batches
|
|
228
|
+
- If 'Hashed', creates a new UID using a decentrally repeatable, hash-based method.
|
|
229
|
+
- maintains referential integrity even if de-identifying data in separate, or decentralized, batches
|
|
230
|
+
- use if the risk of re-identifying by UID is not bigger than the risk of re-identifying by PixelData
|
|
231
|
+
- do not use if you want to specifically protect UIDs from an auxiliary knowledge attack, e.g. an attacker that knows possible input UIDs
|
|
232
|
+
- There are more instance UIDs in part PS3.06 than described in PS3.15E for protection. Therefore, this option identifies the following UIDs for protection: 1. All instance UIDs per PS3.15E, 2. Any additional UIDs with a value not well-known in DICOM, per table PS3.06A (Registry of DICOM Unique Identifiers). This protects instance UIDs but also private class UIDs, which is intentional.
|
|
233
|
+
- 'retainSafePrivateOption': 'Quarantine' or 'Off'. If 'Quarantine', keeps all private tags but creates a quarantine log for manual review
|
|
234
|
+
- 'retainInstitutionIdentityOption': true or false
|
|
235
|
+
- does not currently clean structured content
|
package/package.json
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "dicom-curate",
|
|
3
|
+
"version": "0.1.1",
|
|
4
|
+
"description": "Organize and de-identify DICOM header data",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/esm/index.js",
|
|
7
|
+
"module": "dist/esm/index.js",
|
|
8
|
+
"types": "dist/types/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": "./dist/esm/index.js",
|
|
12
|
+
"types": "./dist/types/index.d.ts",
|
|
13
|
+
"default": "./dist/esm/index.js"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"lint-staged": {
|
|
20
|
+
"*.{js,jsx,ts,tsx,md}": [
|
|
21
|
+
"prettier --write"
|
|
22
|
+
]
|
|
23
|
+
},
|
|
24
|
+
"husky": {
|
|
25
|
+
"hooks": {
|
|
26
|
+
"pre-commit": "lint-staged"
|
|
27
|
+
}
|
|
28
|
+
},
|
|
29
|
+
"prettier": {
|
|
30
|
+
"semi": false,
|
|
31
|
+
"singleQuote": true
|
|
32
|
+
},
|
|
33
|
+
"eslintConfig": {
|
|
34
|
+
"extends": "react-app",
|
|
35
|
+
"rules": {
|
|
36
|
+
"import/no-extraneous-dependencies": 2
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"scripts": {
|
|
40
|
+
"clean": "bebbi-scripts clean",
|
|
41
|
+
"build": "yarn generate:sampleSpec && bebbi-scripts build esm types",
|
|
42
|
+
"watch": "bebbi-scripts build --watch",
|
|
43
|
+
"test": "bebbi-scripts test",
|
|
44
|
+
"format": "bebbi-scripts format",
|
|
45
|
+
"lint": "bebbi-scripts lint",
|
|
46
|
+
"validate": "bebbi-scripts validate",
|
|
47
|
+
"prepare": "husky install",
|
|
48
|
+
"generate:sampleSpec": "tsx scripts/generateSampleSpec.ts",
|
|
49
|
+
"generate:unitTestSample": "tsx scripts/generateSampleDicomData.ts"
|
|
50
|
+
},
|
|
51
|
+
"dependencies": {
|
|
52
|
+
"acorn": "^8.14.1",
|
|
53
|
+
"acorn-globals": "^7.0.1",
|
|
54
|
+
"dcmjs": "^0.38.3",
|
|
55
|
+
"iso8601-duration": "^2.1.2",
|
|
56
|
+
"js-sha256": "^0.11.0",
|
|
57
|
+
"lodash": "^4.17.21",
|
|
58
|
+
"memize": "^2.1.0",
|
|
59
|
+
"uuid": "^11.0.5"
|
|
60
|
+
},
|
|
61
|
+
"devDependencies": {
|
|
62
|
+
"@commitlint/cli": "^19.8.1",
|
|
63
|
+
"@commitlint/config-conventional": "^19.8.1",
|
|
64
|
+
"@semantic-release/changelog": "^6.0.3",
|
|
65
|
+
"@semantic-release/git": "^10.0.1",
|
|
66
|
+
"@semantic-release/github": "^11.0.3",
|
|
67
|
+
"@semantic-release/npm": "^12.0.1",
|
|
68
|
+
"@types/lodash": "^4",
|
|
69
|
+
"@types/xml2js": "^0",
|
|
70
|
+
"bebbi-scripts": "^0.7.8",
|
|
71
|
+
"husky": "^8.0.3",
|
|
72
|
+
"node-fetch": "^3.3.2",
|
|
73
|
+
"prettier": "^3.4.2",
|
|
74
|
+
"semantic-release": "^24.2.5",
|
|
75
|
+
"ts-jest": "^29.2.6",
|
|
76
|
+
"tsx": "^4.19.3",
|
|
77
|
+
"typescript": "^5.8.3",
|
|
78
|
+
"xml2js": "^0.6.2"
|
|
79
|
+
},
|
|
80
|
+
"license": "MIT",
|
|
81
|
+
"packageManager": "yarn@4.5.3"
|
|
82
|
+
}
|