caideface 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {caideface-0.2.0/src/caideface.egg-info → caideface-0.3.0}/PKG-INFO +138 -9
- {caideface-0.2.0 → caideface-0.3.0}/README.md +133 -6
- {caideface-0.2.0 → caideface-0.3.0}/pyproject.toml +9 -4
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/__init__.py +8 -2
- caideface-0.3.0/src/caideface/anonymize.py +249 -0
- caideface-0.3.0/src/caideface/background.py +174 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/cli.py +36 -0
- caideface-0.3.0/src/caideface/data/ner_model/config.cfg +145 -0
- caideface-0.3.0/src/caideface/data/ner_model/meta.json +52 -0
- caideface-0.3.0/src/caideface/data/ner_model/ner/cfg +13 -0
- caideface-0.3.0/src/caideface/data/ner_model/ner/model +0 -0
- caideface-0.3.0/src/caideface/data/ner_model/ner/moves +1 -0
- caideface-0.3.0/src/caideface/data/ner_model/tok2vec/cfg +3 -0
- caideface-0.3.0/src/caideface/data/ner_model/tok2vec/model +0 -0
- caideface-0.3.0/src/caideface/data/ner_model/tokenizer +3 -0
- caideface-0.3.0/src/caideface/data/ner_model/vocab/key2row +1 -0
- caideface-0.3.0/src/caideface/data/ner_model/vocab/lookups.bin +1 -0
- caideface-0.3.0/src/caideface/data/ner_model/vocab/strings.json +7816 -0
- caideface-0.3.0/src/caideface/data/ner_model/vocab/vectors +0 -0
- caideface-0.3.0/src/caideface/data/ner_model/vocab/vectors.cfg +3 -0
- {caideface-0.2.0 → caideface-0.3.0/src/caideface.egg-info}/PKG-INFO +138 -9
- caideface-0.3.0/src/caideface.egg-info/SOURCES.txt +37 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface.egg-info/requires.txt +2 -0
- caideface-0.3.0/tests/test_anonymize.py +147 -0
- caideface-0.3.0/tests/test_background.py +172 -0
- caideface-0.2.0/src/caideface.egg-info/SOURCES.txt +0 -20
- {caideface-0.2.0 → caideface-0.3.0}/LICENSE.md +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/setup.cfg +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/data/mni_icbm152_t1_tal_nlin_sym_55_ext_brain_only.nii.gz +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/data/t1_mask.nii.gz +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/pipeline.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/register.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/reorient.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface/skull_strip.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface.egg-info/dependency_links.txt +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface.egg-info/entry_points.txt +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/src/caideface.egg-info/top_level.txt +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/tests/test_register.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/tests/test_reorient.py +0 -0
- {caideface-0.2.0 → caideface-0.3.0}/tests/test_skull_strip.py +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: caideface
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: MRI defacing pipeline with skull-stripping and affine registration from cai4cai
|
|
5
5
|
Author-email: Lorena Garcia-Foncillas <lorenagarfon00@gmail.com>
|
|
6
|
-
License-Expression: MIT
|
|
6
|
+
License: MIT
|
|
7
7
|
Project-URL: Homepage, https://github.com/cai4cai/defacing_pipeline
|
|
8
8
|
Project-URL: Repository, https://github.com/cai4cai/defacing_pipeline
|
|
9
|
-
Keywords: MRI,defacing,anonymisation,skull-stripping,neuroimaging
|
|
9
|
+
Keywords: MRI,defacing,anonymisation,skull-stripping,neuroimaging,NER,text-anonymization
|
|
10
10
|
Classifier: Development Status :: 3 - Alpha
|
|
11
11
|
Classifier: Intended Audience :: Science/Research
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -22,6 +22,8 @@ Requires-Dist: pandas>=1.5
|
|
|
22
22
|
Requires-Dist: natsort>=8.0
|
|
23
23
|
Requires-Dist: tqdm>=4.60
|
|
24
24
|
Requires-Dist: hd-bet
|
|
25
|
+
Requires-Dist: spacy>=3.5
|
|
26
|
+
Requires-Dist: faker>=18.0
|
|
25
27
|
Provides-Extra: dev
|
|
26
28
|
Requires-Dist: pytest; extra == "dev"
|
|
27
29
|
Requires-Dist: ruff; extra == "dev"
|
|
@@ -29,19 +31,28 @@ Dynamic: license-file
|
|
|
29
31
|
|
|
30
32
|
# caideface
|
|
31
33
|
|
|
32
|
-
**MRI defacing
|
|
34
|
+
**MRI defacing and text anonymisation toolkit** from the [cai4cai](https://cai4cai.ml/) research group (Contextual Artificial Intelligence for Computer Assisted Interventions).
|
|
33
35
|
|
|
34
|
-
This
|
|
36
|
+
This package provides two complementary anonymisation capabilities:
|
|
37
|
+
|
|
38
|
+
- **Image defacing** -- removes facial features from head MRI scans while preserving brain structures, as described in the paper *"A Generalisable Head MRI Defacing Pipeline: Evaluation on 2,566 Meningioma Scans"* ([arXiv:2505.12999](https://arxiv.org/abs/2505.12999)).
|
|
39
|
+
- **Text anonymisation** -- detects personal names in medical reports using a trained spaCy NER model and replaces them with realistic fake names (Hiding in Plain Sight / HIPS technique).
|
|
35
40
|
|
|
36
41
|
## Pipeline overview
|
|
37
42
|
|
|
38
|
-
|
|
43
|
+
### Image defacing pipeline
|
|
44
|
+
|
|
45
|
+
The defacing pipeline consists of three steps:
|
|
39
46
|
|
|
40
47
|
1. **Reorientation** -- Aligns NIfTI scans to LAS canonical orientation (MNI152 standard) using nibabel.
|
|
41
48
|
2. **Skull-stripping** -- Extracts brain masks using [HD-BET](https://github.com/MIC-DKFZ/HD-BET), then applies dynamic dilation to preserve peripheral brain structures.
|
|
42
49
|
3. **Registration & Defacing** -- Registers each scan to the MNI152 template using BRAINSFit (affine), warps a face mask into the scan's space, and applies it to remove facial features.
|
|
43
50
|
|
|
44
|
-
|
|
51
|
+
### Text anonymisation (NER + HIPS)
|
|
52
|
+
|
|
53
|
+
The text anonymisation module uses a trained spaCy Named Entity Recognition (NER) model to identify personal names (`PER` entities) in `.txt` files and replaces them with realistic fake names generated by the [Faker](https://faker.readthedocs.io/) library. This "Hiding in Plain Sight" (HIPS) approach produces anonymised reports that remain naturally readable. Consistent name mapping ensures that the same real name is always replaced with the same fake name within a document.
|
|
54
|
+
|
|
55
|
+
All required models and data are **bundled with the package**, so no additional downloads are needed.
|
|
45
56
|
|
|
46
57
|
## Requirements
|
|
47
58
|
|
|
@@ -106,7 +117,7 @@ pip install -e .
|
|
|
106
117
|
|
|
107
118
|
## Usage
|
|
108
119
|
|
|
109
|
-
### CLI -- Full pipeline
|
|
120
|
+
### CLI -- Full defacing pipeline
|
|
110
121
|
|
|
111
122
|
Run all three steps in one command:
|
|
112
123
|
|
|
@@ -134,7 +145,7 @@ This creates three subdirectories under `./output`:
|
|
|
134
145
|
| `--steps` | `all` | Run specific steps: `reorient`, `skull_strip`, `deface` (comma-separated) |
|
|
135
146
|
| `-v` | off | Verbose/debug logging |
|
|
136
147
|
|
|
137
|
-
### CLI -- Individual steps
|
|
148
|
+
### CLI -- Individual defacing steps
|
|
138
149
|
|
|
139
150
|
Run each step separately for more control:
|
|
140
151
|
|
|
@@ -151,8 +162,100 @@ caideface deface ./reoriented ./hdbet ./defaced \
|
|
|
151
162
|
--brainsresample /path/to/BRAINSResample
|
|
152
163
|
```
|
|
153
164
|
|
|
165
|
+
### CLI -- Text anonymisation
|
|
166
|
+
|
|
167
|
+
#### Single file
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
caideface anonymize-single ./reports/report_1.txt ./anonymized/report_1.txt
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
#### Batch (all `.txt` files in a directory)
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
caideface anonymize ./reports ./anonymized_reports
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
#### Options
|
|
180
|
+
|
|
181
|
+
Both commands accept the same options:
|
|
182
|
+
|
|
183
|
+
| Flag | Default | Description |
|
|
184
|
+
|------|---------|-------------|
|
|
185
|
+
| `--model` | bundled | Path to a custom spaCy NER model directory |
|
|
186
|
+
| `--n-names` | `50` | Size of the fake name pool |
|
|
187
|
+
| `--seed` | none | Random seed for reproducible output |
|
|
188
|
+
| `-v` | off | Verbose/debug logging |
|
|
189
|
+
|
|
190
|
+
#### Example
|
|
191
|
+
|
|
192
|
+
**Input** (`reports/report_1550.txt`):
|
|
193
|
+
```
|
|
194
|
+
Reported by Danielle Smith and William Stuart on 03/10/2014
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
**Output** (`anonymized_reports/report_1550.txt`):
|
|
198
|
+
```
|
|
199
|
+
Reported by Ryan Munoz and Holly Wood on 03/10/2014
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
The batch command saves an `anonymization_log.csv` alongside the output files with a summary of replacements per file.
|
|
203
|
+
|
|
204
|
+
### Python API -- Text anonymisation
|
|
205
|
+
|
|
206
|
+
#### Single file
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from caideface.anonymize import load_ner_model, generate_fake_names, anonymize_single
|
|
210
|
+
|
|
211
|
+
# Load model and generate fake name pool (do this once)
|
|
212
|
+
nlp = load_ner_model() # uses bundled model
|
|
213
|
+
fake_names = generate_fake_names(n=50, seed=42)
|
|
214
|
+
|
|
215
|
+
# Anonymise a single report
|
|
216
|
+
result = anonymize_single(
|
|
217
|
+
input_file="reports/report_1.txt",
|
|
218
|
+
output_file="anonymized/report_1.txt",
|
|
219
|
+
nlp=nlp,
|
|
220
|
+
fake_names=fake_names,
|
|
221
|
+
)
|
|
222
|
+
print(result["replacements"]) # number of names replaced
|
|
223
|
+
print(result["names_found"]) # list of original names detected
|
|
224
|
+
print(result["name_mapping"]) # {original_name: fake_name} mapping
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
#### Batch processing
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
from caideface import anonymize_batch
|
|
231
|
+
|
|
232
|
+
# Anonymise all .txt files in a directory
|
|
233
|
+
log_df = anonymize_batch(
|
|
234
|
+
input_dir="reports/",
|
|
235
|
+
output_dir="anonymized_reports/",
|
|
236
|
+
seed=42,
|
|
237
|
+
)
|
|
238
|
+
print(log_df) # DataFrame with file, replacements, names_found per file
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
#### All available imports
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
from caideface import (
|
|
245
|
+
DefacePipeline, # Full image defacing pipeline
|
|
246
|
+
reorient_batch, # Step 1
|
|
247
|
+
skull_strip_batch, # Step 2
|
|
248
|
+
deface_batch, # Step 3
|
|
249
|
+
anonymize_batch, # Text anonymisation (batch)
|
|
250
|
+
anonymize_single, # Text anonymisation (single file)
|
|
251
|
+
default_ner_model_path, # Path to bundled NER model
|
|
252
|
+
)
|
|
253
|
+
```
|
|
254
|
+
|
|
154
255
|
## Output structure
|
|
155
256
|
|
|
257
|
+
### Image defacing
|
|
258
|
+
|
|
156
259
|
```
|
|
157
260
|
output/
|
|
158
261
|
├── reoriented/
|
|
@@ -170,6 +273,16 @@ output/
|
|
|
170
273
|
└── hd_bet_dilated_<scan>_masked.nii.gz # Final defaced scan
|
|
171
274
|
```
|
|
172
275
|
|
|
276
|
+
### Text anonymisation
|
|
277
|
+
|
|
278
|
+
```
|
|
279
|
+
anonymized_reports/
|
|
280
|
+
├── anonymization_log.csv # Replacements per file
|
|
281
|
+
├── report_1.txt # Anonymised report
|
|
282
|
+
├── report_2.txt
|
|
283
|
+
└── ...
|
|
284
|
+
```
|
|
285
|
+
|
|
173
286
|
## Existing transforms
|
|
174
287
|
|
|
175
288
|
If you have pre-computed registration transforms (e.g. from 3D Slicer), place a file named `Transform_to_template.txt` in the same directory as the dilated skull-stripped scan. The pipeline will use it instead of running BRAINSFit. Both plain 4x4 text matrices and ITK/Slicer transform formats are supported.
|
|
@@ -199,6 +312,22 @@ If you use HD-BET (skull-stripping, Step 2), please also cite:
|
|
|
199
312
|
}
|
|
200
313
|
```
|
|
201
314
|
|
|
315
|
+
If you use the text anonymisation (NER + HIPS), please also cite:
|
|
316
|
+
|
|
317
|
+
```bibtex
|
|
318
|
+
@article{garcia2025ner,
|
|
319
|
+
title={Evaluation of Named Entity Recognition for Automated Extraction of Present Tumor Size and Personal Names from Radiology Reports Using Spacy},
|
|
320
|
+
author={Garcia-Foncillas Macias, Lorena and Barfoot, Theodore and Vercauteren, Tom and Shapey, Jonathan},
|
|
321
|
+
journal={Journal of Neurological Surgery Part B: Skull Base},
|
|
322
|
+
volume={86},
|
|
323
|
+
number={S 01},
|
|
324
|
+
year={2025},
|
|
325
|
+
doi={10.1055/s-0045-1803715}
|
|
326
|
+
}
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
|
|
202
331
|
## License
|
|
203
332
|
|
|
204
333
|
This project is licensed under the MIT License -- see the [LICENSE](LICENSE.md) file for details.
|
|
@@ -1,18 +1,27 @@
|
|
|
1
1
|
# caideface
|
|
2
2
|
|
|
3
|
-
**MRI defacing
|
|
3
|
+
**MRI defacing and text anonymisation toolkit** from the [cai4cai](https://cai4cai.ml/) research group (Contextual Artificial Intelligence for Computer Assisted Interventions).
|
|
4
4
|
|
|
5
|
-
This
|
|
5
|
+
This package provides two complementary anonymisation capabilities:
|
|
6
|
+
|
|
7
|
+
- **Image defacing** -- removes facial features from head MRI scans while preserving brain structures, as described in the paper *"A Generalisable Head MRI Defacing Pipeline: Evaluation on 2,566 Meningioma Scans"* ([arXiv:2505.12999](https://arxiv.org/abs/2505.12999)).
|
|
8
|
+
- **Text anonymisation** -- detects personal names in medical reports using a trained spaCy NER model and replaces them with realistic fake names (Hiding in Plain Sight / HIPS technique).
|
|
6
9
|
|
|
7
10
|
## Pipeline overview
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
### Image defacing pipeline
|
|
13
|
+
|
|
14
|
+
The defacing pipeline consists of three steps:
|
|
10
15
|
|
|
11
16
|
1. **Reorientation** -- Aligns NIfTI scans to LAS canonical orientation (MNI152 standard) using nibabel.
|
|
12
17
|
2. **Skull-stripping** -- Extracts brain masks using [HD-BET](https://github.com/MIC-DKFZ/HD-BET), then applies dynamic dilation to preserve peripheral brain structures.
|
|
13
18
|
3. **Registration & Defacing** -- Registers each scan to the MNI152 template using BRAINSFit (affine), warps a face mask into the scan's space, and applies it to remove facial features.
|
|
14
19
|
|
|
15
|
-
|
|
20
|
+
### Text anonymisation (NER + HIPS)
|
|
21
|
+
|
|
22
|
+
The text anonymisation module uses a trained spaCy Named Entity Recognition (NER) model to identify personal names (`PER` entities) in `.txt` files and replaces them with realistic fake names generated by the [Faker](https://faker.readthedocs.io/) library. This "Hiding in Plain Sight" (HIPS) approach produces anonymised reports that remain naturally readable. Consistent name mapping ensures that the same real name is always replaced with the same fake name within a document.
|
|
23
|
+
|
|
24
|
+
All required models and data are **bundled with the package**, so no additional downloads are needed.
|
|
16
25
|
|
|
17
26
|
## Requirements
|
|
18
27
|
|
|
@@ -77,7 +86,7 @@ pip install -e .
|
|
|
77
86
|
|
|
78
87
|
## Usage
|
|
79
88
|
|
|
80
|
-
### CLI -- Full pipeline
|
|
89
|
+
### CLI -- Full defacing pipeline
|
|
81
90
|
|
|
82
91
|
Run all three steps in one command:
|
|
83
92
|
|
|
@@ -105,7 +114,7 @@ This creates three subdirectories under `./output`:
|
|
|
105
114
|
| `--steps` | `all` | Run specific steps: `reorient`, `skull_strip`, `deface` (comma-separated) |
|
|
106
115
|
| `-v` | off | Verbose/debug logging |
|
|
107
116
|
|
|
108
|
-
### CLI -- Individual steps
|
|
117
|
+
### CLI -- Individual defacing steps
|
|
109
118
|
|
|
110
119
|
Run each step separately for more control:
|
|
111
120
|
|
|
@@ -122,8 +131,100 @@ caideface deface ./reoriented ./hdbet ./defaced \
|
|
|
122
131
|
--brainsresample /path/to/BRAINSResample
|
|
123
132
|
```
|
|
124
133
|
|
|
134
|
+
### CLI -- Text anonymisation
|
|
135
|
+
|
|
136
|
+
#### Single file
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
caideface anonymize-single ./reports/report_1.txt ./anonymized/report_1.txt
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
#### Batch (all `.txt` files in a directory)
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
caideface anonymize ./reports ./anonymized_reports
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
#### Options
|
|
149
|
+
|
|
150
|
+
Both commands accept the same options:
|
|
151
|
+
|
|
152
|
+
| Flag | Default | Description |
|
|
153
|
+
|------|---------|-------------|
|
|
154
|
+
| `--model` | bundled | Path to a custom spaCy NER model directory |
|
|
155
|
+
| `--n-names` | `50` | Size of the fake name pool |
|
|
156
|
+
| `--seed` | none | Random seed for reproducible output |
|
|
157
|
+
| `-v` | off | Verbose/debug logging |
|
|
158
|
+
|
|
159
|
+
#### Example
|
|
160
|
+
|
|
161
|
+
**Input** (`reports/report_1550.txt`):
|
|
162
|
+
```
|
|
163
|
+
Reported by Danielle Smith and William Stuart on 03/10/2014
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Output** (`anonymized_reports/report_1550.txt`):
|
|
167
|
+
```
|
|
168
|
+
Reported by Ryan Munoz and Holly Wood on 03/10/2014
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
The batch command saves an `anonymization_log.csv` alongside the output files with a summary of replacements per file.
|
|
172
|
+
|
|
173
|
+
### Python API -- Text anonymisation
|
|
174
|
+
|
|
175
|
+
#### Single file
|
|
176
|
+
|
|
177
|
+
```python
|
|
178
|
+
from caideface.anonymize import load_ner_model, generate_fake_names, anonymize_single
|
|
179
|
+
|
|
180
|
+
# Load model and generate fake name pool (do this once)
|
|
181
|
+
nlp = load_ner_model() # uses bundled model
|
|
182
|
+
fake_names = generate_fake_names(n=50, seed=42)
|
|
183
|
+
|
|
184
|
+
# Anonymise a single report
|
|
185
|
+
result = anonymize_single(
|
|
186
|
+
input_file="reports/report_1.txt",
|
|
187
|
+
output_file="anonymized/report_1.txt",
|
|
188
|
+
nlp=nlp,
|
|
189
|
+
fake_names=fake_names,
|
|
190
|
+
)
|
|
191
|
+
print(result["replacements"]) # number of names replaced
|
|
192
|
+
print(result["names_found"]) # list of original names detected
|
|
193
|
+
print(result["name_mapping"]) # {original_name: fake_name} mapping
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
#### Batch processing
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from caideface import anonymize_batch
|
|
200
|
+
|
|
201
|
+
# Anonymise all .txt files in a directory
|
|
202
|
+
log_df = anonymize_batch(
|
|
203
|
+
input_dir="reports/",
|
|
204
|
+
output_dir="anonymized_reports/",
|
|
205
|
+
seed=42,
|
|
206
|
+
)
|
|
207
|
+
print(log_df) # DataFrame with file, replacements, names_found per file
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
#### All available imports
|
|
211
|
+
|
|
212
|
+
```python
|
|
213
|
+
from caideface import (
|
|
214
|
+
DefacePipeline, # Full image defacing pipeline
|
|
215
|
+
reorient_batch, # Step 1
|
|
216
|
+
skull_strip_batch, # Step 2
|
|
217
|
+
deface_batch, # Step 3
|
|
218
|
+
anonymize_batch, # Text anonymisation (batch)
|
|
219
|
+
anonymize_single, # Text anonymisation (single file)
|
|
220
|
+
default_ner_model_path, # Path to bundled NER model
|
|
221
|
+
)
|
|
222
|
+
```
|
|
223
|
+
|
|
125
224
|
## Output structure
|
|
126
225
|
|
|
226
|
+
### Image defacing
|
|
227
|
+
|
|
127
228
|
```
|
|
128
229
|
output/
|
|
129
230
|
├── reoriented/
|
|
@@ -141,6 +242,16 @@ output/
|
|
|
141
242
|
└── hd_bet_dilated_<scan>_masked.nii.gz # Final defaced scan
|
|
142
243
|
```
|
|
143
244
|
|
|
245
|
+
### Text anonymisation
|
|
246
|
+
|
|
247
|
+
```
|
|
248
|
+
anonymized_reports/
|
|
249
|
+
├── anonymization_log.csv # Replacements per file
|
|
250
|
+
├── report_1.txt # Anonymised report
|
|
251
|
+
├── report_2.txt
|
|
252
|
+
└── ...
|
|
253
|
+
```
|
|
254
|
+
|
|
144
255
|
## Existing transforms
|
|
145
256
|
|
|
146
257
|
If you have pre-computed registration transforms (e.g. from 3D Slicer), place a file named `Transform_to_template.txt` in the same directory as the dilated skull-stripped scan. The pipeline will use it instead of running BRAINSFit. Both plain 4x4 text matrices and ITK/Slicer transform formats are supported.
|
|
@@ -170,6 +281,22 @@ If you use HD-BET (skull-stripping, Step 2), please also cite:
|
|
|
170
281
|
}
|
|
171
282
|
```
|
|
172
283
|
|
|
284
|
+
If you use the text anonymisation (NER + HIPS), please also cite:
|
|
285
|
+
|
|
286
|
+
```bibtex
|
|
287
|
+
@article{garcia2025ner,
|
|
288
|
+
title={Evaluation of Named Entity Recognition for Automated Extraction of Present Tumor Size and Personal Names from Radiology Reports Using Spacy},
|
|
289
|
+
author={Garcia-Foncillas Macias, Lorena and Barfoot, Theodore and Vercauteren, Tom and Shapey, Jonathan},
|
|
290
|
+
journal={Journal of Neurological Surgery Part B: Skull Base},
|
|
291
|
+
volume={86},
|
|
292
|
+
number={S 01},
|
|
293
|
+
year={2025},
|
|
294
|
+
doi={10.1055/s-0045-1803715}
|
|
295
|
+
}
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
173
300
|
## License
|
|
174
301
|
|
|
175
302
|
This project is licensed under the MIT License -- see the [LICENSE](LICENSE.md) file for details.
|
|
@@ -4,15 +4,15 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "caideface"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.3.0"
|
|
8
8
|
description = "MRI defacing pipeline with skull-stripping and affine registration from cai4cai"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
license = "MIT"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
11
|
requires-python = ">=3.9"
|
|
12
12
|
authors = [
|
|
13
13
|
{name = "Lorena Garcia-Foncillas", email = "lorenagarfon00@gmail.com"},
|
|
14
14
|
]
|
|
15
|
-
keywords = ["MRI", "defacing", "anonymisation", "skull-stripping", "neuroimaging"]
|
|
15
|
+
keywords = ["MRI", "defacing", "anonymisation", "skull-stripping", "neuroimaging", "NER", "text-anonymization"]
|
|
16
16
|
classifiers = [
|
|
17
17
|
"Development Status :: 3 - Alpha",
|
|
18
18
|
"Intended Audience :: Science/Research",
|
|
@@ -29,6 +29,8 @@ dependencies = [
|
|
|
29
29
|
"natsort>=8.0",
|
|
30
30
|
"tqdm>=4.60",
|
|
31
31
|
"hd-bet",
|
|
32
|
+
"spacy>=3.5",
|
|
33
|
+
"faker>=18.0",
|
|
32
34
|
]
|
|
33
35
|
|
|
34
36
|
[project.optional-dependencies]
|
|
@@ -48,4 +50,7 @@ Repository = "https://github.com/cai4cai/defacing_pipeline"
|
|
|
48
50
|
where = ["src"]
|
|
49
51
|
|
|
50
52
|
[tool.setuptools.package-data]
|
|
51
|
-
caideface = [
|
|
53
|
+
caideface = [
|
|
54
|
+
"data/*.nii.gz",
|
|
55
|
+
"data/ner_model/**/*",
|
|
56
|
+
]
|
|
@@ -1,17 +1,20 @@
|
|
|
1
|
-
"""caideface - MRI defacing pipeline from cai4cai.
|
|
1
|
+
"""caideface - MRI defacing and text anonymisation pipeline from cai4cai.
|
|
2
2
|
|
|
3
3
|
A three-step pipeline for anonymising head MRI scans:
|
|
4
4
|
1. Reorientation to MNI152 atlas reference (nibabel)
|
|
5
5
|
2. Skull-stripping with HD-BET and dynamic dilation
|
|
6
6
|
3. Affine registration and defacing (BRAINSFit)
|
|
7
|
+
|
|
8
|
+
Plus standalone text anonymisation via NER + HIPS (Hiding in Plain Sight).
|
|
7
9
|
"""
|
|
8
10
|
|
|
9
|
-
__version__ = "0.2.0"
|
|
11
|
+
__version__ = "0.3.0"
|
|
10
12
|
|
|
11
13
|
from .pipeline import DefacePipeline
|
|
12
14
|
from .reorient import reorient_batch, reorient_single
|
|
13
15
|
from .skull_strip import skull_strip_batch, skull_strip_single
|
|
14
16
|
from .register import deface_batch, deface_single
|
|
17
|
+
from .anonymize import anonymize_batch, anonymize_single, default_ner_model_path
|
|
15
18
|
|
|
16
19
|
__all__ = [
|
|
17
20
|
"DefacePipeline",
|
|
@@ -21,4 +24,7 @@ __all__ = [
|
|
|
21
24
|
"skull_strip_single",
|
|
22
25
|
"deface_batch",
|
|
23
26
|
"deface_single",
|
|
27
|
+
"anonymize_batch",
|
|
28
|
+
"anonymize_single",
|
|
29
|
+
"default_ner_model_path",
|
|
24
30
|
]
|