pathsafe 1.0.2__tar.gz → 1.0.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pathsafe-1.0.2 → pathsafe-1.0.4}/PKG-INFO +78 -25
- {pathsafe-1.0.2 → pathsafe-1.0.4}/README.md +77 -24
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/__init__.py +1 -1
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/anonymizer.py +116 -31
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/cli.py +10 -10
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/window.py +13 -3
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/workers.py +46 -25
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/models.py +1 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/report.py +68 -5
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/PKG-INFO +78 -25
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pyproject.toml +1 -1
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_cli_integration.py +2 -1
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_integration.py +2 -1
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_report.py +6 -1
- {pathsafe-1.0.2 → pathsafe-1.0.4}/LICENSE +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/converter.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/__init__.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/base.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/bif.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/dicom.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/generic_tiff.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/mrxs.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/ndpi.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/scn.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/svs.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/tiff_base.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/__init__.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/themes.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/widgets.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui_qt.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/log.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/openslide_utils.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/scanner.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/__init__.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/blanking.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/hashing.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/parser.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/sub_ifd.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/verify.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/SOURCES.txt +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/dependency_links.txt +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/entry_points.txt +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/requires.txt +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/top_level.txt +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/setup.cfg +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_adversarial.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_anonymizer.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_bif.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_cli.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_converter.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_dicom.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_exif_gps.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_generic_tiff.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_log.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_mrxs.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_ndpi.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_pattern_config.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_phi_bypass.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_preflight.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_roundtrip.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scanner.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scanner_extended.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scn.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_concurrency.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_dicom.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_filenames.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_ifd_chains.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_rerun.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_strip_bounds.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_tag_order.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_svs.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_tiff.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_tiff_extended.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_unlink_ifd.py +0 -0
- {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_verify.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pathsafe
|
|
3
|
-
Version: 1.0.2
|
|
3
|
+
Version: 1.0.4
|
|
4
4
|
Summary: Production-tested WSI anonymizer for pathology slide files
|
|
5
5
|
Author: PathSafe Contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -41,11 +41,17 @@ Dynamic: license-file
|
|
|
41
41
|
|
|
42
42
|
# PathSafe
|
|
43
43
|
|
|
44
|
+
<p align="center">
|
|
45
|
+
<img src="pathsafe/assets/icon.png" alt="PathSafe icon" width="180">
|
|
46
|
+
</p>
|
|
47
|
+
|
|
44
48
|
**Remove patient information from pathology slide files... safely, automatically, and verifiably.**
|
|
45
49
|
|
|
46
50
|
When pathology scanners create digital slide files, they often embed hidden patient data inside the file: accession numbers, scan dates, operator names, and even photographs of the slide label. This information is invisible when viewing the slide image, but anyone with the right tools can extract it. PathSafe finds and removes all of this hidden data so your slides are safe to share for research or education.
|
|
47
51
|
|
|
48
|
-
PathSafe works with all major scanner brands, can process thousands of files at once, and double-
|
|
52
|
+
PathSafe works with all major scanner brands, can process thousands of files at once, and can double-check its own work to make sure nothing was missed.
|
|
53
|
+
|
|
54
|
+
> **v1.0.4 released** — Faster default runs (verify and checksum now opt-in), real-time per-phase progress in the GUI, I/O concurrency control for large batches, and a new SHA-256 checksum option.
|
|
49
55
|
|
|
50
56
|
---
|
|
51
57
|
|
|
@@ -57,7 +63,7 @@ PathSafe works with all major scanner brands, can process thousands of files at
|
|
|
57
63
|
| **Works with all major scanners** | Hamamatsu (NDPI), Aperio (SVS), 3DHISTECH (MRXS), Roche/Ventana (BIF), Leica (SCN), DICOM, and other TIFF-based files |
|
|
58
64
|
| **Erases label photos** | Many scanners take a photo of the physical slide label (which may show patient names) and hide it inside the file. PathSafe erases these photos |
|
|
59
65
|
| **Keeps your originals safe** | PathSafe creates cleaned copies in a separate folder, so your original files are never touched |
|
|
60
|
-
| **Double-checks everything** |
|
|
66
|
+
| **Double-checks everything** | Use Scan or Verify on your output folder to confirm all patient data was removed |
|
|
61
67
|
| **Creates compliance reports** | Generates PDF reports and certificates documenting exactly what was found, what was removed, and proof that each file is clean |
|
|
62
68
|
| **Easy to use** | A visual interface guides you through four simple steps with no typing commands required |
|
|
63
69
|
| **Handles large batches** | Process hundreds or thousands of slides at once, with parallel processing to speed things up |
|
|
@@ -76,6 +82,18 @@ Download the installer for your platform. It creates a desktop shortcut and adds
|
|
|
76
82
|
| **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
|
|
77
83
|
| **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
|
|
78
84
|
|
|
85
|
+
**General note**: Code-signing certificates for Windows and macOS are in the process of being added. Until then, your OS may show a first-run warning.
|
|
86
|
+
|
|
87
|
+
**Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
|
|
88
|
+
|
|
89
|
+
**macOS note**: After downloading, drag PathSafe into your Applications folder, then run the following commands in Terminal:
|
|
90
|
+
|
|
91
|
+
**sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"**
|
|
92
|
+
|
|
93
|
+
**open "/Applications/PathSafe.app"**
|
|
94
|
+
|
|
95
|
+
**Linux note**: You may need to right-click > properties, and choose "Open/execute as program" the first time, or run `chmod +x` on the downloaded file.
|
|
96
|
+
|
|
79
97
|
### Option 2: Standalone executable (no installation needed)
|
|
80
98
|
|
|
81
99
|
A single portable file you can run from anywhere, including USB drives.
|
|
@@ -86,38 +104,55 @@ A single portable file you can run from anywhere, including USB drives.
|
|
|
86
104
|
| **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
|
|
87
105
|
| **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
|
|
88
106
|
|
|
89
|
-
|
|
107
|
+
### Alternative: Install with Python
|
|
90
108
|
|
|
91
|
-
|
|
109
|
+
For users who have Python installed (version 3.9 or newer):
|
|
110
|
+
|
|
111
|
+
```
|
|
112
|
+
pip install pathsafe[gui]
|
|
113
|
+
```
|
|
92
114
|
|
|
93
|
-
|
|
115
|
+
Then launch with `pathsafe gui` or use `pathsafe` for the command line.
|
|
94
116
|
|
|
95
|
-
|
|
117
|
+
### Uninstallation (Windows, macOS, Linux)
|
|
96
118
|
|
|
97
|
-
|
|
119
|
+
Use the steps below based on how PathSafe was installed.
|
|
98
120
|
|
|
99
|
-
|
|
121
|
+
#### Windows
|
|
100
122
|
|
|
101
|
-
|
|
123
|
+
- **Installed with `PathSafe-Setup.exe`**: Open **Settings > Apps > Installed apps**, find **PathSafe**, then click **Uninstall**.
|
|
124
|
+
- **Standalone `pathsafe-gui-windows.exe`**: Delete the `.exe` file and any shortcuts you created.
|
|
125
|
+
- **Optional settings cleanup**: Remove `HKEY_CURRENT_USER\Software\PathSafe\PathSafe` from the registry.
|
|
126
|
+
|
|
127
|
+
#### macOS
|
|
102
128
|
|
|
129
|
+
- **Installed app (`PathSafe.app`)**: Move `/Applications/PathSafe.app` to Trash, then empty Trash.
|
|
130
|
+
- Delete any downloaded `.dmg` file if no longer needed.
|
|
131
|
+
- **Optional settings cleanup**:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
rm -f ~/Library/Preferences/com.PathSafe.PathSafe.plist
|
|
103
135
|
```
|
|
104
|
-
|
|
136
|
+
|
|
137
|
+
#### Linux
|
|
138
|
+
|
|
139
|
+
- **AppImage install**: Delete the `pathsafe-gui-linux.AppImage` file.
|
|
140
|
+
- If you created desktop integration entries manually, delete those launcher/icon files from `~/.local/share/applications` and `~/.local/share/icons` if present.
|
|
141
|
+
- **Optional settings cleanup**:
|
|
142
|
+
|
|
143
|
+
```bash
|
|
144
|
+
rm -f ~/.config/PathSafe/PathSafe.conf
|
|
105
145
|
```
|
|
106
146
|
|
|
107
|
-
|
|
147
|
+
#### Python install (any platform)
|
|
108
148
|
|
|
109
|
-
|
|
110
|
-
<summary>Optional add-ons (click to expand)</summary>
|
|
149
|
+
If PathSafe was installed with `pip`:
|
|
111
150
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
| `pip install pathsafe[dicom]` | Support for DICOM WSI files |
|
|
116
|
-
| `pip install pathsafe[openslide]` | Enhanced format detection via OpenSlide |
|
|
117
|
-
| `pip install pathsafe[convert]` | Format conversion (change file types) |
|
|
118
|
-
| `pip install pathsafe[all]` | All of the above |
|
|
151
|
+
```bash
|
|
152
|
+
python -m pip uninstall pathsafe
|
|
153
|
+
```
|
|
119
154
|
|
|
120
|
-
|
|
155
|
+
If it was installed inside a virtual environment, activate that environment first, or remove the full environment directory.
|
|
121
156
|
|
|
122
157
|
---
|
|
123
158
|
|
|
@@ -141,7 +176,7 @@ Pick the output folder where your cleaned copies will go. A default location is
|
|
|
141
176
|
|
|
142
177
|
### Step 4: Anonymize
|
|
143
178
|
|
|
144
|
-
Click **Anonymize**. PathSafe copies your files to the output folder
|
|
179
|
+
Click **Anonymize**. PathSafe copies your files to the output folder and removes all patient data from the copies. **Your original files are never modified.**
|
|
145
180
|
|
|
146
181
|
A summary popup tells you exactly what happened after each step.
|
|
147
182
|
|
|
@@ -155,6 +190,7 @@ A summary popup tells you exactly what happened after each step.
|
|
|
155
190
|
| **Use keyboard shortcuts** | Ctrl+O (open files), Ctrl+Shift+O (open folder), Ctrl+S (scan), Ctrl+R (anonymize), Ctrl+E (verify), Ctrl+I (file info), Ctrl+T (convert), Ctrl+L (save log), Esc (stop) |
|
|
156
191
|
| **Speed up large batches** | Increase the Workers slider (try 2-4) |
|
|
157
192
|
| **Preview without changing anything** | Check the "Dry run" box |
|
|
193
|
+
| **Generate SHA-256 checksums** | Check the "SHA-256 checksum" box for audit-trail hashes |
|
|
158
194
|
| **Add your institution name** | Fill in the Institution field (it appears on PDF reports and is remembered) |
|
|
159
195
|
| **Save your results** | Use Save Log or Export JSON in the Actions menu |
|
|
160
196
|
| **Convert file formats** | Use the Convert tab to change between NDPI, SVS, TIFF, PNG, and JPEG |
|
|
@@ -257,8 +293,8 @@ pathsafe gui Launch the graphical interface
|
|
|
257
293
|
| `--certificate FILE` / `-c` | Generate a compliance certificate (JSON + PDF) |
|
|
258
294
|
| `--institution NAME` / `-i` | Institution name for PDF certificate headers |
|
|
259
295
|
| `--format FORMAT` | Only process files of a specific format |
|
|
260
|
-
| `--
|
|
261
|
-
| `--
|
|
296
|
+
| `--verify-integrity` | Verify image tile data integrity via SHA-256 checksums (off by default) |
|
|
297
|
+
| `--checksum` | Compute SHA-256 checksum of each output file (off by default) |
|
|
262
298
|
| `--no-reset-timestamps` | Keep original file timestamps (reset by default) |
|
|
263
299
|
| `--log FILE` | Save all output to a log file |
|
|
264
300
|
|
|
@@ -364,6 +400,18 @@ PathSafe implements **Level IV** anonymization as defined by [Bisson et al. (202
|
|
|
364
400
|
|
|
365
401
|
---
|
|
366
402
|
|
|
403
|
+
## Independent Verification
|
|
404
|
+
|
|
405
|
+
A standalone verification script is included in the `tools/` directory. This script uses no PathSafe code and independently verifies that anonymized files contain no remaining patient data. It parses the TIFF binary structure from scratch, checks every IFD for label and macro images, scans all string and binary metadata tags, looks for EXIF and GPS sub-IFDs, and runs regex patterns against the raw file bytes.
|
|
406
|
+
|
|
407
|
+
```bash
|
|
408
|
+
python tools/independent_scanner.py /path/to/anonymized/file.svs
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
No dependencies required beyond Python 3.9+.
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
367
415
|
## Dependencies
|
|
368
416
|
|
|
369
417
|
If you use the downloadable installers (`.exe`, `.dmg`, `.AppImage`), you do not need to install Python dependencies manually.
|
|
@@ -397,4 +445,9 @@ Optional packages add extra features:
|
|
|
397
445
|
|
|
398
446
|
Apache 2.0
|
|
399
447
|
|
|
448
|
+
Maintained by Fernando Soto, MD.
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
|
|
400
453
|
|
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
# PathSafe
|
|
2
2
|
|
|
3
|
+
<p align="center">
|
|
4
|
+
<img src="pathsafe/assets/icon.png" alt="PathSafe icon" width="180">
|
|
5
|
+
</p>
|
|
6
|
+
|
|
3
7
|
**Remove patient information from pathology slide files... safely, automatically, and verifiably.**
|
|
4
8
|
|
|
5
9
|
When pathology scanners create digital slide files, they often embed hidden patient data inside the file: accession numbers, scan dates, operator names, and even photographs of the slide label. This information is invisible when viewing the slide image, but anyone with the right tools can extract it. PathSafe finds and removes all of this hidden data so your slides are safe to share for research or education.
|
|
6
10
|
|
|
7
|
-
PathSafe works with all major scanner brands, can process thousands of files at once, and double-
|
|
11
|
+
PathSafe works with all major scanner brands, can process thousands of files at once, and can double-check its own work to make sure nothing was missed.
|
|
12
|
+
|
|
13
|
+
> **v1.0.4 released** — Faster default runs (verify and checksum now opt-in), real-time per-phase progress in the GUI, I/O concurrency control for large batches, and a new SHA-256 checksum option.
|
|
8
14
|
|
|
9
15
|
---
|
|
10
16
|
|
|
@@ -16,7 +22,7 @@ PathSafe works with all major scanner brands, can process thousands of files at
|
|
|
16
22
|
| **Works with all major scanners** | Hamamatsu (NDPI), Aperio (SVS), 3DHISTECH (MRXS), Roche/Ventana (BIF), Leica (SCN), DICOM, and other TIFF-based files |
|
|
17
23
|
| **Erases label photos** | Many scanners take a photo of the physical slide label (which may show patient names) and hide it inside the file. PathSafe erases these photos |
|
|
18
24
|
| **Keeps your originals safe** | PathSafe creates cleaned copies in a separate folder, so your original files are never touched |
|
|
19
|
-
| **Double-checks everything** |
|
|
25
|
+
| **Double-checks everything** | Use Scan or Verify on your output folder to confirm all patient data was removed |
|
|
20
26
|
| **Creates compliance reports** | Generates PDF reports and certificates documenting exactly what was found, what was removed, and proof that each file is clean |
|
|
21
27
|
| **Easy to use** | A visual interface guides you through four simple steps with no typing commands required |
|
|
22
28
|
| **Handles large batches** | Process hundreds or thousands of slides at once, with parallel processing to speed things up |
|
|
@@ -35,6 +41,18 @@ Download the installer for your platform. It creates a desktop shortcut and adds
|
|
|
35
41
|
| **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
|
|
36
42
|
| **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
|
|
37
43
|
|
|
44
|
+
**General note**: Code-signing certificates for Windows and macOS are in the process of being added. Until then, your OS may show a first-run warning.
|
|
45
|
+
|
|
46
|
+
**Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
|
|
47
|
+
|
|
48
|
+
**macOS note**: After downloading, drag PathSafe into your Applications folder, then run the following commands in Terminal:
|
|
49
|
+
|
|
50
|
+
**sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"**
|
|
51
|
+
|
|
52
|
+
**open "/Applications/PathSafe.app"**
|
|
53
|
+
|
|
54
|
+
**Linux note**: You may need to right-click > properties, and choose "Open/execute as program" the first time, or run `chmod +x` on the downloaded file.
|
|
55
|
+
|
|
38
56
|
### Option 2: Standalone executable (no installation needed)
|
|
39
57
|
|
|
40
58
|
A single portable file you can run from anywhere, including USB drives.
|
|
@@ -45,38 +63,55 @@ A single portable file you can run from anywhere, including USB drives.
|
|
|
45
63
|
| **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
|
|
46
64
|
| **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
|
|
47
65
|
|
|
48
|
-
|
|
66
|
+
### Alternative: Install with Python
|
|
49
67
|
|
|
50
|
-
|
|
68
|
+
For users who have Python installed (version 3.9 or newer):
|
|
69
|
+
|
|
70
|
+
```
|
|
71
|
+
pip install pathsafe[gui]
|
|
72
|
+
```
|
|
51
73
|
|
|
52
|
-
|
|
74
|
+
Then launch with `pathsafe gui` or use `pathsafe` for the command line.
|
|
53
75
|
|
|
54
|
-
|
|
76
|
+
### Uninstallation (Windows, macOS, Linux)
|
|
55
77
|
|
|
56
|
-
|
|
78
|
+
Use the steps below based on how PathSafe was installed.
|
|
57
79
|
|
|
58
|
-
|
|
80
|
+
#### Windows
|
|
59
81
|
|
|
60
|
-
|
|
82
|
+
- **Installed with `PathSafe-Setup.exe`**: Open **Settings > Apps > Installed apps**, find **PathSafe**, then click **Uninstall**.
|
|
83
|
+
- **Standalone `pathsafe-gui-windows.exe`**: Delete the `.exe` file and any shortcuts you created.
|
|
84
|
+
- **Optional settings cleanup**: Remove `HKEY_CURRENT_USER\Software\PathSafe\PathSafe` from the registry.
|
|
85
|
+
|
|
86
|
+
#### macOS
|
|
61
87
|
|
|
88
|
+
- **Installed app (`PathSafe.app`)**: Move `/Applications/PathSafe.app` to Trash, then empty Trash.
|
|
89
|
+
- Delete any downloaded `.dmg` file if no longer needed.
|
|
90
|
+
- **Optional settings cleanup**:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
rm -f ~/Library/Preferences/com.PathSafe.PathSafe.plist
|
|
62
94
|
```
|
|
63
|
-
|
|
95
|
+
|
|
96
|
+
#### Linux
|
|
97
|
+
|
|
98
|
+
- **AppImage install**: Delete the `pathsafe-gui-linux.AppImage` file.
|
|
99
|
+
- If you created desktop integration entries manually, delete those launcher/icon files from `~/.local/share/applications` and `~/.local/share/icons` if present.
|
|
100
|
+
- **Optional settings cleanup**:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
rm -f ~/.config/PathSafe/PathSafe.conf
|
|
64
104
|
```
|
|
65
105
|
|
|
66
|
-
|
|
106
|
+
#### Python install (any platform)
|
|
67
107
|
|
|
68
|
-
|
|
69
|
-
<summary>Optional add-ons (click to expand)</summary>
|
|
108
|
+
If PathSafe was installed with `pip`:
|
|
70
109
|
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
| `pip install pathsafe[dicom]` | Support for DICOM WSI files |
|
|
75
|
-
| `pip install pathsafe[openslide]` | Enhanced format detection via OpenSlide |
|
|
76
|
-
| `pip install pathsafe[convert]` | Format conversion (change file types) |
|
|
77
|
-
| `pip install pathsafe[all]` | All of the above |
|
|
110
|
+
```bash
|
|
111
|
+
python -m pip uninstall pathsafe
|
|
112
|
+
```
|
|
78
113
|
|
|
79
|
-
|
|
114
|
+
If it was installed inside a virtual environment, activate that environment first, or remove the full environment directory.
|
|
80
115
|
|
|
81
116
|
---
|
|
82
117
|
|
|
@@ -100,7 +135,7 @@ Pick the output folder where your cleaned copies will go. A default location is
|
|
|
100
135
|
|
|
101
136
|
### Step 4: Anonymize
|
|
102
137
|
|
|
103
|
-
Click **Anonymize**. PathSafe copies your files to the output folder
|
|
138
|
+
Click **Anonymize**. PathSafe copies your files to the output folder and removes all patient data from the copies. **Your original files are never modified.**
|
|
104
139
|
|
|
105
140
|
A summary popup tells you exactly what happened after each step.
|
|
106
141
|
|
|
@@ -114,6 +149,7 @@ A summary popup tells you exactly what happened after each step.
|
|
|
114
149
|
| **Use keyboard shortcuts** | Ctrl+O (open files), Ctrl+Shift+O (open folder), Ctrl+S (scan), Ctrl+R (anonymize), Ctrl+E (verify), Ctrl+I (file info), Ctrl+T (convert), Ctrl+L (save log), Esc (stop) |
|
|
115
150
|
| **Speed up large batches** | Increase the Workers slider (try 2-4) |
|
|
116
151
|
| **Preview without changing anything** | Check the "Dry run" box |
|
|
152
|
+
| **Generate SHA-256 checksums** | Check the "SHA-256 checksum" box for audit-trail hashes |
|
|
117
153
|
| **Add your institution name** | Fill in the Institution field (it appears on PDF reports and is remembered) |
|
|
118
154
|
| **Save your results** | Use Save Log or Export JSON in the Actions menu |
|
|
119
155
|
| **Convert file formats** | Use the Convert tab to change between NDPI, SVS, TIFF, PNG, and JPEG |
|
|
@@ -216,8 +252,8 @@ pathsafe gui Launch the graphical interface
|
|
|
216
252
|
| `--certificate FILE` / `-c` | Generate a compliance certificate (JSON + PDF) |
|
|
217
253
|
| `--institution NAME` / `-i` | Institution name for PDF certificate headers |
|
|
218
254
|
| `--format FORMAT` | Only process files of a specific format |
|
|
219
|
-
| `--
|
|
220
|
-
| `--
|
|
255
|
+
| `--verify-integrity` | Verify image tile data integrity via SHA-256 checksums (off by default) |
|
|
256
|
+
| `--checksum` | Compute SHA-256 checksum of each output file (off by default) |
|
|
221
257
|
| `--no-reset-timestamps` | Keep original file timestamps (reset by default) |
|
|
222
258
|
| `--log FILE` | Save all output to a log file |
|
|
223
259
|
|
|
@@ -323,6 +359,18 @@ PathSafe implements **Level IV** anonymization as defined by [Bisson et al. (202
|
|
|
323
359
|
|
|
324
360
|
---
|
|
325
361
|
|
|
362
|
+
## Independent Verification
|
|
363
|
+
|
|
364
|
+
A standalone verification script is included in the `tools/` directory. This script uses no PathSafe code and independently verifies that anonymized files contain no remaining patient data. It parses the TIFF binary structure from scratch, checks every IFD for label and macro images, scans all string and binary metadata tags, looks for EXIF and GPS sub-IFDs, and runs regex patterns against the raw file bytes.
|
|
365
|
+
|
|
366
|
+
```bash
|
|
367
|
+
python tools/independent_scanner.py /path/to/anonymized/file.svs
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
No dependencies required beyond Python 3.9+.
|
|
371
|
+
|
|
372
|
+
---
|
|
373
|
+
|
|
326
374
|
## Dependencies
|
|
327
375
|
|
|
328
376
|
If you use the downloadable installers (`.exe`, `.dmg`, `.AppImage`), you do not need to install Python dependencies manually.
|
|
@@ -356,4 +404,9 @@ Optional packages add extra features:
|
|
|
356
404
|
|
|
357
405
|
Apache 2.0
|
|
358
406
|
|
|
407
|
+
Maintained by Fernando Soto, MD.
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
|
|
359
412
|
|
|
@@ -139,10 +139,13 @@ def _verify_image_integrity(filepath: Path, pre_hashes: dict) -> Optional[bool]:
|
|
|
139
139
|
def anonymize_file(
|
|
140
140
|
filepath: Path,
|
|
141
141
|
output_path: Optional[Path] = None,
|
|
142
|
-
verify: bool =
|
|
142
|
+
verify: bool = False,
|
|
143
143
|
dry_run: bool = False,
|
|
144
144
|
reset_timestamps: bool = False,
|
|
145
145
|
verify_integrity: bool = False,
|
|
146
|
+
phase_callback: Optional[Callable] = None,
|
|
147
|
+
io_semaphore: Optional[threading.Semaphore] = None,
|
|
148
|
+
compute_checksum: bool = False,
|
|
146
149
|
) -> AnonymizationResult:
|
|
147
150
|
"""Anonymize a single WSI file.
|
|
148
151
|
|
|
@@ -181,6 +184,8 @@ def anonymize_file(
|
|
|
181
184
|
|
|
182
185
|
if dry_run:
|
|
183
186
|
# Just scan, report what would be done
|
|
187
|
+
if phase_callback:
|
|
188
|
+
phase_callback("Scanning", filepath)
|
|
184
189
|
scan_result = handler.scan(filepath)
|
|
185
190
|
elapsed = (time.monotonic() - t0) * 1000
|
|
186
191
|
return AnonymizationResult(
|
|
@@ -188,28 +193,63 @@ def anonymize_file(
|
|
|
188
193
|
output_path=target,
|
|
189
194
|
mode=mode,
|
|
190
195
|
findings_cleared=len(scan_result.findings),
|
|
196
|
+
findings=scan_result.findings,
|
|
191
197
|
verified=False,
|
|
192
198
|
anonymization_time_ms=elapsed,
|
|
193
199
|
)
|
|
194
200
|
|
|
195
201
|
# Copy mode: copy file to output path first
|
|
196
202
|
if mode == "copy":
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
203
|
+
if io_semaphore:
|
|
204
|
+
io_semaphore.acquire()
|
|
205
|
+
try:
|
|
206
|
+
if phase_callback:
|
|
207
|
+
phase_callback("Copying", filepath)
|
|
208
|
+
target.parent.mkdir(parents=True, exist_ok=True)
|
|
209
|
+
# Chunked copy with sub-progress reporting
|
|
210
|
+
src_size = os.path.getsize(str(filepath))
|
|
211
|
+
copied = 0
|
|
212
|
+
last_pct = -1
|
|
213
|
+
with open(str(filepath), 'rb') as fsrc, open(str(target), 'wb') as fdst:
|
|
214
|
+
while True:
|
|
215
|
+
buf = fsrc.read(1_048_576) # 1 MB chunks
|
|
216
|
+
if not buf:
|
|
217
|
+
break
|
|
218
|
+
fdst.write(buf)
|
|
219
|
+
copied += len(buf)
|
|
220
|
+
if phase_callback and src_size > 0:
|
|
221
|
+
new_pct = int(copied * 100 / src_size)
|
|
222
|
+
if new_pct > last_pct:
|
|
223
|
+
last_pct = new_pct
|
|
224
|
+
phase_callback("Copying", filepath, copied / src_size)
|
|
225
|
+
shutil.copystat(str(filepath), str(target))
|
|
226
|
+
# MRXS: also copy companion data directory (slide/ next to slide.mrxs)
|
|
227
|
+
companion_dir = filepath.parent / filepath.stem
|
|
228
|
+
if companion_dir.is_dir():
|
|
229
|
+
target_companion = target.parent / target.stem
|
|
230
|
+
if not target_companion.exists():
|
|
231
|
+
shutil.copytree(str(companion_dir), str(target_companion))
|
|
232
|
+
finally:
|
|
233
|
+
if io_semaphore:
|
|
234
|
+
io_semaphore.release()
|
|
205
235
|
|
|
206
236
|
# Pre-hash tile data for integrity verification
|
|
207
237
|
pre_hashes = {}
|
|
208
238
|
if verify_integrity and not dry_run:
|
|
209
|
-
|
|
210
|
-
|
|
239
|
+
if io_semaphore:
|
|
240
|
+
io_semaphore.acquire()
|
|
241
|
+
try:
|
|
242
|
+
if phase_callback:
|
|
243
|
+
phase_callback("Hashing tiles", filepath)
|
|
244
|
+
from pathsafe.tiff import compute_image_hashes
|
|
245
|
+
pre_hashes = compute_image_hashes(target)
|
|
246
|
+
finally:
|
|
247
|
+
if io_semaphore:
|
|
248
|
+
io_semaphore.release()
|
|
211
249
|
|
|
212
250
|
# Anonymize
|
|
251
|
+
if phase_callback:
|
|
252
|
+
phase_callback("Anonymizing", filepath)
|
|
213
253
|
try:
|
|
214
254
|
findings = handler.anonymize(target)
|
|
215
255
|
except Exception as e:
|
|
@@ -232,11 +272,21 @@ def anonymize_file(
|
|
|
232
272
|
# Verify image tile data integrity
|
|
233
273
|
integrity_result = None
|
|
234
274
|
if verify_integrity and not dry_run:
|
|
235
|
-
|
|
275
|
+
if io_semaphore:
|
|
276
|
+
io_semaphore.acquire()
|
|
277
|
+
try:
|
|
278
|
+
if phase_callback:
|
|
279
|
+
phase_callback("Verifying integrity", filepath)
|
|
280
|
+
integrity_result = _verify_image_integrity(target, pre_hashes)
|
|
281
|
+
finally:
|
|
282
|
+
if io_semaphore:
|
|
283
|
+
io_semaphore.release()
|
|
236
284
|
|
|
237
285
|
# Verify -- always re-scan when verify is enabled (not just when findings > 0)
|
|
238
286
|
verified = False
|
|
239
287
|
if verify:
|
|
288
|
+
if phase_callback:
|
|
289
|
+
phase_callback("Verifying clean", filepath)
|
|
240
290
|
from pathsafe.verify import verify_file
|
|
241
291
|
verify_result = verify_file(target)
|
|
242
292
|
verified = verify_result.is_clean and not verify_result.error
|
|
@@ -245,20 +295,37 @@ def anonymize_file(
|
|
|
245
295
|
from pathsafe.scanner import scan_filename_for_phi
|
|
246
296
|
filename_has_phi = len(scan_filename_for_phi(target)) > 0
|
|
247
297
|
|
|
248
|
-
#
|
|
249
|
-
# so that opening the file doesn't update st_atime after reset)
|
|
298
|
+
# Finalize: compute SHA-256 (only when requested) and reset timestamps
|
|
250
299
|
file_sha256 = None
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
300
|
+
if compute_checksum:
|
|
301
|
+
if io_semaphore:
|
|
302
|
+
io_semaphore.acquire()
|
|
303
|
+
try:
|
|
304
|
+
if phase_callback:
|
|
305
|
+
phase_callback("Finalizing", filepath)
|
|
306
|
+
try:
|
|
307
|
+
target_size = os.path.getsize(str(target))
|
|
308
|
+
h = hashlib.sha256()
|
|
309
|
+
hashed = 0
|
|
310
|
+
last_pct = -1
|
|
311
|
+
with open(str(target), 'rb') as fh:
|
|
312
|
+
while True:
|
|
313
|
+
chunk = fh.read(65536)
|
|
314
|
+
if not chunk:
|
|
315
|
+
break
|
|
316
|
+
h.update(chunk)
|
|
317
|
+
hashed += len(chunk)
|
|
318
|
+
if phase_callback and target_size > 0:
|
|
319
|
+
new_pct = int(hashed * 100 / target_size)
|
|
320
|
+
if new_pct > last_pct:
|
|
321
|
+
last_pct = new_pct
|
|
322
|
+
phase_callback("Finalizing", filepath, hashed / target_size)
|
|
323
|
+
file_sha256 = h.hexdigest()
|
|
324
|
+
except (OSError, FileNotFoundError):
|
|
325
|
+
pass
|
|
326
|
+
finally:
|
|
327
|
+
if io_semaphore:
|
|
328
|
+
io_semaphore.release()
|
|
262
329
|
|
|
263
330
|
# Reset filesystem timestamps to epoch (removes temporal PHI)
|
|
264
331
|
if reset_timestamps:
|
|
@@ -285,7 +352,8 @@ def anonymize_file(
|
|
|
285
352
|
elapsed = (time.monotonic() - t0) * 1000
|
|
286
353
|
return AnonymizationResult(
|
|
287
354
|
source_path=filepath, output_path=target, mode=mode,
|
|
288
|
-
findings_cleared=len(findings),
|
|
355
|
+
findings_cleared=len(findings), findings=findings,
|
|
356
|
+
verified=verified,
|
|
289
357
|
anonymization_time_ms=elapsed,
|
|
290
358
|
image_integrity_verified=integrity_result,
|
|
291
359
|
filename_has_phi=filename_has_phi,
|
|
@@ -335,7 +403,7 @@ def collect_wsi_files(path: Path, format_filter: Optional[str] = None) -> List[P
|
|
|
335
403
|
def anonymize_batch(
|
|
336
404
|
input_path: Path,
|
|
337
405
|
output_dir: Optional[Path] = None,
|
|
338
|
-
verify: bool =
|
|
406
|
+
verify: bool = False,
|
|
339
407
|
dry_run: bool = False,
|
|
340
408
|
format_filter: Optional[str] = None,
|
|
341
409
|
progress_callback: Optional[Callable] = None,
|
|
@@ -344,6 +412,8 @@ def anonymize_batch(
|
|
|
344
412
|
verify_integrity: bool = False,
|
|
345
413
|
stop_check: Optional[Callable] = None,
|
|
346
414
|
file_list: Optional[List[Path]] = None,
|
|
415
|
+
phase_callback: Optional[Callable] = None,
|
|
416
|
+
compute_checksum: bool = False,
|
|
347
417
|
) -> BatchResult:
|
|
348
418
|
"""Anonymize a batch of WSI files.
|
|
349
419
|
|
|
@@ -359,6 +429,7 @@ def anonymize_batch(
|
|
|
359
429
|
verify_integrity: If True, verify image tile data integrity via SHA-256.
|
|
360
430
|
stop_check: Optional callable returning True to abort immediately.
|
|
361
431
|
file_list: If provided, use these files instead of collecting from input_path.
|
|
432
|
+
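phase_callback: Called with (phase_name, filepath) at each processing phase; while the checksum is being computed it is also called with a third argument, the fraction of the file hashed so far.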
|
|
362
433
|
|
|
363
434
|
Returns:
|
|
364
435
|
BatchResult with summary statistics.
|
|
@@ -390,11 +461,13 @@ def anonymize_batch(
|
|
|
390
461
|
if workers > 1 and total > 1:
|
|
391
462
|
results = _batch_parallel(file_pairs, verify, dry_run, workers,
|
|
392
463
|
progress_callback, batch, reset_timestamps,
|
|
393
|
-
verify_integrity, stop_check
|
|
464
|
+
verify_integrity, stop_check,
|
|
465
|
+
phase_callback, compute_checksum)
|
|
394
466
|
else:
|
|
395
467
|
results = _batch_sequential(file_pairs, verify, dry_run,
|
|
396
468
|
progress_callback, batch, reset_timestamps,
|
|
397
|
-
verify_integrity, stop_check
|
|
469
|
+
verify_integrity, stop_check,
|
|
470
|
+
phase_callback, compute_checksum)
|
|
398
471
|
|
|
399
472
|
batch.results = results
|
|
400
473
|
batch.total_time_seconds = time.monotonic() - t0
|
|
@@ -410,6 +483,8 @@ def _batch_sequential(
|
|
|
410
483
|
reset_timestamps: bool = False,
|
|
411
484
|
verify_integrity: bool = False,
|
|
412
485
|
stop_check: Optional[Callable] = None,
|
|
486
|
+
phase_callback: Optional[Callable] = None,
|
|
487
|
+
compute_checksum: bool = False,
|
|
413
488
|
) -> List[AnonymizationResult]:
|
|
414
489
|
"""Process files sequentially."""
|
|
415
490
|
results = []
|
|
@@ -422,7 +497,9 @@ def _batch_sequential(
|
|
|
422
497
|
result = anonymize_file(filepath, output_path=out, verify=verify,
|
|
423
498
|
dry_run=dry_run,
|
|
424
499
|
reset_timestamps=reset_timestamps,
|
|
425
|
-
verify_integrity=verify_integrity
|
|
500
|
+
verify_integrity=verify_integrity,
|
|
501
|
+
phase_callback=phase_callback,
|
|
502
|
+
compute_checksum=compute_checksum)
|
|
426
503
|
except Exception as e:
|
|
427
504
|
result = AnonymizationResult(
|
|
428
505
|
source_path=filepath,
|
|
@@ -450,6 +527,8 @@ def _batch_parallel(
|
|
|
450
527
|
reset_timestamps: bool = False,
|
|
451
528
|
verify_integrity: bool = False,
|
|
452
529
|
stop_check: Optional[Callable] = None,
|
|
530
|
+
phase_callback: Optional[Callable] = None,
|
|
531
|
+
compute_checksum: bool = False,
|
|
453
532
|
) -> List[AnonymizationResult]:
|
|
454
533
|
"""Process files in parallel using a thread pool.
|
|
455
534
|
|
|
@@ -457,6 +536,9 @@ def _batch_parallel(
|
|
|
457
536
|
submission order for deterministic output.
|
|
458
537
|
"""
|
|
459
538
|
total = len(file_pairs)
|
|
539
|
+
workers = min(workers, total) # no point creating more threads than files
|
|
540
|
+
# Cap concurrent I/O to 1 to prevent disk thrashing (single disk)
|
|
541
|
+
io_semaphore = threading.Semaphore(1)
|
|
460
542
|
# Pre-allocate results list to maintain order
|
|
461
543
|
results = [None] * total
|
|
462
544
|
lock = threading.Lock()
|
|
@@ -467,7 +549,10 @@ def _batch_parallel(
|
|
|
467
549
|
return index, anonymize_file(filepath, output_path=out,
|
|
468
550
|
verify=verify, dry_run=dry_run,
|
|
469
551
|
reset_timestamps=reset_timestamps,
|
|
470
|
-
verify_integrity=verify_integrity
|
|
552
|
+
verify_integrity=verify_integrity,
|
|
553
|
+
phase_callback=phase_callback,
|
|
554
|
+
io_semaphore=io_semaphore,
|
|
555
|
+
compute_checksum=compute_checksum)
|
|
471
556
|
except Exception as e:
|
|
472
557
|
return index, AnonymizationResult(
|
|
473
558
|
source_path=filepath,
|