pathsafe 1.0.2__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. {pathsafe-1.0.2 → pathsafe-1.0.4}/PKG-INFO +78 -25
  2. {pathsafe-1.0.2 → pathsafe-1.0.4}/README.md +77 -24
  3. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/__init__.py +1 -1
  4. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/anonymizer.py +116 -31
  5. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/cli.py +10 -10
  6. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/window.py +13 -3
  7. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/workers.py +46 -25
  8. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/models.py +1 -0
  9. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/report.py +68 -5
  10. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/PKG-INFO +78 -25
  11. {pathsafe-1.0.2 → pathsafe-1.0.4}/pyproject.toml +1 -1
  12. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_cli_integration.py +2 -1
  13. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_integration.py +2 -1
  14. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_report.py +6 -1
  15. {pathsafe-1.0.2 → pathsafe-1.0.4}/LICENSE +0 -0
  16. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/converter.py +0 -0
  17. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/__init__.py +0 -0
  18. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/base.py +0 -0
  19. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/bif.py +0 -0
  20. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/dicom.py +0 -0
  21. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/generic_tiff.py +0 -0
  22. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/mrxs.py +0 -0
  23. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/ndpi.py +0 -0
  24. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/scn.py +0 -0
  25. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/svs.py +0 -0
  26. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/formats/tiff_base.py +0 -0
  27. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/__init__.py +0 -0
  28. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/themes.py +0 -0
  29. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui/widgets.py +0 -0
  30. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/gui_qt.py +0 -0
  31. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/log.py +0 -0
  32. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/openslide_utils.py +0 -0
  33. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/scanner.py +0 -0
  34. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/__init__.py +0 -0
  35. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/blanking.py +0 -0
  36. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/hashing.py +0 -0
  37. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/parser.py +0 -0
  38. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/tiff/sub_ifd.py +0 -0
  39. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe/verify.py +0 -0
  40. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/SOURCES.txt +0 -0
  41. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/dependency_links.txt +0 -0
  42. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/entry_points.txt +0 -0
  43. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/requires.txt +0 -0
  44. {pathsafe-1.0.2 → pathsafe-1.0.4}/pathsafe.egg-info/top_level.txt +0 -0
  45. {pathsafe-1.0.2 → pathsafe-1.0.4}/setup.cfg +0 -0
  46. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_adversarial.py +0 -0
  47. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_anonymizer.py +0 -0
  48. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_bif.py +0 -0
  49. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_cli.py +0 -0
  50. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_converter.py +0 -0
  51. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_dicom.py +0 -0
  52. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_exif_gps.py +0 -0
  53. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_generic_tiff.py +0 -0
  54. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_log.py +0 -0
  55. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_mrxs.py +0 -0
  56. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_ndpi.py +0 -0
  57. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_pattern_config.py +0 -0
  58. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_phi_bypass.py +0 -0
  59. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_preflight.py +0 -0
  60. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_roundtrip.py +0 -0
  61. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scanner.py +0 -0
  62. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scanner_extended.py +0 -0
  63. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_scn.py +0 -0
  64. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_concurrency.py +0 -0
  65. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_dicom.py +0 -0
  66. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_filenames.py +0 -0
  67. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_ifd_chains.py +0 -0
  68. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_rerun.py +0 -0
  69. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_strip_bounds.py +0 -0
  70. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_stress_tag_order.py +0 -0
  71. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_svs.py +0 -0
  72. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_tiff.py +0 -0
  73. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_tiff_extended.py +0 -0
  74. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_unlink_ifd.py +0 -0
  75. {pathsafe-1.0.2 → pathsafe-1.0.4}/tests/test_verify.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pathsafe
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Production-tested WSI anonymizer for pathology slide files
5
5
  Author: PathSafe Contributors
6
6
  License-Expression: Apache-2.0
@@ -41,11 +41,17 @@ Dynamic: license-file
41
41
 
42
42
  # PathSafe
43
43
 
44
+ <p align="center">
45
+ <img src="pathsafe/assets/icon.png" alt="PathSafe icon" width="180">
46
+ </p>
47
+
44
48
  **Remove patient information from pathology slide files... safely, automatically, and verifiably.**
45
49
 
46
50
  When pathology scanners create digital slide files, they often embed hidden patient data inside the file: accession numbers, scan dates, operator names, and even photographs of the slide label. This information is invisible when viewing the slide image, but anyone with the right tools can extract it. PathSafe finds and removes all of this hidden data so your slides are safe to share for research or education.
47
51
 
48
- PathSafe works with all major scanner brands, can process thousands of files at once, and double-checks its own work to make sure nothing was missed.
52
+ PathSafe works with all major scanner brands, can process thousands of files at once, and can double-check its own work to make sure nothing was missed.
53
+
54
+ > **v1.0.4 released** — Faster default runs (verify and checksum now opt-in), real-time per-phase progress in the GUI, I/O concurrency control for large batches, and a new SHA-256 checksum option.
49
55
 
50
56
  ---
51
57
 
@@ -57,7 +63,7 @@ PathSafe works with all major scanner brands, can process thousands of files at
57
63
  | **Works with all major scanners** | Hamamatsu (NDPI), Aperio (SVS), 3DHISTECH (MRXS), Roche/Ventana (BIF), Leica (SCN), DICOM, and other TIFF-based files |
58
64
  | **Erases label photos** | Many scanners take a photo of the physical slide label (which may show patient names) and hide it inside the file. PathSafe erases these photos |
59
65
  | **Keeps your originals safe** | PathSafe creates cleaned copies in a separate folder, so your original files are never touched |
60
- | **Double-checks everything** | After cleaning, PathSafe re-scans every file to confirm all patient data was actually removed |
66
+ | **Double-checks everything** | Use Scan or Verify on your output folder to confirm all patient data was removed |
61
67
  | **Creates compliance reports** | Generates PDF reports and certificates documenting exactly what was found, what was removed, and proof that each file is clean |
62
68
  | **Easy to use** | A visual interface guides you through four simple steps with no typing commands required |
63
69
  | **Handles large batches** | Process hundreds or thousands of slides at once, with parallel processing to speed things up |
@@ -76,6 +82,18 @@ Download the installer for your platform. It creates a desktop shortcut and adds
76
82
  | **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
77
83
  | **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
78
84
 
85
+ **General note**: Code-signing certificates for Windows and macOS are in the process of being added. Until then, your OS may show a first-run warning.
86
+
87
+ **Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
88
+
89
+ **macOS note**: After downloading, drag PathSafe into your Applications folder, then run the following commands in Terminal:
90
+
91
+ **`sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"`**
92
+
93
+ **`open "/Applications/PathSafe.app"`**
94
+
95
+ **Linux note**: The first time you run it, you may need to right-click the downloaded file, open Properties, and enable "Open/execute as program", or run `chmod +x` on the downloaded file.
96
+
79
97
  ### Option 2: Standalone executable (no installation needed)
80
98
 
81
99
  A single portable file you can run from anywhere, including USB drives.
@@ -86,38 +104,55 @@ A single portable file you can run from anywhere, including USB drives.
86
104
  | **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
87
105
  | **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
88
106
 
89
- **Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
107
+ ### Alternative: Install with Python
90
108
 
91
- **macOS note**: After downloading, drag into your applications folder and input the following into the terminal:
109
+ For users who have Python installed (version 3.9 or newer):
110
+
111
+ ```
112
+ pip install pathsafe[gui]
113
+ ```
92
114
 
93
- sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"
115
+ Then launch with `pathsafe gui` or use `pathsafe` for the command line.
94
116
 
95
- open "/Applications/PathSafe.app"
117
+ ### Uninstallation (Windows, macOS, Linux)
96
118
 
97
- **Linux note**: You may need to right-click > properties, and choose "Open/execute as program" the first time, or run `chmod +x` on the downloaded file.
119
+ Use the steps below based on how PathSafe was installed.
98
120
 
99
- ### Alternative: Install with Python
121
+ #### Windows
100
122
 
101
- For users who have Python installed (version 3.9 or newer):
123
+ - **Installed with `PathSafe-Setup.exe`**: Open **Settings > Apps > Installed apps**, find **PathSafe**, then click **Uninstall**.
124
+ - **Standalone `pathsafe-gui-windows.exe`**: Delete the `.exe` file and any shortcuts you created.
125
+ - **Optional settings cleanup**: Remove `HKEY_CURRENT_USER\Software\PathSafe\PathSafe` from the registry.
126
+
127
+ #### macOS
102
128
 
129
+ - **Installed app (`PathSafe.app`)**: Move `/Applications/PathSafe.app` to Trash, then empty Trash.
130
+ - Delete any downloaded `.dmg` file if no longer needed.
131
+ - **Optional settings cleanup**:
132
+
133
+ ```bash
134
+ rm -f ~/Library/Preferences/com.PathSafe.PathSafe.plist
103
135
  ```
104
- pip install pathsafe[gui]
136
+
137
+ #### Linux
138
+
139
+ - **AppImage install**: Delete the `pathsafe-gui-linux.AppImage` file.
140
+ - If you created desktop integration entries manually, delete those launcher/icon files from `~/.local/share/applications` and `~/.local/share/icons` if present.
141
+ - **Optional settings cleanup**:
142
+
143
+ ```bash
144
+ rm -f ~/.config/PathSafe/PathSafe.conf
105
145
  ```
106
146
 
107
- Then launch with `pathsafe gui` or use `pathsafe` for the command line.
147
+ #### Python install (any platform)
108
148
 
109
- <details>
110
- <summary>Optional add-ons (click to expand)</summary>
149
+ If PathSafe was installed with `pip`:
111
150
 
112
- | Install command | What it adds |
113
- |----------------|-------------|
114
- | `pip install pathsafe[gui]` | Graphical interface (recommended) |
115
- | `pip install pathsafe[dicom]` | Support for DICOM WSI files |
116
- | `pip install pathsafe[openslide]` | Enhanced format detection via OpenSlide |
117
- | `pip install pathsafe[convert]` | Format conversion (change file types) |
118
- | `pip install pathsafe[all]` | All of the above |
151
+ ```bash
152
+ python -m pip uninstall pathsafe
153
+ ```
119
154
 
120
- </details>
155
+ If it was installed inside a virtual environment, activate that environment first, or remove the full environment directory.
121
156
 
122
157
  ---
123
158
 
@@ -141,7 +176,7 @@ Pick the output folder where your cleaned copies will go. A default location is
141
176
 
142
177
  ### Step 4: Anonymize
143
178
 
144
- Click **Anonymize**. PathSafe copies your files to the output folder, removes all patient data from the copies, and automatically double-checks that everything was removed. **Your original files are never modified.**
179
+ Click **Anonymize**. PathSafe copies your files to the output folder and removes all patient data from the copies. **Your original files are never modified.**
145
180
 
146
181
  A summary popup tells you exactly what happened after each step.
147
182
 
@@ -155,6 +190,7 @@ A summary popup tells you exactly what happened after each step.
155
190
  | **Use keyboard shortcuts** | Ctrl+O (open files), Ctrl+Shift+O (open folder), Ctrl+S (scan), Ctrl+R (anonymize), Ctrl+E (verify), Ctrl+I (file info), Ctrl+T (convert), Ctrl+L (save log), Esc (stop) |
156
191
  | **Speed up large batches** | Increase the Workers slider (try 2-4) |
157
192
  | **Preview without changing anything** | Check the "Dry run" box |
193
+ | **Generate SHA-256 checksums** | Check the "SHA-256 checksum" box for audit-trail hashes |
158
194
  | **Add your institution name** | Fill in the Institution field (it appears on PDF reports and is remembered) |
159
195
  | **Save your results** | Use Save Log or Export JSON in the Actions menu |
160
196
  | **Convert file formats** | Use the Convert tab to change between NDPI, SVS, TIFF, PNG, and JPEG |
@@ -257,8 +293,8 @@ pathsafe gui Launch the graphical interface
257
293
  | `--certificate FILE` / `-c` | Generate a compliance certificate (JSON + PDF) |
258
294
  | `--institution NAME` / `-i` | Institution name for PDF certificate headers |
259
295
  | `--format FORMAT` | Only process files of a specific format |
260
- | `--no-verify` | Skip the automatic re-scan after anonymization |
261
- | `--no-verify-integrity` | Skip image integrity checking (enabled by default) |
296
+ | `--verify-integrity` | Verify image tile data integrity via SHA-256 checksums (off by default) |
297
+ | `--checksum` | Compute SHA-256 checksum of each output file (off by default) |
262
298
  | `--no-reset-timestamps` | Keep original file timestamps (reset by default) |
263
299
  | `--log FILE` | Save all output to a log file |
264
300
 
@@ -364,6 +400,18 @@ PathSafe implements **Level IV** anonymization as defined by [Bisson et al. (202
364
400
 
365
401
  ---
366
402
 
403
+ ## Independent Verification
404
+
405
+ A standalone verification script is included in the `tools/` directory. This script uses no PathSafe code and independently verifies that anonymized files contain no remaining patient data. It parses the TIFF binary structure from scratch, checks every IFD for label and macro images, scans all string and binary metadata tags, looks for EXIF and GPS sub-IFDs, and runs regex patterns against the raw file bytes.
406
+
407
+ ```bash
408
+ python tools/independent_scanner.py /path/to/anonymized/file.svs
409
+ ```
410
+
411
+ No dependencies required beyond Python 3.9+.
412
+
413
+ ---
414
+
367
415
  ## Dependencies
368
416
 
369
417
  If you use the downloadable installers (`.exe`, `.dmg`, `.AppImage`), you do not need to install Python dependencies manually.
@@ -397,4 +445,9 @@ Optional packages add extra features:
397
445
 
398
446
  Apache 2.0
399
447
 
448
+ Maintained by Fernando Soto, MD.
449
+
450
+
451
+
452
+
400
453
 
@@ -1,10 +1,16 @@
1
1
  # PathSafe
2
2
 
3
+ <p align="center">
4
+ <img src="pathsafe/assets/icon.png" alt="PathSafe icon" width="180">
5
+ </p>
6
+
3
7
  **Remove patient information from pathology slide files... safely, automatically, and verifiably.**
4
8
 
5
9
  When pathology scanners create digital slide files, they often embed hidden patient data inside the file: accession numbers, scan dates, operator names, and even photographs of the slide label. This information is invisible when viewing the slide image, but anyone with the right tools can extract it. PathSafe finds and removes all of this hidden data so your slides are safe to share for research or education.
6
10
 
7
- PathSafe works with all major scanner brands, can process thousands of files at once, and double-checks its own work to make sure nothing was missed.
11
+ PathSafe works with all major scanner brands, can process thousands of files at once, and can double-check its own work to make sure nothing was missed.
12
+
13
+ > **v1.0.4 released** — Faster default runs (verify and checksum now opt-in), real-time per-phase progress in the GUI, I/O concurrency control for large batches, and a new SHA-256 checksum option.
8
14
 
9
15
  ---
10
16
 
@@ -16,7 +22,7 @@ PathSafe works with all major scanner brands, can process thousands of files at
16
22
  | **Works with all major scanners** | Hamamatsu (NDPI), Aperio (SVS), 3DHISTECH (MRXS), Roche/Ventana (BIF), Leica (SCN), DICOM, and other TIFF-based files |
17
23
  | **Erases label photos** | Many scanners take a photo of the physical slide label (which may show patient names) and hide it inside the file. PathSafe erases these photos |
18
24
  | **Keeps your originals safe** | PathSafe creates cleaned copies in a separate folder, so your original files are never touched |
19
- | **Double-checks everything** | After cleaning, PathSafe re-scans every file to confirm all patient data was actually removed |
25
+ | **Double-checks everything** | Use Scan or Verify on your output folder to confirm all patient data was removed |
20
26
  | **Creates compliance reports** | Generates PDF reports and certificates documenting exactly what was found, what was removed, and proof that each file is clean |
21
27
  | **Easy to use** | A visual interface guides you through four simple steps with no typing commands required |
22
28
  | **Handles large batches** | Process hundreds or thousands of slides at once, with parallel processing to speed things up |
@@ -35,6 +41,18 @@ Download the installer for your platform. It creates a desktop shortcut and adds
35
41
  | **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
36
42
  | **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
37
43
 
44
+ **General note**: Code-signing certificates for Windows and macOS are in the process of being added. Until then, your OS may show a first-run warning.
45
+
46
+ **Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
47
+
48
+ **macOS note**: After downloading, drag PathSafe into your Applications folder, then run the following commands in Terminal:
49
+
50
+ **`sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"`**
51
+
52
+ **`open "/Applications/PathSafe.app"`**
53
+
54
+ **Linux note**: The first time you run it, you may need to right-click the downloaded file, open Properties, and enable "Open/execute as program", or run `chmod +x` on the downloaded file.
55
+
38
56
  ### Option 2: Standalone executable (no installation needed)
39
57
 
40
58
  A single portable file you can run from anywhere, including USB drives.
@@ -45,38 +63,55 @@ A single portable file you can run from anywhere, including USB drives.
45
63
  | **macOS** | [pathsafe-gui-macos.dmg](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-macos.dmg) |
46
64
  | **Linux** | [pathsafe-gui-linux.AppImage](https://github.com/DrSoma/PathSafe/releases/latest/download/pathsafe-gui-linux.AppImage) |
47
65
 
48
- **Windows note**: Windows may show a "Windows protected your PC" warning the first time. Click "More info" then "Run anyway".
66
+ ### Alternative: Install with Python
49
67
 
50
- **macOS note**: After downloading, drag into your applications folder and input the following into the terminal:
68
+ For users who have Python installed (version 3.9 or newer):
69
+
70
+ ```
71
+ pip install pathsafe[gui]
72
+ ```
51
73
 
52
- sudo xattr -rd com.apple.quarantine "/Applications/PathSafe.app"
74
+ Then launch with `pathsafe gui` or use `pathsafe` for the command line.
53
75
 
54
- open "/Applications/PathSafe.app"
76
+ ### Uninstallation (Windows, macOS, Linux)
55
77
 
56
- **Linux note**: You may need to right-click > properties, and choose "Open/execute as program" the first time, or run `chmod +x` on the downloaded file.
78
+ Use the steps below based on how PathSafe was installed.
57
79
 
58
- ### Alternative: Install with Python
80
+ #### Windows
59
81
 
60
- For users who have Python installed (version 3.9 or newer):
82
+ - **Installed with `PathSafe-Setup.exe`**: Open **Settings > Apps > Installed apps**, find **PathSafe**, then click **Uninstall**.
83
+ - **Standalone `pathsafe-gui-windows.exe`**: Delete the `.exe` file and any shortcuts you created.
84
+ - **Optional settings cleanup**: Remove `HKEY_CURRENT_USER\Software\PathSafe\PathSafe` from the registry.
85
+
86
+ #### macOS
61
87
 
88
+ - **Installed app (`PathSafe.app`)**: Move `/Applications/PathSafe.app` to Trash, then empty Trash.
89
+ - Delete any downloaded `.dmg` file if no longer needed.
90
+ - **Optional settings cleanup**:
91
+
92
+ ```bash
93
+ rm -f ~/Library/Preferences/com.PathSafe.PathSafe.plist
62
94
  ```
63
- pip install pathsafe[gui]
95
+
96
+ #### Linux
97
+
98
+ - **AppImage install**: Delete the `pathsafe-gui-linux.AppImage` file.
99
+ - If you created desktop integration entries manually, delete those launcher/icon files from `~/.local/share/applications` and `~/.local/share/icons` if present.
100
+ - **Optional settings cleanup**:
101
+
102
+ ```bash
103
+ rm -f ~/.config/PathSafe/PathSafe.conf
64
104
  ```
65
105
 
66
- Then launch with `pathsafe gui` or use `pathsafe` for the command line.
106
+ #### Python install (any platform)
67
107
 
68
- <details>
69
- <summary>Optional add-ons (click to expand)</summary>
108
+ If PathSafe was installed with `pip`:
70
109
 
71
- | Install command | What it adds |
72
- |----------------|-------------|
73
- | `pip install pathsafe[gui]` | Graphical interface (recommended) |
74
- | `pip install pathsafe[dicom]` | Support for DICOM WSI files |
75
- | `pip install pathsafe[openslide]` | Enhanced format detection via OpenSlide |
76
- | `pip install pathsafe[convert]` | Format conversion (change file types) |
77
- | `pip install pathsafe[all]` | All of the above |
110
+ ```bash
111
+ python -m pip uninstall pathsafe
112
+ ```
78
113
 
79
- </details>
114
+ If it was installed inside a virtual environment, activate that environment first, or remove the full environment directory.
80
115
 
81
116
  ---
82
117
 
@@ -100,7 +135,7 @@ Pick the output folder where your cleaned copies will go. A default location is
100
135
 
101
136
  ### Step 4: Anonymize
102
137
 
103
- Click **Anonymize**. PathSafe copies your files to the output folder, removes all patient data from the copies, and automatically double-checks that everything was removed. **Your original files are never modified.**
138
+ Click **Anonymize**. PathSafe copies your files to the output folder and removes all patient data from the copies. **Your original files are never modified.**
104
139
 
105
140
  A summary popup tells you exactly what happened after each step.
106
141
 
@@ -114,6 +149,7 @@ A summary popup tells you exactly what happened after each step.
114
149
  | **Use keyboard shortcuts** | Ctrl+O (open files), Ctrl+Shift+O (open folder), Ctrl+S (scan), Ctrl+R (anonymize), Ctrl+E (verify), Ctrl+I (file info), Ctrl+T (convert), Ctrl+L (save log), Esc (stop) |
115
150
  | **Speed up large batches** | Increase the Workers slider (try 2-4) |
116
151
  | **Preview without changing anything** | Check the "Dry run" box |
152
+ | **Generate SHA-256 checksums** | Check the "SHA-256 checksum" box for audit-trail hashes |
117
153
  | **Add your institution name** | Fill in the Institution field (it appears on PDF reports and is remembered) |
118
154
  | **Save your results** | Use Save Log or Export JSON in the Actions menu |
119
155
  | **Convert file formats** | Use the Convert tab to change between NDPI, SVS, TIFF, PNG, and JPEG |
@@ -216,8 +252,8 @@ pathsafe gui Launch the graphical interface
216
252
  | `--certificate FILE` / `-c` | Generate a compliance certificate (JSON + PDF) |
217
253
  | `--institution NAME` / `-i` | Institution name for PDF certificate headers |
218
254
  | `--format FORMAT` | Only process files of a specific format |
219
- | `--no-verify` | Skip the automatic re-scan after anonymization |
220
- | `--no-verify-integrity` | Skip image integrity checking (enabled by default) |
255
+ | `--verify-integrity` | Verify image tile data integrity via SHA-256 checksums (off by default) |
256
+ | `--checksum` | Compute SHA-256 checksum of each output file (off by default) |
221
257
  | `--no-reset-timestamps` | Keep original file timestamps (reset by default) |
222
258
  | `--log FILE` | Save all output to a log file |
223
259
 
@@ -323,6 +359,18 @@ PathSafe implements **Level IV** anonymization as defined by [Bisson et al. (202
323
359
 
324
360
  ---
325
361
 
362
+ ## Independent Verification
363
+
364
+ A standalone verification script is included in the `tools/` directory. This script uses no PathSafe code and independently verifies that anonymized files contain no remaining patient data. It parses the TIFF binary structure from scratch, checks every IFD for label and macro images, scans all string and binary metadata tags, looks for EXIF and GPS sub-IFDs, and runs regex patterns against the raw file bytes.
365
+
366
+ ```bash
367
+ python tools/independent_scanner.py /path/to/anonymized/file.svs
368
+ ```
369
+
370
+ No dependencies required beyond Python 3.9+.
371
+
372
+ ---
373
+
326
374
  ## Dependencies
327
375
 
328
376
  If you use the downloadable installers (`.exe`, `.dmg`, `.AppImage`), you do not need to install Python dependencies manually.
@@ -356,4 +404,9 @@ Optional packages add extra features:
356
404
 
357
405
  Apache 2.0
358
406
 
407
+ Maintained by Fernando Soto, MD.
408
+
409
+
410
+
411
+
359
412
 
@@ -1,6 +1,6 @@
1
1
  """PathSafe -- Production-tested WSI anonymizer for pathology slide files."""
2
2
 
3
- __version__ = "1.0.2"
3
+ __version__ = "1.0.4"
4
4
 
5
5
  from pathsafe.models import (
6
6
  AnonymizationResult,
@@ -139,10 +139,13 @@ def _verify_image_integrity(filepath: Path, pre_hashes: dict) -> Optional[bool]:
139
139
  def anonymize_file(
140
140
  filepath: Path,
141
141
  output_path: Optional[Path] = None,
142
- verify: bool = True,
142
+ verify: bool = False,
143
143
  dry_run: bool = False,
144
144
  reset_timestamps: bool = False,
145
145
  verify_integrity: bool = False,
146
+ phase_callback: Optional[Callable] = None,
147
+ io_semaphore: Optional[threading.Semaphore] = None,
148
+ compute_checksum: bool = False,
146
149
  ) -> AnonymizationResult:
147
150
  """Anonymize a single WSI file.
148
151
 
@@ -181,6 +184,8 @@ def anonymize_file(
181
184
 
182
185
  if dry_run:
183
186
  # Just scan, report what would be done
187
+ if phase_callback:
188
+ phase_callback("Scanning", filepath)
184
189
  scan_result = handler.scan(filepath)
185
190
  elapsed = (time.monotonic() - t0) * 1000
186
191
  return AnonymizationResult(
@@ -188,28 +193,63 @@ def anonymize_file(
188
193
  output_path=target,
189
194
  mode=mode,
190
195
  findings_cleared=len(scan_result.findings),
196
+ findings=scan_result.findings,
191
197
  verified=False,
192
198
  anonymization_time_ms=elapsed,
193
199
  )
194
200
 
195
201
  # Copy mode: copy file to output path first
196
202
  if mode == "copy":
197
- target.parent.mkdir(parents=True, exist_ok=True)
198
- shutil.copy2(str(filepath), str(target))
199
- # MRXS: also copy companion data directory (slide/ next to slide.mrxs)
200
- companion_dir = filepath.parent / filepath.stem
201
- if companion_dir.is_dir():
202
- target_companion = target.parent / target.stem
203
- if not target_companion.exists():
204
- shutil.copytree(str(companion_dir), str(target_companion))
203
+ if io_semaphore:
204
+ io_semaphore.acquire()
205
+ try:
206
+ if phase_callback:
207
+ phase_callback("Copying", filepath)
208
+ target.parent.mkdir(parents=True, exist_ok=True)
209
+ # Chunked copy with sub-progress reporting
210
+ src_size = os.path.getsize(str(filepath))
211
+ copied = 0
212
+ last_pct = -1
213
+ with open(str(filepath), 'rb') as fsrc, open(str(target), 'wb') as fdst:
214
+ while True:
215
+ buf = fsrc.read(1_048_576) # 1 MB chunks
216
+ if not buf:
217
+ break
218
+ fdst.write(buf)
219
+ copied += len(buf)
220
+ if phase_callback and src_size > 0:
221
+ new_pct = int(copied * 100 / src_size)
222
+ if new_pct > last_pct:
223
+ last_pct = new_pct
224
+ phase_callback("Copying", filepath, copied / src_size)
225
+ shutil.copystat(str(filepath), str(target))
226
+ # MRXS: also copy companion data directory (slide/ next to slide.mrxs)
227
+ companion_dir = filepath.parent / filepath.stem
228
+ if companion_dir.is_dir():
229
+ target_companion = target.parent / target.stem
230
+ if not target_companion.exists():
231
+ shutil.copytree(str(companion_dir), str(target_companion))
232
+ finally:
233
+ if io_semaphore:
234
+ io_semaphore.release()
205
235
 
206
236
  # Pre-hash tile data for integrity verification
207
237
  pre_hashes = {}
208
238
  if verify_integrity and not dry_run:
209
- from pathsafe.tiff import compute_image_hashes
210
- pre_hashes = compute_image_hashes(target)
239
+ if io_semaphore:
240
+ io_semaphore.acquire()
241
+ try:
242
+ if phase_callback:
243
+ phase_callback("Hashing tiles", filepath)
244
+ from pathsafe.tiff import compute_image_hashes
245
+ pre_hashes = compute_image_hashes(target)
246
+ finally:
247
+ if io_semaphore:
248
+ io_semaphore.release()
211
249
 
212
250
  # Anonymize
251
+ if phase_callback:
252
+ phase_callback("Anonymizing", filepath)
213
253
  try:
214
254
  findings = handler.anonymize(target)
215
255
  except Exception as e:
@@ -232,11 +272,21 @@ def anonymize_file(
232
272
  # Verify image tile data integrity
233
273
  integrity_result = None
234
274
  if verify_integrity and not dry_run:
235
- integrity_result = _verify_image_integrity(target, pre_hashes)
275
+ if io_semaphore:
276
+ io_semaphore.acquire()
277
+ try:
278
+ if phase_callback:
279
+ phase_callback("Verifying integrity", filepath)
280
+ integrity_result = _verify_image_integrity(target, pre_hashes)
281
+ finally:
282
+ if io_semaphore:
283
+ io_semaphore.release()
236
284
 
237
285
  # Verify -- always re-scan when verify is enabled (not just when findings > 0)
238
286
  verified = False
239
287
  if verify:
288
+ if phase_callback:
289
+ phase_callback("Verifying clean", filepath)
240
290
  from pathsafe.verify import verify_file
241
291
  verify_result = verify_file(target)
242
292
  verified = verify_result.is_clean and not verify_result.error
@@ -245,20 +295,37 @@ def anonymize_file(
245
295
  from pathsafe.scanner import scan_filename_for_phi
246
296
  filename_has_phi = len(scan_filename_for_phi(target)) > 0
247
297
 
248
- # Compute SHA-256 of the final output file (before timestamp reset
249
- # so that opening the file doesn't update st_atime after reset)
298
+ # Finalize: compute SHA-256 (only when requested) and reset timestamps
250
299
  file_sha256 = None
251
- try:
252
- h = hashlib.sha256()
253
- with open(str(target), 'rb') as fh:
254
- while True:
255
- chunk = fh.read(65536)
256
- if not chunk:
257
- break
258
- h.update(chunk)
259
- file_sha256 = h.hexdigest()
260
- except (OSError, FileNotFoundError):
261
- pass
300
+ if compute_checksum:
301
+ if io_semaphore:
302
+ io_semaphore.acquire()
303
+ try:
304
+ if phase_callback:
305
+ phase_callback("Finalizing", filepath)
306
+ try:
307
+ target_size = os.path.getsize(str(target))
308
+ h = hashlib.sha256()
309
+ hashed = 0
310
+ last_pct = -1
311
+ with open(str(target), 'rb') as fh:
312
+ while True:
313
+ chunk = fh.read(65536)
314
+ if not chunk:
315
+ break
316
+ h.update(chunk)
317
+ hashed += len(chunk)
318
+ if phase_callback and target_size > 0:
319
+ new_pct = int(hashed * 100 / target_size)
320
+ if new_pct > last_pct:
321
+ last_pct = new_pct
322
+ phase_callback("Finalizing", filepath, hashed / target_size)
323
+ file_sha256 = h.hexdigest()
324
+ except (OSError, FileNotFoundError):
325
+ pass
326
+ finally:
327
+ if io_semaphore:
328
+ io_semaphore.release()
262
329
 
263
330
  # Reset filesystem timestamps to epoch (removes temporal PHI)
264
331
  if reset_timestamps:
@@ -285,7 +352,8 @@ def anonymize_file(
285
352
  elapsed = (time.monotonic() - t0) * 1000
286
353
  return AnonymizationResult(
287
354
  source_path=filepath, output_path=target, mode=mode,
288
- findings_cleared=len(findings), verified=verified,
355
+ findings_cleared=len(findings), findings=findings,
356
+ verified=verified,
289
357
  anonymization_time_ms=elapsed,
290
358
  image_integrity_verified=integrity_result,
291
359
  filename_has_phi=filename_has_phi,
@@ -335,7 +403,7 @@ def collect_wsi_files(path: Path, format_filter: Optional[str] = None) -> List[P
335
403
  def anonymize_batch(
336
404
  input_path: Path,
337
405
  output_dir: Optional[Path] = None,
338
- verify: bool = True,
406
+ verify: bool = False,
339
407
  dry_run: bool = False,
340
408
  format_filter: Optional[str] = None,
341
409
  progress_callback: Optional[Callable] = None,
@@ -344,6 +412,8 @@ def anonymize_batch(
344
412
  verify_integrity: bool = False,
345
413
  stop_check: Optional[Callable] = None,
346
414
  file_list: Optional[List[Path]] = None,
415
+ phase_callback: Optional[Callable] = None,
416
+ compute_checksum: bool = False,
347
417
  ) -> BatchResult:
348
418
  """Anonymize a batch of WSI files.
349
419
 
@@ -359,6 +429,7 @@ def anonymize_batch(
359
429
  verify_integrity: If True, verify image tile data integrity via SHA-256.
360
430
  stop_check: Optional callable returning True to abort immediately.
361
431
  file_list: If provided, use these files instead of collecting from input_path.
432
+ phase_callback: Called with (phase_name, filepath) at each processing phase.
362
433
 
363
434
  Returns:
364
435
  BatchResult with summary statistics.
@@ -390,11 +461,13 @@ def anonymize_batch(
390
461
  if workers > 1 and total > 1:
391
462
  results = _batch_parallel(file_pairs, verify, dry_run, workers,
392
463
  progress_callback, batch, reset_timestamps,
393
- verify_integrity, stop_check)
464
+ verify_integrity, stop_check,
465
+ phase_callback, compute_checksum)
394
466
  else:
395
467
  results = _batch_sequential(file_pairs, verify, dry_run,
396
468
  progress_callback, batch, reset_timestamps,
397
- verify_integrity, stop_check)
469
+ verify_integrity, stop_check,
470
+ phase_callback, compute_checksum)
398
471
 
399
472
  batch.results = results
400
473
  batch.total_time_seconds = time.monotonic() - t0
@@ -410,6 +483,8 @@ def _batch_sequential(
410
483
  reset_timestamps: bool = False,
411
484
  verify_integrity: bool = False,
412
485
  stop_check: Optional[Callable] = None,
486
+ phase_callback: Optional[Callable] = None,
487
+ compute_checksum: bool = False,
413
488
  ) -> List[AnonymizationResult]:
414
489
  """Process files sequentially."""
415
490
  results = []
@@ -422,7 +497,9 @@ def _batch_sequential(
422
497
  result = anonymize_file(filepath, output_path=out, verify=verify,
423
498
  dry_run=dry_run,
424
499
  reset_timestamps=reset_timestamps,
425
- verify_integrity=verify_integrity)
500
+ verify_integrity=verify_integrity,
501
+ phase_callback=phase_callback,
502
+ compute_checksum=compute_checksum)
426
503
  except Exception as e:
427
504
  result = AnonymizationResult(
428
505
  source_path=filepath,
@@ -450,6 +527,8 @@ def _batch_parallel(
450
527
  reset_timestamps: bool = False,
451
528
  verify_integrity: bool = False,
452
529
  stop_check: Optional[Callable] = None,
530
+ phase_callback: Optional[Callable] = None,
531
+ compute_checksum: bool = False,
453
532
  ) -> List[AnonymizationResult]:
454
533
  """Process files in parallel using a thread pool.
455
534
 
@@ -457,6 +536,9 @@ def _batch_parallel(
457
536
  submission order for deterministic output.
458
537
  """
459
538
  total = len(file_pairs)
539
+ workers = min(workers, total) # no point creating more threads than files
540
+ # Cap concurrent I/O to 1 to prevent disk thrashing (single disk)
541
+ io_semaphore = threading.Semaphore(1)
460
542
  # Pre-allocate results list to maintain order
461
543
  results = [None] * total
462
544
  lock = threading.Lock()
@@ -467,7 +549,10 @@ def _batch_parallel(
467
549
  return index, anonymize_file(filepath, output_path=out,
468
550
  verify=verify, dry_run=dry_run,
469
551
  reset_timestamps=reset_timestamps,
470
- verify_integrity=verify_integrity)
552
+ verify_integrity=verify_integrity,
553
+ phase_callback=phase_callback,
554
+ io_semaphore=io_semaphore,
555
+ compute_checksum=compute_checksum)
471
556
  except Exception as e:
472
557
  return index, AnonymizationResult(
473
558
  source_path=filepath,