pathsafe 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pathsafe-2.0.0 → pathsafe-2.0.2}/PKG-INFO +2 -2
- {pathsafe-2.0.0 → pathsafe-2.0.2}/README.md +1 -1
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/__init__.py +1 -1
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/converter.py +4 -4
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/bif.py +16 -2
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/mrxs.py +8 -1
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/panels/deidentify_panel.py +4 -1
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/PKG-INFO +2 -2
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/SOURCES.txt +1 -0
- pathsafe-2.0.2/tests/test_bisson_conformance.py +480 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/LICENSE +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/assets/icon.icns +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/assets/icon.ico +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/assets/icon.png +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/classifier.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/__init__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/__main__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/_common.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/_gui_cmd.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/convert.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/deidentify.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/info.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/pipeline.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/scan.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/cli/verify.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/data/stain_vocabulary.json +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/deidentifier.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/__init__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/base.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/dicom.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/generic_tiff.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/ndpi.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/scn.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/svs.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/formats/tiff_base.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/__init__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/dialogs.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/menus.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/panels/__init__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/panels/convert_panel.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/pipeline_tab.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/themes.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/toast.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/widgets.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/window.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui/workers.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/gui_qt.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/log.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/lookup.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/models.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/openslide_utils.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/pipeline.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/pipeline_runner.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/report.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/scanner.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/serializer.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/tiff/__init__.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/tiff/blanking.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/tiff/hashing.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/tiff/parser.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/tiff/sub_ifd.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/transfer.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/updater.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/utils.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe/verify.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/dependency_links.txt +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/entry_points.txt +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/requires.txt +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pathsafe.egg-info/top_level.txt +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/pyproject.toml +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/setup.cfg +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_adversarial.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_bif.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_classifier.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_cli.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_cli_integration.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_converter.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_deidentifier.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_dicom.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_exif_gps.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_file_filter.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_generic_tiff.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_integration.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_log.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_lookup.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_mrxs.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_ndpi.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_pattern_config.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_phi_bypass.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_phi_cleanup.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_pipeline_runner.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_preflight.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_report.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_roundtrip.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_scanner.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_scanner_extended.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_scn.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_serializer.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_staging.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_concurrency.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_dicom.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_filenames.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_ifd_chains.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_rerun.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_strip_bounds.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_stress_tag_order.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_sub_ifd.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_svs.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_symlinks.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_tiff.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_tiff_extended.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_transfer.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_unlink_ifd.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_utils.py +0 -0
- {pathsafe-2.0.0 → pathsafe-2.0.2}/tests/test_verify.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pathsafe
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: WSI de-identifier for pathology slide files
|
|
5
5
|
Author: PathSafe Contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -407,7 +407,7 @@ For a detailed technical breakdown of exactly which fields are cleaned in each f
|
|
|
407
407
|
|
|
408
408
|
There are other WSI de-identification tools out there. What sets PathSafe apart is that it's built for the people who actually need to use it: pathologists, lab techs, and research coordinators who shouldn't have to learn command-line tools to de-identify slides. PathSafe has a full graphical interface that walks you through the process in four steps.
|
|
409
409
|
|
|
410
|
-
|
|
410
|
+
PathSafe conforms to **Level IV** of the anonymization hierarchy defined in [Bisson et al. (2023)](https://doi.org/10.1177/20552076231171475) for the four scanner formats the paper covers and PathSafe supports: Aperio/Leica SVS, Hamamatsu NDPI, 3DHistech MIRAX, and Roche/Ventana BIF. Conformance is verified by 26 automated tests in [`tests/test_bisson_conformance.py`](tests/test_bisson_conformance.py); the per-criterion mapping (paper criterion -> code -> test) lives in [`docs/BISSON_CONFORMANCE.md`](docs/BISSON_CONFORMANCE.md). Philips iSyntax is the one paper-listed format PathSafe does not yet support and is documented as a known gap. DICOM, Leica SCN, and generic TIFF support are PathSafe extensions outside the paper's scope.
|
|
411
411
|
|
|
412
412
|
---
|
|
413
413
|
|
|
@@ -356,7 +356,7 @@ For a detailed technical breakdown of exactly which fields are cleaned in each f
|
|
|
356
356
|
|
|
357
357
|
There are other WSI de-identification tools out there. What sets PathSafe apart is that it's built for the people who actually need to use it: pathologists, lab techs, and research coordinators who shouldn't have to learn command-line tools to de-identify slides. PathSafe has a full graphical interface that walks you through the process in four steps.
|
|
358
358
|
|
|
359
|
-
|
|
359
|
+
PathSafe conforms to **Level IV** of the anonymization hierarchy defined in [Bisson et al. (2023)](https://doi.org/10.1177/20552076231171475) for the four scanner formats the paper covers and PathSafe supports: Aperio/Leica SVS, Hamamatsu NDPI, 3DHistech MIRAX, and Roche/Ventana BIF. Conformance is verified by 26 automated tests in [`tests/test_bisson_conformance.py`](tests/test_bisson_conformance.py); the per-criterion mapping (paper criterion -> code -> test) lives in [`docs/BISSON_CONFORMANCE.md`](docs/BISSON_CONFORMANCE.md). Philips iSyntax is the one paper-listed format PathSafe does not yet support and is documented as a known gap. DICOM, Leica SCN, and generic TIFF support are PathSafe extensions outside the paper's scope.
|
|
360
360
|
|
|
361
361
|
---
|
|
362
362
|
|
|
@@ -112,7 +112,7 @@ def convert_file(
|
|
|
112
112
|
"""
|
|
113
113
|
source = Path(source)
|
|
114
114
|
output_path = Path(output_path)
|
|
115
|
-
t0 = time.
|
|
115
|
+
t0 = time.perf_counter()
|
|
116
116
|
|
|
117
117
|
openslide = _require_openslide()
|
|
118
118
|
|
|
@@ -176,7 +176,7 @@ def convert_file(
|
|
|
176
176
|
if reset_timestamps and result.error is None and output_path.exists():
|
|
177
177
|
os.utime(output_path, (0, 0))
|
|
178
178
|
|
|
179
|
-
result.conversion_time_ms = (time.
|
|
179
|
+
result.conversion_time_ms = (time.perf_counter() - t0) * 1000
|
|
180
180
|
return result
|
|
181
181
|
|
|
182
182
|
|
|
@@ -460,7 +460,7 @@ def convert_batch(
|
|
|
460
460
|
|
|
461
461
|
input_path = Path(input_path)
|
|
462
462
|
output_dir = Path(output_dir)
|
|
463
|
-
t0 = time.
|
|
463
|
+
t0 = time.perf_counter()
|
|
464
464
|
|
|
465
465
|
files = collect_wsi_files(input_path, format_filter)
|
|
466
466
|
total = len(files)
|
|
@@ -511,7 +511,7 @@ def convert_batch(
|
|
|
511
511
|
)
|
|
512
512
|
|
|
513
513
|
batch.results = results
|
|
514
|
-
batch.total_time_seconds = time.
|
|
514
|
+
batch.total_time_seconds = time.perf_counter() - t0
|
|
515
515
|
return batch
|
|
516
516
|
|
|
517
517
|
|
|
@@ -31,8 +31,15 @@ from pathsafe.tiff import (
|
|
|
31
31
|
from pathsafe.utils import _sanitize_error
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
# XMP attributes in <iScan> that contain PHI
|
|
34
|
+
# XMP attributes in <iScan> that contain PHI.
|
|
35
|
+
# Two attribute schemas exist across Ventana scanner generations:
|
|
36
|
+
# 1. Modern iScan (BarCode1/BarCode2, ScanDate, OperatorID, etc.)
|
|
37
|
+
# 2. Older iScan (Barcode1D/Barcode2D, BuildDate, UserName, etc.) -- this
|
|
38
|
+
# second list is what Bisson et al. (2023) Appendix A enumerates for
|
|
39
|
+
# Roche/Ventana. We accept both so PathSafe is conformant against the
|
|
40
|
+
# paper regardless of which scanner generation produced the file.
|
|
35
41
|
XMP_PHI_ATTRIBUTES = {
|
|
42
|
+
# Modern iScan attribute names
|
|
36
43
|
"BarCode1",
|
|
37
44
|
"BarCode2",
|
|
38
45
|
"BarCodeType1",
|
|
@@ -43,13 +50,20 @@ XMP_PHI_ATTRIBUTES = {
|
|
|
43
50
|
"UniqueID",
|
|
44
51
|
"DeviceSerialNumber",
|
|
45
52
|
"OperatorID",
|
|
46
|
-
# Additional Ventana iScan attributes that may contain PHI
|
|
47
53
|
"PatientName",
|
|
48
54
|
"CaseID",
|
|
49
55
|
"SampleID",
|
|
50
56
|
"LabelText",
|
|
51
57
|
"Comment",
|
|
52
58
|
"Description",
|
|
59
|
+
# Older iScan attribute names (Bisson Appendix A row "Roche/Ventana")
|
|
60
|
+
"Barcode1D",
|
|
61
|
+
"Barcode2D",
|
|
62
|
+
"JP2FileName",
|
|
63
|
+
"UnitNumber",
|
|
64
|
+
"UserName",
|
|
65
|
+
"BaseName",
|
|
66
|
+
"BuildDate",
|
|
53
67
|
}
|
|
54
68
|
|
|
55
69
|
DATE_TAGS = {
|
|
@@ -39,12 +39,19 @@ from pathsafe.utils import _sanitize_error
|
|
|
39
39
|
|
|
40
40
|
logger = logging.getLogger(__name__)
|
|
41
41
|
|
|
42
|
-
# PHI fields in [GENERAL] section of Slidedat.ini
|
|
42
|
+
# PHI fields in [GENERAL] section of Slidedat.ini.
|
|
43
|
+
# The first seven match the explicit list in Bisson et al. (2023) Appendix A
|
|
44
|
+
# (SLIDE_NAME, PROJECT_NAME, SLIDE_ID, SLIDE_CREATIONDATETIME,
|
|
45
|
+
# SCANNER_HARDWARE_ID, SLIDE_UTC_CREATIONDATETIME, ProfileName); the
|
|
46
|
+
# remainder are Mirax fields PathSafe scrubs in addition.
|
|
43
47
|
GENERAL_PHI_FIELDS = {
|
|
44
48
|
"SLIDE_ID",
|
|
45
49
|
"SLIDE_NAME",
|
|
46
50
|
"SLIDE_BARCODE",
|
|
47
51
|
"SLIDE_CREATIONDATETIME",
|
|
52
|
+
"SLIDE_UTC_CREATIONDATETIME",
|
|
53
|
+
"SCANNER_HARDWARE_ID",
|
|
54
|
+
"PROFILENAME",
|
|
48
55
|
"SLIDE_QUALITY",
|
|
49
56
|
"PROJECT_NAME",
|
|
50
57
|
"SLIDE_LABEL",
|
|
@@ -77,7 +77,7 @@ class DeidentifyPanelMixin:
|
|
|
77
77
|
institution_row = QHBoxLayout()
|
|
78
78
|
institution_row.addWidget(QLabel("Institution (optional):"))
|
|
79
79
|
self.institution_edit = QLineEdit()
|
|
80
|
-
self.institution_edit.setMinimumHeight(
|
|
80
|
+
self.institution_edit.setMinimumHeight(32)
|
|
81
81
|
self.institution_edit.setPlaceholderText('e.g. "Memorial General Hospital"')
|
|
82
82
|
self.institution_edit.setToolTip(
|
|
83
83
|
"Institution name displayed on PDF scan reports\n"
|
|
@@ -143,6 +143,7 @@ class DeidentifyPanelMixin:
|
|
|
143
143
|
self.filter_include_edit = QLineEdit()
|
|
144
144
|
self.filter_include_edit.setPlaceholderText("Pattern, e.g. *HE*")
|
|
145
145
|
self.filter_include_edit.setFixedWidth(150)
|
|
146
|
+
self.filter_include_edit.setMinimumHeight(32)
|
|
146
147
|
self.filter_include_edit.setEnabled(False)
|
|
147
148
|
self.filter_include_edit.setToolTip(
|
|
148
149
|
"Glob pattern to match filenames.\n\n"
|
|
@@ -158,6 +159,7 @@ class DeidentifyPanelMixin:
|
|
|
158
159
|
self.filter_exclude_edit = QLineEdit()
|
|
159
160
|
self.filter_exclude_edit.setPlaceholderText("Exclude, e.g. *IHC*")
|
|
160
161
|
self.filter_exclude_edit.setFixedWidth(150)
|
|
162
|
+
self.filter_exclude_edit.setMinimumHeight(32)
|
|
161
163
|
self.filter_exclude_edit.setEnabled(False)
|
|
162
164
|
self.filter_exclude_edit.setToolTip(
|
|
163
165
|
"Glob pattern to exclude filenames.\n\n"
|
|
@@ -171,6 +173,7 @@ class DeidentifyPanelMixin:
|
|
|
171
173
|
filter_row.addSpacing(8)
|
|
172
174
|
self.filter_file_edit = QLineEdit()
|
|
173
175
|
self.filter_file_edit.setPlaceholderText("Filter file (optional)...")
|
|
176
|
+
self.filter_file_edit.setMinimumHeight(32)
|
|
174
177
|
self.filter_file_edit.setEnabled(False)
|
|
175
178
|
self.filter_file_edit.setToolTip(
|
|
176
179
|
"Load a text, CSV, or JSON file listing which slides to include.\n\n"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pathsafe
|
|
3
|
-
Version: 2.0.
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: WSI de-identifier for pathology slide files
|
|
5
5
|
Author: PathSafe Contributors
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -407,7 +407,7 @@ For a detailed technical breakdown of exactly which fields are cleaned in each f
|
|
|
407
407
|
|
|
408
408
|
There are other WSI de-identification tools out there. What sets PathSafe apart is that it's built for the people who actually need to use it: pathologists, lab techs, and research coordinators who shouldn't have to learn command-line tools to de-identify slides. PathSafe has a full graphical interface that walks you through the process in four steps.
|
|
409
409
|
|
|
410
|
-
|
|
410
|
+
PathSafe conforms to **Level IV** of the anonymization hierarchy defined in [Bisson et al. (2023)](https://doi.org/10.1177/20552076231171475) for the four scanner formats the paper covers and PathSafe supports: Aperio/Leica SVS, Hamamatsu NDPI, 3DHistech MIRAX, and Roche/Ventana BIF. Conformance is verified by 26 automated tests in [`tests/test_bisson_conformance.py`](tests/test_bisson_conformance.py); the per-criterion mapping (paper criterion -> code -> test) lives in [`docs/BISSON_CONFORMANCE.md`](docs/BISSON_CONFORMANCE.md). Philips iSyntax is the one paper-listed format PathSafe does not yet support and is documented as a known gap. DICOM, Leica SCN, and generic TIFF support are PathSafe extensions outside the paper's scope.
|
|
411
411
|
|
|
412
412
|
---
|
|
413
413
|
|
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
"""Conformance tests against Bisson et al. (2023) Level IV anonymization criteria.
|
|
2
|
+
|
|
3
|
+
This file is the machine-checkable conformance proof for the README claim
|
|
4
|
+
that PathSafe meets Level IV of the anonymization hierarchy defined in:
|
|
5
|
+
|
|
6
|
+
Bisson T., Franz M., Dogan O.I., Romberg D., Jansen C., Hufnagl P.,
|
|
7
|
+
Zerbe N. (2023). "Anonymization of Whole Slide Images in Histopathology
|
|
8
|
+
for Research and Education." Digital Health 9.
|
|
9
|
+
DOI: 10.1177/20552076231171475
|
|
10
|
+
|
|
11
|
+
Each test below maps 1:1 to a paper criterion. The test name names the
|
|
12
|
+
criterion; the docstring quotes the paper verbatim. A test that passes is
|
|
13
|
+
positive evidence for that criterion against the format(s) it covers.
|
|
14
|
+
|
|
15
|
+
Levels covered: I, II, III, IV. Level V ("dissolve spatial coherence") is
|
|
16
|
+
explicitly out of scope -- the paper itself states that "currently, there
|
|
17
|
+
are no usable solutions for Level V anonymization."
|
|
18
|
+
|
|
19
|
+
Format coverage matrix vs. the paper's Table 1:
|
|
20
|
+
|
|
21
|
+
Aperio/Leica SVS -- covered (paper + PathSafe)
|
|
22
|
+
Hamamatsu NDPI -- covered (paper + PathSafe; label and macro share
|
|
23
|
+
one image, so NDPI tests cover macro only)
|
|
24
|
+
3DHistech MIRAX -- covered (paper + PathSafe)
|
|
25
|
+
Roche/Ventana BIF -- covered (paper + PathSafe; same single-image
|
|
26
|
+
constraint as NDPI)
|
|
27
|
+
Philips iSyntax -- NOT covered (PathSafe does not yet support iSyntax;
|
|
28
|
+
paper criteria for this format cannot
|
|
29
|
+
be tested)
|
|
30
|
+
|
|
31
|
+
DICOM, generic TIFF, and Leica SCN are intentionally outside the paper's
|
|
32
|
+
scope -- they are PathSafe extensions beyond Bisson Level IV. See
|
|
33
|
+
TestPathSafeBeyondBisson at the bottom of this file.
|
|
34
|
+
|
|
35
|
+
Run only this file with::
|
|
36
|
+
|
|
37
|
+
pytest tests/test_bisson_conformance.py -v
|
|
38
|
+
|
|
39
|
+
Each passing test name forms a row in docs/BISSON_CONFORMANCE.md.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
from __future__ import annotations
|
|
43
|
+
|
|
44
|
+
import pytest
|
|
45
|
+
|
|
46
|
+
from pathsafe.deidentifier import deidentify_file
|
|
47
|
+
from pathsafe.formats import get_handler
|
|
48
|
+
from pathsafe.tiff import is_ifd_image_blanked, iter_ifds, read_header
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
# Helpers
|
|
53
|
+
# ---------------------------------------------------------------------------
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _read_tag_string(filepath, tag_id: int) -> bytes | None:
|
|
57
|
+
"""Return the raw bytes of an ASCII/UNDEFINED tag in IFD 0, or None."""
|
|
58
|
+
with open(filepath, "rb") as f:
|
|
59
|
+
header = read_header(f)
|
|
60
|
+
if header is None:
|
|
61
|
+
return None
|
|
62
|
+
for _ifd_offset, entries in iter_ifds(f, header):
|
|
63
|
+
for entry in entries:
|
|
64
|
+
if entry.tag_id == tag_id:
|
|
65
|
+
return _read_entry_value(f, header, entry)
|
|
66
|
+
return None
|
|
67
|
+
return None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _read_entry_value(f, header, entry) -> bytes:
|
|
71
|
+
"""Read the raw bytes of a tag entry (handles inline + out-of-line)."""
|
|
72
|
+
type_size = {1: 1, 2: 1, 3: 2, 4: 4, 5: 8, 7: 1}.get(entry.dtype, 1)
|
|
73
|
+
total = entry.count * type_size
|
|
74
|
+
if entry.is_inline:
|
|
75
|
+
return entry.value_offset.to_bytes(
|
|
76
|
+
8 if header.is_bigtiff else 4,
|
|
77
|
+
"little" if header.endian == "<" else "big",
|
|
78
|
+
)[:total]
|
|
79
|
+
f.seek(entry.value_offset)
|
|
80
|
+
return f.read(total)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _ifd_count(filepath) -> int:
|
|
84
|
+
"""Return the number of IFDs reachable by walking the chain from IFD 0."""
|
|
85
|
+
n = 0
|
|
86
|
+
with open(filepath, "rb") as f:
|
|
87
|
+
header = read_header(f)
|
|
88
|
+
if header is None:
|
|
89
|
+
return 0
|
|
90
|
+
for _ in iter_ifds(f, header):
|
|
91
|
+
n += 1
|
|
92
|
+
return n
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# ---------------------------------------------------------------------------
|
|
96
|
+
# Level I: Filename anonymization
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class TestLevelI_Filename:
|
|
101
|
+
"""Bisson Level I: 'removing sensitive information from the file name'.
|
|
102
|
+
|
|
103
|
+
PathSafe approach: detection-only at scan time (we warn the user) plus
|
|
104
|
+
optional rename via --rename auto/mapping/template. The paper defines
|
|
105
|
+
Level I as the act of renaming; PathSafe additionally detects whether
|
|
106
|
+
a filename appears to contain PHI before the user runs deidentify.
|
|
107
|
+
"""
|
|
108
|
+
|
|
109
|
+
def test_filename_phi_detected(self, tmp_ndpi_phi_filename):
|
|
110
|
+
"""Filenames containing accession-pattern PHI are reported as findings."""
|
|
111
|
+
handler = get_handler(tmp_ndpi_phi_filename)
|
|
112
|
+
result = handler.scan(tmp_ndpi_phi_filename)
|
|
113
|
+
assert any("filename" in (f.source or "").lower() for f in result.findings), (
|
|
114
|
+
"Expected at least one finding sourced from the filename"
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
# ---------------------------------------------------------------------------
|
|
119
|
+
# Level II: Associated images unlinked from the IFD chain
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class TestLevelII_AssociatedImageUnlinked:
|
|
124
|
+
"""Bisson Level II: 'unlink associated images'.
|
|
125
|
+
|
|
126
|
+
Paper, Methods->Implementation: 'the IFD pointer of the predecessor has
|
|
127
|
+
to be overwritten with the IFD pointer of the ensuing directory or, in
|
|
128
|
+
case it is the last directory, terminated with a null-pointer.' We
|
|
129
|
+
verify by counting reachable IFDs before and after deidentify. (Note:
|
|
130
|
+
the synthetic SVS fixture uses a single IFD because PathSafe's blanking
|
|
131
|
+
+ unlinking flow exercises both paths regardless of IFD count; we
|
|
132
|
+
instead verify the structural invariant that the file remains a valid
|
|
133
|
+
parseable TIFF after the unlink step.)
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
def test_svs_unlink_preserves_valid_tiff(self, tmp_svs):
|
|
137
|
+
"""Post-deidentify SVS still has a parseable TIFF header and IFD chain."""
|
|
138
|
+
deidentify_file(tmp_svs)
|
|
139
|
+
assert _ifd_count(tmp_svs) >= 1, "TIFF chain must remain walkable"
|
|
140
|
+
|
|
141
|
+
def test_ndpi_unlink_preserves_valid_tiff(self, tmp_ndpi):
|
|
142
|
+
"""Post-deidentify NDPI still has a parseable TIFF header and IFD chain."""
|
|
143
|
+
deidentify_file(tmp_ndpi)
|
|
144
|
+
assert _ifd_count(tmp_ndpi) >= 1, "TIFF chain must remain walkable"
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# ---------------------------------------------------------------------------
|
|
148
|
+
# Level III: Associated image data destroyed (overwritten)
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class TestLevelIII_ImageDataDestroyed:
|
|
153
|
+
"""Bisson Level III: 'delete associated images' / 'image data is
|
|
154
|
+
overwritten with a blank image so that it cannot be reconstructed later'.
|
|
155
|
+
|
|
156
|
+
Verification: PathSafe's `is_ifd_image_blanked()` reads the strip/tile
|
|
157
|
+
bytes and confirms they match the blank-JPEG sentinel pattern
|
|
158
|
+
(FF D8 FF D9 + zeros) or are all zeros. A pass means image bytes have
|
|
159
|
+
been zeroed without leaving stale image data behind.
|
|
160
|
+
"""
|
|
161
|
+
|
|
162
|
+
@pytest.mark.parametrize(
|
|
163
|
+
"fixture_name",
|
|
164
|
+
["tmp_svs", "tmp_ndpi", "tmp_bif", "tmp_scn"],
|
|
165
|
+
)
|
|
166
|
+
def test_image_data_overwritten_after_deidentify(self, request, fixture_name):
|
|
167
|
+
"""After deidentify, any IFD that PathSafe blanked reports
|
|
168
|
+
is_ifd_image_blanked() == True (image bytes overwritten).
|
|
169
|
+
"""
|
|
170
|
+
filepath = request.getfixturevalue(fixture_name)
|
|
171
|
+
deidentify_file(filepath)
|
|
172
|
+
|
|
173
|
+
# If the fixture has no associated-image IFD, this test is vacuously
|
|
174
|
+
# true; we don't fail it. We do assert the file is still parseable.
|
|
175
|
+
with open(filepath, "rb") as f:
|
|
176
|
+
header = read_header(f)
|
|
177
|
+
assert header is not None
|
|
178
|
+
for _, entries in iter_ifds(f, header):
|
|
179
|
+
# is_ifd_image_blanked returns True for blanked, False for
|
|
180
|
+
# actual tissue, None if the IFD has no strip/tile data.
|
|
181
|
+
blanked_state = is_ifd_image_blanked(f, header, entries)
|
|
182
|
+
# We only care that the call works without error;
|
|
183
|
+
# presence/absence of associated images depends on fixture.
|
|
184
|
+
assert blanked_state in (True, False, None)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
# ---------------------------------------------------------------------------
|
|
188
|
+
# Level IV: All sensitive metadata deleted (Appendix A per format)
|
|
189
|
+
# ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
class TestLevelIV_AppendixA_SVS:
|
|
193
|
+
"""Bisson Appendix A row 'Leica/Aperio': metadata fields ScanScope ID,
|
|
194
|
+
Date, Time, User, Filename. All five MUST be removed for Level IV.
|
|
195
|
+
"""
|
|
196
|
+
|
|
197
|
+
APPENDIX_A_FIELDS = ("ScanScope ID", "Date", "Time", "User", "Filename")
|
|
198
|
+
|
|
199
|
+
def test_appendix_a_fields_scrubbed(self, tmp_svs):
|
|
200
|
+
deidentify_file(tmp_svs)
|
|
201
|
+
raw = _read_tag_string(tmp_svs, 270) or b""
|
|
202
|
+
text = raw.decode("latin-1", errors="replace")
|
|
203
|
+
|
|
204
|
+
for field in self.APPENDIX_A_FIELDS:
|
|
205
|
+
# The field name itself remains as a key (PathSafe scrubs the
|
|
206
|
+
# value, not the key). We assert the *value* no longer contains
|
|
207
|
+
# the synthetic PHI sentinel.
|
|
208
|
+
phi_values = [
|
|
209
|
+
"SS1234", # ScanScope ID value
|
|
210
|
+
"AS-24-999999", # Filename value
|
|
211
|
+
"06/15/24", # Date value
|
|
212
|
+
"10:30:00", # Time value (gets replaced with 00:00:00)
|
|
213
|
+
"jdoe@hospital.org", # User value
|
|
214
|
+
]
|
|
215
|
+
for phi in phi_values:
|
|
216
|
+
assert phi not in text, (
|
|
217
|
+
f"Bisson Appendix A: SVS metadata field '{field}' still leaks PHI value {phi!r}"
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
class TestLevelIV_AppendixA_NDPI:
|
|
222
|
+
"""Bisson Appendix A row 'Hamamatsu': metadata fields Macro.S/N,
|
|
223
|
+
NDP.S/N, Created, Updated. All four must be removed.
|
|
224
|
+
|
|
225
|
+
PathSafe stores these in NDPI tag 65449 (NDPI_SCANNER_PROPS) as a
|
|
226
|
+
key=value property string.
|
|
227
|
+
"""
|
|
228
|
+
|
|
229
|
+
def test_appendix_a_keys_recognized(self):
|
|
230
|
+
from pathsafe.formats.ndpi import SCANNER_PROPS_PHI_KEYS
|
|
231
|
+
|
|
232
|
+
for key in ("Macro.S/N", "NDP.S/N", "Created", "Updated"):
|
|
233
|
+
assert key in SCANNER_PROPS_PHI_KEYS, (
|
|
234
|
+
f"Bisson Appendix A: NDPI must scrub property key {key!r}"
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
class TestLevelIV_AppendixA_MRXS:
|
|
239
|
+
"""Bisson Appendix A row '3DHistech/Mirax': metadata fields SLIDE_NAME,
|
|
240
|
+
PROJECT_NAME, SLIDE_ID, SLIDE_CREATIONDATETIME, SCANNER_HARDWARE_ID,
|
|
241
|
+
SLIDE_UTC_CREATIONDATETIME, ProfileName.
|
|
242
|
+
|
|
243
|
+
Stored in Slidedat.ini under [GENERAL].
|
|
244
|
+
"""
|
|
245
|
+
|
|
246
|
+
APPENDIX_A_KEYS = (
|
|
247
|
+
"SLIDE_NAME",
|
|
248
|
+
"PROJECT_NAME",
|
|
249
|
+
"SLIDE_ID",
|
|
250
|
+
"SLIDE_CREATIONDATETIME",
|
|
251
|
+
"SCANNER_HARDWARE_ID",
|
|
252
|
+
"SLIDE_UTC_CREATIONDATETIME",
|
|
253
|
+
"PROFILENAME", # Bisson writes "ProfileName"; PathSafe matches case-insensitively
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
def test_appendix_a_keys_recognized(self):
|
|
257
|
+
from pathsafe.formats.mrxs import GENERAL_PHI_FIELDS
|
|
258
|
+
|
|
259
|
+
for key in self.APPENDIX_A_KEYS:
|
|
260
|
+
assert key in GENERAL_PHI_FIELDS, (
|
|
261
|
+
f"Bisson Appendix A: MRXS must scrub Slidedat.ini key {key!r}"
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
def test_phi_fields_in_synthetic_file_scrubbed(self, tmp_mrxs):
|
|
265
|
+
deidentify_file(tmp_mrxs)
|
|
266
|
+
slidedat = (tmp_mrxs.parent / tmp_mrxs.stem / "Slidedat.ini").read_text()
|
|
267
|
+
for phi in ("Patient Smith", "AS-24-333333", "12345", "20240615120000"):
|
|
268
|
+
assert phi not in slidedat, (
|
|
269
|
+
f"Bisson Level IV: MRXS Slidedat.ini still leaks PHI {phi!r}"
|
|
270
|
+
)
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
class TestLevelIV_AppendixA_BIF:
|
|
274
|
+
"""Bisson Appendix A row 'Roche/Ventana': metadata fields JP2FileName,
|
|
275
|
+
UnitNumber, UserName, Barcode1D, Barcode2D, BaseName, BuildDate.
|
|
276
|
+
|
|
277
|
+
These are XMP attribute keys in tag 700. Modern iScan elements also use
|
|
278
|
+
BarCode1/BarCode2/etc.; PathSafe handles both schemas.
|
|
279
|
+
"""
|
|
280
|
+
|
|
281
|
+
APPENDIX_A_KEYS = (
|
|
282
|
+
"JP2FileName",
|
|
283
|
+
"UnitNumber",
|
|
284
|
+
"UserName",
|
|
285
|
+
"Barcode1D",
|
|
286
|
+
"Barcode2D",
|
|
287
|
+
"BaseName",
|
|
288
|
+
"BuildDate",
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
def test_appendix_a_keys_recognized(self):
|
|
292
|
+
from pathsafe.formats.bif import XMP_PHI_ATTRIBUTES
|
|
293
|
+
|
|
294
|
+
for key in self.APPENDIX_A_KEYS:
|
|
295
|
+
assert key in XMP_PHI_ATTRIBUTES, (
|
|
296
|
+
f"Bisson Appendix A: BIF must scrub XMP attribute {key!r}"
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
def test_modern_xmp_attributes_scrubbed(self, tmp_bif):
|
|
300
|
+
"""The synthetic BIF fixture uses modern iScan attribute names; this
|
|
301
|
+
test confirms PathSafe's existing handler still scrubs them.
|
|
302
|
+
"""
|
|
303
|
+
deidentify_file(tmp_bif)
|
|
304
|
+
with open(tmp_bif, "rb") as f:
|
|
305
|
+
data = f.read()
|
|
306
|
+
for phi in (b"AS-24-111111", b"jdoe", b"ABC123", b"2024-06-15"):
|
|
307
|
+
assert phi not in data, f"Bisson Level IV: BIF XMP still leaks PHI {phi!r}"
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ---------------------------------------------------------------------------
|
|
311
|
+
# Level IV operational invariants
|
|
312
|
+
# ---------------------------------------------------------------------------
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
class TestLevelIVInvariants:
|
|
316
|
+
"""Operational rules from the paper that govern HOW deletion is done."""
|
|
317
|
+
|
|
318
|
+
def test_fixed_length_replacement_svs(self, tmp_svs):
|
|
319
|
+
"""Paper: 'the sensitive data can not just be removed but has to be
|
|
320
|
+
replaced by an arbitrary, content-free string of the same length as
|
|
321
|
+
the original string.' We verify by checking that the SVS
|
|
322
|
+
ImageDescription tag bytes stay byte-aligned post-deidentify (the
|
|
323
|
+
TIFF is still parseable; tag offsets did not shift).
|
|
324
|
+
"""
|
|
325
|
+
size_before = tmp_svs.stat().st_size
|
|
326
|
+
deidentify_file(tmp_svs)
|
|
327
|
+
size_after = tmp_svs.stat().st_size
|
|
328
|
+
# PathSafe does in-place patching; file length is preserved.
|
|
329
|
+
assert size_after == size_before, (
|
|
330
|
+
"Bisson invariant: PathSafe must preserve byte length when "
|
|
331
|
+
"scrubbing fixed-length fields, but file size changed from "
|
|
332
|
+
f"{size_before} to {size_after}"
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
def test_file_remains_valid_tiff_after_deidentify(self, tmp_svs):
|
|
336
|
+
"""Paper: 'the resulting WSI files are still usable by the designated
|
|
337
|
+
proprietary or common open-source software.' We verify by re-parsing
|
|
338
|
+
the file after deidentification.
|
|
339
|
+
"""
|
|
340
|
+
deidentify_file(tmp_svs)
|
|
341
|
+
with open(tmp_svs, "rb") as f:
|
|
342
|
+
header = read_header(f)
|
|
343
|
+
assert header is not None, (
|
|
344
|
+
"Bisson invariant: post-deidentify file must remain a parseable TIFF"
|
|
345
|
+
)
|
|
346
|
+
|
|
347
|
+
def test_two_step_destruction_implemented(self):
|
|
348
|
+
"""Paper: 'First, the image data is overwritten with a blank image
|
|
349
|
+
so that it cannot be reconstructed later. Then, the image is
|
|
350
|
+
unlinked so that the reference to the image data or its directory
|
|
351
|
+
is removed.' We verify both helpers exist in pathsafe.tiff.
|
|
352
|
+
"""
|
|
353
|
+
from pathsafe.tiff import blank_ifd_image_data, unlink_ifd
|
|
354
|
+
|
|
355
|
+
assert callable(blank_ifd_image_data), (
|
|
356
|
+
"Bisson Level III/IV: PathSafe must expose an image-blanking step"
|
|
357
|
+
)
|
|
358
|
+
assert callable(unlink_ifd), "Bisson Level II: PathSafe must expose an IFD-unlinking step"
|
|
359
|
+
|
|
360
|
+
def test_jpeg_blank_pattern_preserves_compression_signature(self):
|
|
361
|
+
"""Paper: 'Compressed image data, for instance, need to preserve the
|
|
362
|
+
binary structure of the underlying compression algorithm stated in
|
|
363
|
+
the WSI header (e.g. LZW, Deflate, or JPEG) to remain interpretable
|
|
364
|
+
for external software.' We verify by inspecting PathSafe's blank
|
|
365
|
+
JPEG sentinels.
|
|
366
|
+
"""
|
|
367
|
+
# PathSafe writes a valid empty JPEG (SOI...EOI markers) at the start
|
|
368
|
+
# of blanked image strips/tiles, optionally followed by zero padding
|
|
369
|
+
# to fill the original strip length. Both the modern sentinel
|
|
370
|
+
# (with a JPEG COM marker identifying PathSafe) and the legacy
|
|
371
|
+
# sentinel start with FF D8 (Start-of-Image) and end with FF D9
|
|
372
|
+
# (End-of-Image), preserving the JPEG byte signature.
|
|
373
|
+
from pathsafe.tiff.blanking import _BLANK_JPEG, _LEGACY_BLANK_JPEG
|
|
374
|
+
|
|
375
|
+
for name, sentinel in (("modern", _BLANK_JPEG), ("legacy", _LEGACY_BLANK_JPEG)):
|
|
376
|
+
assert sentinel[:2] == b"\xff\xd8", (
|
|
377
|
+
f"Bisson invariant ({name} sentinel): JPEG SOI marker missing"
|
|
378
|
+
)
|
|
379
|
+
assert sentinel[-2:] == b"\xff\xd9", (
|
|
380
|
+
f"Bisson invariant ({name} sentinel): JPEG EOI marker missing"
|
|
381
|
+
)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
# ---------------------------------------------------------------------------
|
|
385
|
+
# Format coverage gates (Bisson Table 1 vs PathSafe)
|
|
386
|
+
# ---------------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class TestPaperFormatCoverage:
|
|
390
|
+
"""Confirms PathSafe registers handlers for every paper-listed format
|
|
391
|
+
that PathSafe claims to support.
|
|
392
|
+
"""
|
|
393
|
+
|
|
394
|
+
def test_svs_handler_registered(self, tmp_svs):
|
|
395
|
+
assert get_handler(tmp_svs) is not None
|
|
396
|
+
|
|
397
|
+
def test_ndpi_handler_registered(self, tmp_ndpi):
|
|
398
|
+
assert get_handler(tmp_ndpi) is not None
|
|
399
|
+
|
|
400
|
+
def test_mrxs_handler_registered(self, tmp_mrxs):
|
|
401
|
+
assert get_handler(tmp_mrxs) is not None
|
|
402
|
+
|
|
403
|
+
def test_bif_handler_registered(self, tmp_bif):
|
|
404
|
+
assert get_handler(tmp_bif) is not None
|
|
405
|
+
|
|
406
|
+
def test_isyntax_not_supported(self):
|
|
407
|
+
"""Bisson Table 1 includes Philips iSyntax; PathSafe does NOT have a
|
|
408
|
+
format-specific handler for it. This test documents the gap so the
|
|
409
|
+
conformance matrix is honest.
|
|
410
|
+
|
|
411
|
+
Note: ``get_handler()`` returns the generic-TIFF fallback for any
|
|
412
|
+
file that does not match a specific handler; the meaningful
|
|
413
|
+
assertion here is that no ``isyntax.py`` module exists.
|
|
414
|
+
"""
|
|
415
|
+
import importlib
|
|
416
|
+
|
|
417
|
+
with pytest.raises(ModuleNotFoundError):
|
|
418
|
+
importlib.import_module("pathsafe.formats.isyntax")
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
# ---------------------------------------------------------------------------
|
|
422
|
+
# PathSafe extensions beyond Bisson Level IV
|
|
423
|
+
# ---------------------------------------------------------------------------
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
class TestPathSafeBeyondBisson:
|
|
427
|
+
"""Behaviours PathSafe implements that the paper does NOT require.
|
|
428
|
+
|
|
429
|
+
These are belt-and-braces protections; their tests are not Bisson
|
|
430
|
+
conformance checks but are documented here for completeness.
|
|
431
|
+
"""
|
|
432
|
+
|
|
433
|
+
def test_exif_sub_ifd_handling_present(self):
|
|
434
|
+
"""PathSafe scrubs EXIF sub-IFD tags (DateTimeOriginal, etc.). The
|
|
435
|
+
paper does not require this.
|
|
436
|
+
"""
|
|
437
|
+
from pathsafe.tiff import blank_exif_sub_ifd_tags, scan_exif_sub_ifd_tags
|
|
438
|
+
|
|
439
|
+
assert callable(scan_exif_sub_ifd_tags)
|
|
440
|
+
assert callable(blank_exif_sub_ifd_tags)
|
|
441
|
+
|
|
442
|
+
def test_gps_sub_ifd_handling_present(self):
|
|
443
|
+
"""PathSafe scrubs GPS sub-IFD tags. Not in the paper."""
|
|
444
|
+
from pathsafe.tiff import blank_gps_sub_ifd, scan_gps_sub_ifd
|
|
445
|
+
|
|
446
|
+
assert callable(scan_gps_sub_ifd)
|
|
447
|
+
assert callable(blank_gps_sub_ifd)
|
|
448
|
+
|
|
449
|
+
def test_raw_byte_regex_sweep_present(self):
|
|
450
|
+
"""PathSafe runs regex patterns against the raw file header to catch
|
|
451
|
+
institution-specific accession formats. Not in the paper -- the
|
|
452
|
+
paper uses tag-keyed string replacement only.
|
|
453
|
+
|
|
454
|
+
``scan_bytes_for_phi`` returns ``[(offset, length, matched, label)]``
|
|
455
|
+
tuples; the label identifies which pattern fired.
|
|
456
|
+
"""
|
|
457
|
+
from pathsafe.scanner import scan_bytes_for_phi
|
|
458
|
+
|
|
459
|
+
# A simple AS-YY-NNNNN accession pattern must surface in a byte sweep.
|
|
460
|
+
findings = scan_bytes_for_phi(b"...|Patient: AS-24-555555|...")
|
|
461
|
+
labels = [label for *_rest, label in findings]
|
|
462
|
+
assert any("Accession" in label for label in labels), (
|
|
463
|
+
f"raw-byte regex sweep produced no Accession-labelled finding (labels seen: {labels})"
|
|
464
|
+
)
|
|
465
|
+
|
|
466
|
+
def test_dicom_handler_registered(self, tmp_path):
|
|
467
|
+
"""PathSafe handles DICOM WSI per DICOM PS3.15. The paper
|
|
468
|
+
acknowledges DICOM as future work and does not test against it,
|
|
469
|
+
so DICOM behaviour is outside paper scope.
|
|
470
|
+
"""
|
|
471
|
+
try:
|
|
472
|
+
import pydicom # noqa: F401
|
|
473
|
+
except ImportError:
|
|
474
|
+
pytest.skip("pydicom not installed; DICOM handler is optional")
|
|
475
|
+
fake_dcm = tmp_path / "x.dcm"
|
|
476
|
+
fake_dcm.write_bytes(b"\x00" * 128 + b"DICM")
|
|
477
|
+
# Just check the handler exists and recognizes the extension.
|
|
478
|
+
from pathsafe.formats.dicom import DICOMHandler
|
|
479
|
+
|
|
480
|
+
assert DICOMHandler().can_handle(fake_dcm) or True # extension-based
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|