esrf-data-compressor 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. esrf_data_compressor-0.1.0/LICENSE +20 -0
  2. esrf_data_compressor-0.1.0/PKG-INFO +183 -0
  3. esrf_data_compressor-0.1.0/README.md +122 -0
  4. esrf_data_compressor-0.1.0/pyproject.toml +72 -0
  5. esrf_data_compressor-0.1.0/setup.cfg +4 -0
  6. esrf_data_compressor-0.1.0/src/esrf_data_compressor/__init__.py +0 -0
  7. esrf_data_compressor-0.1.0/src/esrf_data_compressor/checker/run_check.py +76 -0
  8. esrf_data_compressor-0.1.0/src/esrf_data_compressor/checker/ssim.py +87 -0
  9. esrf_data_compressor-0.1.0/src/esrf_data_compressor/cli.py +162 -0
  10. esrf_data_compressor-0.1.0/src/esrf_data_compressor/compressors/__init__.py +0 -0
  11. esrf_data_compressor-0.1.0/src/esrf_data_compressor/compressors/base.py +167 -0
  12. esrf_data_compressor-0.1.0/src/esrf_data_compressor/compressors/jp2k.py +149 -0
  13. esrf_data_compressor-0.1.0/src/esrf_data_compressor/finder/finder.py +173 -0
  14. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/__init__.py +0 -0
  15. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_cli.py +176 -0
  16. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_finder.py +70 -0
  17. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_hdf5_helpers.py +9 -0
  18. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_jp2k.py +87 -0
  19. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_run_check.py +107 -0
  20. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_ssim.py +106 -0
  21. esrf_data_compressor-0.1.0/src/esrf_data_compressor/tests/test_utils.py +64 -0
  22. esrf_data_compressor-0.1.0/src/esrf_data_compressor/utils/hdf5_helpers.py +18 -0
  23. esrf_data_compressor-0.1.0/src/esrf_data_compressor/utils/utils.py +34 -0
  24. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/PKG-INFO +183 -0
  25. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/SOURCES.txt +27 -0
  26. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/dependency_links.txt +1 -0
  27. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/entry_points.txt +2 -0
  28. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/requires.txt +20 -0
  29. esrf_data_compressor-0.1.0/src/esrf_data_compressor.egg-info/top_level.txt +1 -0
@@ -0,0 +1,20 @@
1
+ MIT License
2
+
3
+ **Copyright (c) 2025 European Synchrotron Radiation Facility**
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.4
2
+ Name: esrf-data-compressor
3
+ Version: 0.1.0
4
+ Summary: A library to compress ESRF data and reduce their footprint
5
+ Author-email: ESRF <dau-pydev@esrf.fr>
6
+ License: MIT License
7
+
8
+ **Copyright (c) 2025 European Synchrotron Radiation Facility**
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
11
+ this software and associated documentation files (the "Software"), to deal in
12
+ the Software without restriction, including without limitation the rights to
13
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14
+ the Software, and to permit persons to whom the Software is furnished to do so,
15
+ subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
+
27
+ Project-URL: Homepage, https://gitlab.esrf.fr/dau/esrf-data-compressor
28
+ Project-URL: Documentation, https://esrf-data-compressor.readthedocs.io/
29
+ Project-URL: Repository, https://gitlab.esrf.fr/dau/esrf-data-compressor
30
+ Project-URL: Issues, https://gitlab.esrf.fr/dau/esrf-data-compressor/issues
31
+ Project-URL: Changelog, https://gitlab.esrf.fr/dau/esrf-data-compressor/-/blob/main/CHANGELOG.md
32
+ Keywords: ESRF,pathlib
33
+ Classifier: Intended Audience :: Science/Research
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.10
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Programming Language :: Python :: 3.13
40
+ Requires-Python: >=3.10
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: h5py
44
+ Requires-Dist: hdf5plugin
45
+ Requires-Dist: blosc2-grok
46
+ Requires-Dist: scikit-image
47
+ Requires-Dist: tqdm
48
+ Provides-Extra: test
49
+ Requires-Dist: pytest>=7.0; extra == "test"
50
+ Provides-Extra: dev
51
+ Requires-Dist: esrf-data-compressor[test]; extra == "dev"
52
+ Requires-Dist: black>=22; extra == "dev"
53
+ Requires-Dist: flake8>=4.0; extra == "dev"
54
+ Requires-Dist: ruff; extra == "dev"
55
+ Provides-Extra: doc
56
+ Requires-Dist: sphinx>=6.0; extra == "doc"
57
+ Requires-Dist: sphinxcontrib-mermaid>=0.7; extra == "doc"
58
+ Requires-Dist: sphinx-autodoc-typehints>=1.16; extra == "doc"
59
+ Requires-Dist: pydata-sphinx-theme; extra == "doc"
60
+ Dynamic: license-file
61
+
62
+ # ESRF Data Compressor
63
+
64
+ **ESRF Data Compressor** is a command-line tool and Python library designed to compress large ESRF HDF5 datasets (3D volumes) and verify data consistency via SSIM. The default compression backend uses Blosc2 + Grok (JPEG2000).
65
+
66
+ ---
67
+
68
+ ## Features
69
+
70
+ * **Discover raw HDF5 dataset files** under an experiment’s `RAW_DATA`
71
+
72
+ * Goes through the HDF5 Virtual Datasets to find the data to compress
73
+ * Allows to filter down scan by scan based on the value of a key
74
+
75
+ * **Slice-by-slice compression**
76
+
77
+ * Uses Blosc2 + Grok (JPEG2000) on every slice of each 3D dataset (axis 0)
78
+ * User-configurable compression ratio (e.g. `--cratio 10`)
79
+
80
+ * **Parallel execution**
81
+
82
+ * Automatically factors CPU cores into worker processes × per-process threads
83
+ * By default, each worker runs up to 4 Blosc2 threads (or falls back to 1 thread if < 4 cores)
84
+
85
+ * **Non-destructive workflow**
86
+
87
+ 1. `compress` writes a sibling file `<basename>_<compression_method>.h5` next to each original
88
+ 2. `check` computes SSIM (first and last frames) and writes a report
89
+ 3. `overwrite` (optional) swaps out the raw frame file (irreversible)
90
+
91
+ * **Four simple CLI subcommands**
92
+
93
+ * `list`  Show all raw HDF5 files to be processed
94
+ * `compress` Generate compressed siblings
95
+ * `check`  Produce a per-dataset SSIM report between raw & compressed
96
+ * `overwrite` Atomically replace each raw frame file (irreversible)
97
+
98
+ ---
99
+
100
+ ## Installation
101
+
102
+ ### From PyPI
103
+
104
+ ```bash
105
+ pip install esrf-data-compressor
106
+ ```
107
+
108
+ Once installed, the `compress-hdf5` command will be available in your `PATH`.
109
+
110
+ ### From Source (for development)
111
+
112
+ ```bash
113
+ git clone https://gitlab.esrf.fr/dau/esrf-data-compressor.git
114
+ cd esrf-data-compressor
115
+
116
+ # (Optional) Create & activate a virtual environment
117
+ python -m venv venv
118
+ source venv/bin/activate
119
+
120
+ # Install build dependencies & the package itself
121
+ pip install .
122
+ ```
123
+
124
+ ---
125
+
126
+ ## Documentation
127
+
128
+ Full documentation is available online:
129
+ [ESRF Data Compressor Docs](https://esrf-data-compressor.readthedocs.io/en/latest/index.html)
130
+
131
+ ## Contributing & Development
132
+
133
+ * **Clone** the repository:
134
+
135
+ ```bash
136
+ git clone https://gitlab.esrf.fr/dau/esrf-data-compressor.git
137
+ cd esrf-data-compressor
138
+ ```
139
+
140
+ * **Install** dependencies (in a virtual environment):
141
+
142
+ ```bash
143
+ python -m venv venv
144
+ source venv/bin/activate
145
+ pip install -e ".[dev]"
146
+ ```
147
+
148
+ * **Run tests** with coverage:
149
+
150
+ ```bash
151
+ pytest -v --cov=esrf_data_compressor --cov-report=term-missing
152
+ ```
153
+
154
+ * **Style:**
155
+
156
+ * `black .`
157
+ * `flake8 .`
158
+ * `ruff .`
159
+
160
+ * **Build docs** (Sphinx + pydata theme):
161
+
162
+ ```bash
163
+ sphinx-build doc build/html
164
+ ```
165
+
166
+ ---
167
+
168
+ ## License
169
+
170
+ This project is licensed under the [MIT License](LICENSE). See `LICENSE` for full text.
171
+
172
+ ---
173
+
174
+ ## Changelog
175
+
176
+ All noteworthy changes are recorded in [CHANGELOG.md](CHANGELOG.md). Version 0.1.0 marks the first public release with:
177
+
178
+ * Initial implementation of Blosc2 + Grok (JPEG2000) compression for 3D HDF5 datasets.
179
+ * SSIM-based integrity check (first & last slice).
180
+ * Four-command CLI (`list`, `compress`, `check`, `overwrite`).
181
+ * Parallelism with worker×thread auto-factoring.
182
+
183
+ For more details, see the full history in [CHANGELOG.md](CHANGELOG.md).
@@ -0,0 +1,122 @@
1
+ # ESRF Data Compressor
2
+
3
+ **ESRF Data Compressor** is a command-line tool and Python library designed to compress large ESRF HDF5 datasets (3D volumes) and verify data consistency via SSIM. The default compression backend uses Blosc2 + Grok (JPEG2000).
4
+
5
+ ---
6
+
7
+ ## Features
8
+
9
+ * **Discover raw HDF5 dataset files** under an experiment’s `RAW_DATA`
10
+
11
+ * Goes through the HDF5 Virtual Datasets to find the data to compress
12
+ * Allows to filter down scan by scan based on the value of a key
13
+
14
+ * **Slice-by-slice compression**
15
+
16
+ * Uses Blosc2 + Grok (JPEG2000) on every slice of each 3D dataset (axis 0)
17
+ * User-configurable compression ratio (e.g. `--cratio 10`)
18
+
19
+ * **Parallel execution**
20
+
21
+ * Automatically factors CPU cores into worker processes × per-process threads
22
+ * By default, each worker runs up to 4 Blosc2 threads (or falls back to 1 thread if < 4 cores)
23
+
24
+ * **Non-destructive workflow**
25
+
26
+ 1. `compress` writes a sibling file `<basename>_<compression_method>.h5` next to each original
27
+ 2. `check` computes SSIM (first and last frames) and writes a report
28
+ 3. `overwrite` (optional) swaps out the raw frame file (irreversible)
29
+
30
+ * **Four simple CLI subcommands**
31
+
32
+ * `list`  Show all raw HDF5 files to be processed
33
+ * `compress` Generate compressed siblings
34
+ * `check`  Produce a per-dataset SSIM report between raw & compressed
35
+ * `overwrite` Atomically replace each raw frame file (irreversible)
36
+
37
+ ---
38
+
39
+ ## Installation
40
+
41
+ ### From PyPI
42
+
43
+ ```bash
44
+ pip install esrf-data-compressor
45
+ ```
46
+
47
+ Once installed, the `compress-hdf5` command will be available in your `PATH`.
48
+
49
+ ### From Source (for development)
50
+
51
+ ```bash
52
+ git clone https://gitlab.esrf.fr/dau/esrf-data-compressor.git
53
+ cd esrf-data-compressor
54
+
55
+ # (Optional) Create & activate a virtual environment
56
+ python -m venv venv
57
+ source venv/bin/activate
58
+
59
+ # Install build dependencies & the package itself
60
+ pip install .
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Documentation
66
+
67
+ Full documentation is available online:
68
+ [ESRF Data Compressor Docs](https://esrf-data-compressor.readthedocs.io/en/latest/index.html)
69
+
70
+ ## Contributing & Development
71
+
72
+ * **Clone** the repository:
73
+
74
+ ```bash
75
+ git clone https://gitlab.esrf.fr/dau/esrf-data-compressor.git
76
+ cd esrf-data-compressor
77
+ ```
78
+
79
+ * **Install** dependencies (in a virtual environment):
80
+
81
+ ```bash
82
+ python -m venv venv
83
+ source venv/bin/activate
84
+ pip install -e ".[dev]"
85
+ ```
86
+
87
+ * **Run tests** with coverage:
88
+
89
+ ```bash
90
+ pytest -v --cov=esrf_data_compressor --cov-report=term-missing
91
+ ```
92
+
93
+ * **Style:**
94
+
95
+ * `black .`
96
+ * `flake8 .`
97
+ * `ruff .`
98
+
99
+ * **Build docs** (Sphinx + pydata theme):
100
+
101
+ ```bash
102
+ sphinx-build doc build/html
103
+ ```
104
+
105
+ ---
106
+
107
+ ## License
108
+
109
+ This project is licensed under the [MIT License](LICENSE). See `LICENSE` for full text.
110
+
111
+ ---
112
+
113
+ ## Changelog
114
+
115
+ All noteworthy changes are recorded in [CHANGELOG.md](CHANGELOG.md). Version 0.1.0 marks the first public release with:
116
+
117
+ * Initial implementation of Blosc2 + Grok (JPEG2000) compression for 3D HDF5 datasets.
118
+ * SSIM-based integrity check (first & last slice).
119
+ * Four-command CLI (`list`, `compress`, `check`, `overwrite`).
120
+ * Parallelism with worker×thread auto-factoring.
121
+
122
+ For more details, see the full history in [CHANGELOG.md](CHANGELOG.md).
@@ -0,0 +1,72 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "esrf-data-compressor"
7
+ version = "0.1.0"
8
+ authors = [{ name = "ESRF", email = "dau-pydev@esrf.fr" }]
9
+ description = "A library to compress ESRF data and reduce their footprint"
10
+ readme = { file = "README.md", content-type = "text/markdown" }
11
+ license = { file = "LICENSE" }
12
+ keywords = ["ESRF", "pathlib"]
13
+ classifiers = [
14
+ "Intended Audience :: Science/Research",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ ]
22
+ requires-python = ">=3.10" #because of blosc2
23
+ dependencies = [
24
+ "h5py",
25
+ "hdf5plugin",
26
+ "blosc2-grok",
27
+ "scikit-image",
28
+ "tqdm"
29
+ ]
30
+
31
+ [project.urls]
32
+ Homepage = "https://gitlab.esrf.fr/dau/esrf-data-compressor"
33
+ Documentation = "https://esrf-data-compressor.readthedocs.io/"
34
+ Repository = "https://gitlab.esrf.fr/dau/esrf-data-compressor"
35
+ Issues = "https://gitlab.esrf.fr/dau/esrf-data-compressor/issues"
36
+ Changelog = "https://gitlab.esrf.fr/dau/esrf-data-compressor/-/blob/main/CHANGELOG.md"
37
+
38
+ [project.optional-dependencies]
39
+ test = [
40
+ "pytest >=7.0"
41
+ ]
42
+ dev = [
43
+ "esrf-data-compressor[test]",
44
+ "black >=22",
45
+ "flake8 >=4.0",
46
+ "ruff"
47
+ ]
48
+ doc = [
49
+ "sphinx >=6.0",
50
+ "sphinxcontrib-mermaid >=0.7",
51
+ "sphinx-autodoc-typehints >=1.16",
52
+ "pydata-sphinx-theme"
53
+ ]
54
+
55
+ [project.scripts]
56
+ compress-hdf5 = "esrf_data_compressor.cli:main"
57
+
58
+ [tool.setuptools]
59
+ package-dir = { "" = "src" }
60
+
61
+ [tool.setuptools.packages.find]
62
+ where = ["src"]
63
+
64
+ [tool.setuptools.package-data]
65
+ "*" = ["*.json", "*.ipynb"]
66
+
67
+ [tool.coverage.run]
68
+ omit = ["*/tests/*"]
69
+
70
+ [tool.isort]
71
+ profile = "black"
72
+ force_single_line = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,76 @@
1
+ import os
2
+ from concurrent.futures import ProcessPoolExecutor, as_completed
3
+ from tqdm import tqdm
4
+
5
+ from esrf_data_compressor.checker.ssim import compute_ssim_for_file_pair
6
+
7
+
8
def run_ssim_check(raw_files: list[str], method: str, report_path: str) -> None:
    """
    Given a list of raw HDF5 file paths, partitions into:
        to_check → those with a sibling <stem>_<method>.h5
        missing  → those without one

    Writes a report to `report_path`:
      - '=== NOT COMPRESSED FILES ===' listing each missing
      - then for each to_check pair, computes SSIM in parallel and appends
        per‐dataset SSIM lines under '=== <stem> ===' with full paths
    """
    pairs: list[tuple[str, str]] = []
    uncompressed: list[str] = []

    # Partition raw files by whether a compressed sibling exists on disk.
    for raw in raw_files:
        folder = os.path.dirname(raw)
        stem, _ext = os.path.splitext(os.path.basename(raw))
        candidate = os.path.join(folder, f"{stem}_{method}.h5")
        if os.path.exists(candidate):
            pairs.append((raw, candidate))
        else:
            uncompressed.append(raw)
    print(
        f"Found {len(pairs)} file pairs to check, {len(uncompressed)} missing compressed files."
    )

    # Emit the report while the pool runs, so partial results survive a crash.
    with open(report_path, "w") as report:
        if uncompressed:
            report.write("=== NOT COMPRESSED FILES ===\n")
            for raw in uncompressed:
                report.write(f"{raw} :: NO COMPRESSED DATASET FOUND\n")
            report.write("\n")

        if not pairs:
            report.write("No file pairs to check (no compressed siblings found).\n")
            return

        # One worker per pair, capped at the machine's CPU count.
        workers = min(len(pairs), os.cpu_count() or 1)
        with ProcessPoolExecutor(max_workers=workers) as pool:
            pending = {
                pool.submit(compute_ssim_for_file_pair, raw, comp): (raw, comp)
                for raw, comp in pairs
            }

            progress = tqdm(
                as_completed(pending),
                total=len(pending),
                desc="Checking SSIM (files)",
                unit="file",
            )
            for future in progress:
                raw, comp = pending[future]
                tqdm.write(
                    f"Checking file: {os.path.basename(raw)} ↔ {os.path.basename(comp)}"
                )
                try:
                    basename, lines = future.result()
                    # One section per file pair, with both full paths.
                    report.write(f"=== {basename} ===\n")
                    report.write(f"Uncompressed file: {raw}\n")
                    report.write(f"Compressed file: {comp}\n")
                    for line in lines:
                        report.write(line + "\n")
                    report.write("\n")
                except Exception as e:
                    report.write(f"{raw} :: ERROR processing file pair: {e}\n\n")
@@ -0,0 +1,87 @@
1
+ # src/esrf_data_compressor/checker/ssim.py
2
+
3
+ import os
4
+ import numpy as np
5
+ import h5py
6
+ from skimage.metrics import structural_similarity as ssim
7
+
8
+
9
+ def _select_win_size(H: int, W: int) -> int:
10
+ """
11
+ Choose an odd, valid window size for SSIM given slice dimensions H×W.
12
+ win_size = min(H, W, 7), made odd, at least 3.
13
+ """
14
+ win = min(H, W, 7)
15
+ if win % 2 == 0:
16
+ win -= 1
17
+ return max(win, 3)
18
+
19
+
20
def compute_ssim_for_dataset_pair(
    orig_path: str, comp_path: str, dataset_relpath: str
) -> tuple[float, float]:
    """
    Compare one 3D dataset between two HDF5 files via SSIM.

    Reads the first (z=0) and last (z=Z-1) slices of `dataset_relpath`
    from both files and returns (ssim_first, ssim_last). A slice whose
    original values are all identical scores 1.0 (SSIM's data_range
    would otherwise be zero).

    Raises:
        IndexError: if either dataset is not 3-dimensional.
    """
    with h5py.File(orig_path, "r") as f_orig, h5py.File(comp_path, "r") as f_comp:
        dset_orig = f_orig[dataset_relpath]
        dset_comp = f_comp[dataset_relpath]

        # Both sides must be 3D volumes before any slicing happens.
        if dset_orig.ndim != 3 or dset_comp.ndim != 3:
            raise IndexError(
                f"Dataset '{dataset_relpath}' is not 3D (orig: {dset_orig.ndim}D, comp: {dset_comp.ndim}D)"
            )

        # Load only the two scored slices; avoids reading the whole volume.
        slice_pairs = [
            (dset_orig[0].astype(np.float64), dset_comp[0].astype(np.float64)),
            (dset_orig[-1].astype(np.float64), dset_comp[-1].astype(np.float64)),
        ]

        height, width = slice_pairs[0][0].shape
        window = _select_win_size(height, width)

        scores: list[float] = []
        for reference, candidate in slice_pairs:
            lo, hi = reference.min(), reference.max()
            if hi == lo:
                # Constant reference slice: SSIM is undefined, treat as perfect.
                scores.append(1.0)
            else:
                scores.append(
                    ssim(reference, candidate, data_range=hi - lo, win_size=window)
                )

        return scores[0], scores[1]
56
+
57
+
58
def compute_ssim_for_file_pair(orig_path: str, comp_path: str) -> tuple[str, list[str]]:
    """
    Compute SSIM for every 3D dataset under `orig_path` vs. `comp_path`.

    Returns (basename, [report_lines…]), where each line is either:
        "<dataset_relpath>: SSIM_first=… SSIM_last=…" or an error message.
    """
    basename = os.path.basename(orig_path)
    lines: list[str] = []

    with h5py.File(orig_path, "r") as handle:
        found: list[str] = []

        # Collect the relative path of every 3D dataset in the original file.
        def _collect(name, node):
            if isinstance(node, h5py.Dataset) and node.ndim == 3:
                found.append(name)

        handle.visititems(_collect)

        if not found:
            lines.append(f"No 3D datasets found in {basename}")
            return basename, lines

        for relpath in found:
            try:
                first, last = compute_ssim_for_dataset_pair(
                    orig_path, comp_path, relpath
                )
                lines.append(f"{relpath}: SSIM_first={first:.4f} SSIM_last={last:.4f}")
            except Exception as e:
                # Keep going: one bad dataset must not abort the whole file.
                lines.append(f"{relpath}: ERROR computing SSIM: {e}")

    return basename, lines