esrf-data-compressor 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {esrf_data_compressor-0.1.1/src/esrf_data_compressor.egg-info → esrf_data_compressor-0.1.2}/PKG-INFO +1 -1
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/pyproject.toml +1 -1
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/cli.py +36 -17
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/base.py +67 -21
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_cli.py +40 -4
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2/src/esrf_data_compressor.egg-info}/PKG-INFO +1 -1
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/LICENSE +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/README.md +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/setup.cfg +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/__init__.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/checker/run_check.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/checker/ssim.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/__init__.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/jp2k.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/finder/finder.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/__init__.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_finder.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_hdf5_helpers.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_jp2k.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_run_check.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_ssim.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_utils.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/utils/hdf5_helpers.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/utils/utils.py +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/SOURCES.txt +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/dependency_links.txt +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/entry_points.txt +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/requires.txt +0 -0
- {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "esrf-data-compressor"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
authors = [{ name = "ESRF", email = "dau-pydev@esrf.fr" }]
|
|
9
9
|
description = "A library to compress ESRF data and reduce their footprint"
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -46,7 +46,7 @@ def do_compress(args):
|
|
|
46
46
|
exit_with_error(f"Failed to read report '{report}': {e}")
|
|
47
47
|
|
|
48
48
|
if not files:
|
|
49
|
-
print("Nothing to compress (TO
|
|
49
|
+
print("Nothing to compress (TO COMPRESS list is empty).")
|
|
50
50
|
return
|
|
51
51
|
|
|
52
52
|
print(
|
|
@@ -65,10 +65,9 @@ def do_check(args):
|
|
|
65
65
|
exit_with_error(f"Failed to read report '{report}': {e}")
|
|
66
66
|
|
|
67
67
|
if not files:
|
|
68
|
-
print("Nothing to check (TO
|
|
68
|
+
print("Nothing to check (TO COMPRESS list is empty).")
|
|
69
69
|
return
|
|
70
70
|
|
|
71
|
-
# We reuse run_ssim_check in its 3‑arg form (raw_files, method, report_path)
|
|
72
71
|
report_fname = f"{os.path.splitext(report)[0]}_{args.method}_ssim_report.txt"
|
|
73
72
|
report_path = os.path.abspath(report_fname)
|
|
74
73
|
|
|
@@ -81,9 +80,6 @@ def do_check(args):
|
|
|
81
80
|
|
|
82
81
|
|
|
83
82
|
def do_overwrite(args):
|
|
84
|
-
"""
|
|
85
|
-
Overwrite TO COMPRESS files with their original sources.
|
|
86
|
-
"""
|
|
87
83
|
report = args.input or "file_list.txt"
|
|
88
84
|
try:
|
|
89
85
|
files = parse_report(report)
|
|
@@ -91,13 +87,26 @@ def do_overwrite(args):
|
|
|
91
87
|
exit_with_error(f"Failed to read report '{report}': {e}")
|
|
92
88
|
|
|
93
89
|
if not files:
|
|
94
|
-
print("Nothing to
|
|
90
|
+
print("Nothing to process (TO COMPRESS list is empty).")
|
|
95
91
|
return
|
|
96
92
|
|
|
97
|
-
print(f"Overwriting {len(files)} file(s) from '{report}' …")
|
|
98
93
|
mgr = CompressorManager()
|
|
94
|
+
|
|
95
|
+
if args.final:
|
|
96
|
+
print(f"Finalizing overwrite for {len(files)} file(s) from '{report}' …")
|
|
97
|
+
mgr.remove_backups(files)
|
|
98
|
+
print("Finalize step complete.\n")
|
|
99
|
+
return
|
|
100
|
+
|
|
101
|
+
if args.undo:
|
|
102
|
+
print(f"Undoing overwrite for {len(files)} file(s) from '{report}' …")
|
|
103
|
+
mgr.restore_backups(files)
|
|
104
|
+
print("Undo step complete.\n")
|
|
105
|
+
return
|
|
106
|
+
|
|
107
|
+
print(f"Overwriting {len(files)} file(s) from '{report}' …")
|
|
99
108
|
mgr.overwrite_files(files)
|
|
100
|
-
print("Overwrite complete.\n")
|
|
109
|
+
print("Overwrite complete (backups kept).\n")
|
|
101
110
|
|
|
102
111
|
|
|
103
112
|
def main():
|
|
@@ -106,7 +115,6 @@ def main():
|
|
|
106
115
|
)
|
|
107
116
|
sub = parser.add_subparsers(dest="command", required=True)
|
|
108
117
|
|
|
109
|
-
# list
|
|
110
118
|
p = sub.add_parser("list", help="Report VDS sources → TO COMPRESS vs REMAINING")
|
|
111
119
|
p.add_argument("experiment", help="Experiment ID")
|
|
112
120
|
p.add_argument("beamline", nargs="?", help="Optional beamline")
|
|
@@ -115,13 +123,12 @@ def main():
|
|
|
115
123
|
p.add_argument(
|
|
116
124
|
"--filter",
|
|
117
125
|
metavar="KEY:VAL[,KEY2:VAL2...]",
|
|
118
|
-
help="Dataset
|
|
126
|
+
help="Dataset-level attribute substring filters",
|
|
119
127
|
)
|
|
120
128
|
p.add_argument("--output", help="Report file (default = file_list.txt)")
|
|
121
129
|
p.set_defaults(func=do_list)
|
|
122
130
|
|
|
123
|
-
|
|
124
|
-
p = sub.add_parser("compress", help="Compress only the TO COMPRESS files")
|
|
131
|
+
p = sub.add_parser("compress", help="Compress only the TO COMPRESS files")
|
|
125
132
|
p.add_argument(
|
|
126
133
|
"--input",
|
|
127
134
|
"-i",
|
|
@@ -137,8 +144,7 @@ def main():
|
|
|
137
144
|
)
|
|
138
145
|
p.set_defaults(func=do_compress)
|
|
139
146
|
|
|
140
|
-
|
|
141
|
-
p = sub.add_parser("check", help="Generate SSIM report for TO COMPRESS files")
|
|
147
|
+
p = sub.add_parser("check", help="Generate SSIM report for TO COMPRESS files")
|
|
142
148
|
p.add_argument(
|
|
143
149
|
"--input", "-i", help="Report file to read (default = file_list.txt)"
|
|
144
150
|
)
|
|
@@ -147,11 +153,24 @@ def main():
|
|
|
147
153
|
)
|
|
148
154
|
p.set_defaults(func=do_check)
|
|
149
155
|
|
|
150
|
-
|
|
151
|
-
|
|
156
|
+
p = sub.add_parser(
|
|
157
|
+
"overwrite",
|
|
158
|
+
help="Swap in compressed files and keep backups; with --final or --undo, perform cleanup/restore only.",
|
|
159
|
+
)
|
|
152
160
|
p.add_argument(
|
|
153
161
|
"--input", "-i", help="Report file to read (default = file_list.txt)"
|
|
154
162
|
)
|
|
163
|
+
group = p.add_mutually_exclusive_group()
|
|
164
|
+
group.add_argument(
|
|
165
|
+
"--final",
|
|
166
|
+
action="store_true",
|
|
167
|
+
help="Cleanup only: delete existing *.h5.bak backups after confirmation (no overwrite).",
|
|
168
|
+
)
|
|
169
|
+
group.add_argument(
|
|
170
|
+
"--undo",
|
|
171
|
+
action="store_true",
|
|
172
|
+
help="Restore only: move <file>.h5.bak back to <file>.h5 and preserve the current file as <file>_<method>.h5 when needed.",
|
|
173
|
+
)
|
|
155
174
|
p.set_defaults(func=do_overwrite)
|
|
156
175
|
|
|
157
176
|
args = parser.parse_args()
|
|
@@ -34,24 +34,14 @@ class CompressorManager:
|
|
|
34
34
|
self, workers: int | None = None, cratio: int = 10, method: str = "jp2k"
|
|
35
35
|
):
|
|
36
36
|
total_cores = os.cpu_count() or 1
|
|
37
|
-
|
|
38
|
-
# Determine default threads per worker (4, or fewer if total_cores < 4)
|
|
39
|
-
if total_cores >= 4:
|
|
40
|
-
default_nthreads = 4
|
|
41
|
-
else:
|
|
42
|
-
default_nthreads = 1
|
|
43
|
-
|
|
44
|
-
# Default worker count
|
|
37
|
+
default_nthreads = 4 if total_cores >= 4 else 1
|
|
45
38
|
default_workers = max(1, total_cores // default_nthreads)
|
|
46
39
|
|
|
47
40
|
if workers is None:
|
|
48
|
-
# Use default workers and default_nthreads
|
|
49
41
|
w = default_workers
|
|
50
42
|
nthreads = default_nthreads
|
|
51
43
|
else:
|
|
52
|
-
# Cap workers to total_cores
|
|
53
44
|
w = min(workers, total_cores)
|
|
54
|
-
# Recompute threads per worker so that (w * nthreads) ≤ total_cores, up to 4
|
|
55
45
|
possible = total_cores // w
|
|
56
46
|
nthreads = min(possible, 4) if possible >= 1 else 1
|
|
57
47
|
|
|
@@ -60,7 +50,6 @@ class CompressorManager:
|
|
|
60
50
|
self.cratio = cratio
|
|
61
51
|
self.method = method
|
|
62
52
|
|
|
63
|
-
# Instantiate compressor based on method
|
|
64
53
|
if self.method == "jp2k":
|
|
65
54
|
self.compressor = JP2KCompressorWrapper(
|
|
66
55
|
cratio=cratio, nthreads=self.nthreads
|
|
@@ -122,8 +111,7 @@ class CompressorManager:
|
|
|
122
111
|
except Exception as e:
|
|
123
112
|
print(f"Failed to compress '{pth}': {e}")
|
|
124
113
|
|
|
125
|
-
|
|
126
|
-
elapsed = t1 - t0
|
|
114
|
+
elapsed = time.time() - t0
|
|
127
115
|
total_mb = total_bytes / (1024 * 1024)
|
|
128
116
|
rate_mb_s = total_mb / elapsed if elapsed > 0 else float("inf")
|
|
129
117
|
print(f"\nTotal elapsed time: {elapsed:.3f}s")
|
|
@@ -138,7 +126,6 @@ class CompressorManager:
|
|
|
138
126
|
|
|
139
127
|
After processing all files, removes the backup .h5.bak files.
|
|
140
128
|
"""
|
|
141
|
-
backups = []
|
|
142
129
|
for ipath in file_list:
|
|
143
130
|
if not ipath.lower().endswith(".h5"):
|
|
144
131
|
continue
|
|
@@ -151,17 +138,76 @@ class CompressorManager:
|
|
|
151
138
|
try:
|
|
152
139
|
os.replace(ipath, backup)
|
|
153
140
|
os.replace(compressed_path, ipath)
|
|
154
|
-
backups.append(backup)
|
|
155
141
|
print(f"Overwritten '{ipath}' (backup at '{backup}').")
|
|
156
142
|
except Exception as e:
|
|
157
143
|
print(f"ERROR overwriting '{ipath}': {e}")
|
|
158
144
|
else:
|
|
159
145
|
print(f"SKIP (no compressed file): {ipath}")
|
|
160
146
|
|
|
161
|
-
|
|
162
|
-
for
|
|
147
|
+
def remove_backups(self, file_list: list[str]) -> None:
|
|
148
|
+
candidates = {p + ".bak" for p in file_list if p.lower().endswith(".h5")}
|
|
149
|
+
backups = [b for b in candidates if os.path.exists(b)]
|
|
150
|
+
if not backups:
|
|
151
|
+
print("No backup files to remove.")
|
|
152
|
+
return
|
|
153
|
+
|
|
154
|
+
total_bytes = 0
|
|
155
|
+
for b in backups:
|
|
156
|
+
try:
|
|
157
|
+
total_bytes += os.path.getsize(b)
|
|
158
|
+
except OSError:
|
|
159
|
+
pass
|
|
160
|
+
total_mb = total_bytes / (1024 * 1024)
|
|
161
|
+
|
|
162
|
+
print(
|
|
163
|
+
f"About to remove {len(backups)} backup file(s), ~{total_mb:.2f} MB total."
|
|
164
|
+
)
|
|
165
|
+
ans = input("Proceed? [y/N]: ").strip().lower()
|
|
166
|
+
if ans not in ("y", "yes"):
|
|
167
|
+
print("Backups kept.")
|
|
168
|
+
return
|
|
169
|
+
|
|
170
|
+
removed = 0
|
|
171
|
+
for b in backups:
|
|
163
172
|
try:
|
|
164
|
-
os.remove(
|
|
165
|
-
|
|
173
|
+
os.remove(b)
|
|
174
|
+
removed += 1
|
|
166
175
|
except Exception as e:
|
|
167
|
-
print(f"ERROR deleting backup '{
|
|
176
|
+
print(f"ERROR deleting backup '{b}': {e}")
|
|
177
|
+
|
|
178
|
+
print(f"Deleted {removed} backup file(s).")
|
|
179
|
+
|
|
180
|
+
def restore_backups(self, file_list: list[str]) -> None:
|
|
181
|
+
restored = 0
|
|
182
|
+
preserved = 0
|
|
183
|
+
for ipath in file_list:
|
|
184
|
+
if not ipath.lower().endswith(".h5"):
|
|
185
|
+
continue
|
|
186
|
+
|
|
187
|
+
base, _ = os.path.splitext(ipath)
|
|
188
|
+
backup = ipath + ".bak"
|
|
189
|
+
method_path = f"{base}_{self.method}.h5"
|
|
190
|
+
|
|
191
|
+
if not os.path.exists(backup):
|
|
192
|
+
print(f"SKIP (no backup): {ipath}")
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
if os.path.exists(ipath) and not os.path.exists(method_path):
|
|
196
|
+
try:
|
|
197
|
+
os.replace(ipath, method_path)
|
|
198
|
+
preserved += 1
|
|
199
|
+
print(f"Preserved current file to '{method_path}'.")
|
|
200
|
+
except Exception as e:
|
|
201
|
+
print(f"ERROR preserving current '{ipath}' to '{method_path}': {e}")
|
|
202
|
+
continue
|
|
203
|
+
|
|
204
|
+
try:
|
|
205
|
+
os.replace(backup, ipath)
|
|
206
|
+
restored += 1
|
|
207
|
+
print(f"Restored '{ipath}' from backup.")
|
|
208
|
+
except Exception as e:
|
|
209
|
+
print(f"ERROR restoring '{ipath}' from '{backup}': {e}")
|
|
210
|
+
|
|
211
|
+
print(
|
|
212
|
+
f"Restore complete. Restored: {restored}, preserved compressed copies: {preserved}."
|
|
213
|
+
)
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_cli.py
RENAMED
|
@@ -119,12 +119,14 @@ def test_commands_with_non_empty_list(
|
|
|
119
119
|
for f in files:
|
|
120
120
|
comp = tmp_path / f.replace(".h5", "_jp2k.h5")
|
|
121
121
|
assert comp.exists()
|
|
122
|
-
# For overwrite, verify original replaced and backup
|
|
122
|
+
# For overwrite, verify original replaced and backup KEPT
|
|
123
123
|
if cmd == "overwrite":
|
|
124
124
|
# f1 was overwritten, f2 was skipped
|
|
125
125
|
assert (tmp_path / "f1.h5").exists()
|
|
126
|
-
#
|
|
127
|
-
assert
|
|
126
|
+
# backup remains by default
|
|
127
|
+
assert (tmp_path / "f1.h5.bak").exists()
|
|
128
|
+
# f2 had no compressed sibling → no backup
|
|
129
|
+
assert not (tmp_path / "f2.h5.bak").exists()
|
|
128
130
|
|
|
129
131
|
|
|
130
132
|
def test_list_success_and_output_file(argv_runner, monkeypatch, capsys, tmp_path):
|
|
@@ -150,7 +152,7 @@ def test_list_success_and_output_file(argv_runner, monkeypatch, capsys, tmp_path
|
|
|
150
152
|
[
|
|
151
153
|
("compress", "Nothing to compress"),
|
|
152
154
|
("check", "Nothing to check"),
|
|
153
|
-
("overwrite", "Nothing to
|
|
155
|
+
("overwrite", "Nothing to process"),
|
|
154
156
|
],
|
|
155
157
|
)
|
|
156
158
|
def test_empty_reports(argv_runner, monkeypatch, capsys, cmd, empty_msg, tmp_path):
|
|
@@ -174,3 +176,37 @@ def test_check_success_writes_report(argv_runner, monkeypatch, capsys, tmp_path)
|
|
|
174
176
|
argv_runner(["check", "-i", str(report), "--method", "jp2k"])
|
|
175
177
|
out = capsys.readouterr().out
|
|
176
178
|
assert "SSIM report written to" in out
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def test_overwrite_final_deletes_backups(argv_runner, monkeypatch, capsys, tmp_path):
|
|
182
|
+
# Prepare a file and its backup
|
|
183
|
+
(tmp_path / "f1.h5").write_text("current")
|
|
184
|
+
(tmp_path / "f1.h5.bak").write_text("backup")
|
|
185
|
+
# parse_report returns the original .h5 path(s)
|
|
186
|
+
monkeypatch.setattr(cli, "parse_report", lambda rpt: [str(tmp_path / "f1.h5")])
|
|
187
|
+
# auto-confirm deletion
|
|
188
|
+
monkeypatch.setattr("builtins.input", lambda *a, **k: "y")
|
|
189
|
+
|
|
190
|
+
argv_runner(["overwrite", "-i", "report.txt", "--final"])
|
|
191
|
+
out = capsys.readouterr().out
|
|
192
|
+
assert "About to remove" in out
|
|
193
|
+
assert not (tmp_path / "f1.h5.bak").exists()
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def test_overwrite_undo_restores_and_preserves(
|
|
197
|
+
argv_runner, monkeypatch, capsys, tmp_path
|
|
198
|
+
):
|
|
199
|
+
# Start with current file and a backup; no <method> file yet
|
|
200
|
+
(tmp_path / "f1.h5").write_text("CUR")
|
|
201
|
+
(tmp_path / "f1.h5.bak").write_text("BAK")
|
|
202
|
+
monkeypatch.setattr(cli, "parse_report", lambda rpt: [str(tmp_path / "f1.h5")])
|
|
203
|
+
|
|
204
|
+
argv_runner(["overwrite", "-i", "report.txt", "--undo"])
|
|
205
|
+
out = capsys.readouterr().out
|
|
206
|
+
assert "Undoing overwrite" in out
|
|
207
|
+
# Backup should have been restored to f1.h5
|
|
208
|
+
assert (tmp_path / "f1.h5").read_text() == "BAK"
|
|
209
|
+
# Previous current should have been preserved as f1_jp2k.h5
|
|
210
|
+
assert (tmp_path / "f1_jp2k.h5").read_text() == "CUR"
|
|
211
|
+
# .bak should be gone after restore (moved)
|
|
212
|
+
assert not (tmp_path / "f1.h5.bak").exists()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/checker/ssim.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/finder/finder.py
RENAMED
|
File without changes
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/utils/utils.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|