esrf-data-compressor 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. {esrf_data_compressor-0.1.1/src/esrf_data_compressor.egg-info → esrf_data_compressor-0.1.2}/PKG-INFO +1 -1
  2. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/pyproject.toml +1 -1
  3. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/cli.py +36 -17
  4. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/base.py +67 -21
  5. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_cli.py +40 -4
  6. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2/src/esrf_data_compressor.egg-info}/PKG-INFO +1 -1
  7. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/LICENSE +0 -0
  8. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/README.md +0 -0
  9. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/setup.cfg +0 -0
  10. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/__init__.py +0 -0
  11. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/checker/run_check.py +0 -0
  12. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/checker/ssim.py +0 -0
  13. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/__init__.py +0 -0
  14. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/compressors/jp2k.py +0 -0
  15. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/finder/finder.py +0 -0
  16. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/__init__.py +0 -0
  17. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_finder.py +0 -0
  18. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_hdf5_helpers.py +0 -0
  19. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_jp2k.py +0 -0
  20. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_run_check.py +0 -0
  21. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_ssim.py +0 -0
  22. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/tests/test_utils.py +0 -0
  23. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/utils/hdf5_helpers.py +0 -0
  24. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor/utils/utils.py +0 -0
  25. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/SOURCES.txt +0 -0
  26. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/dependency_links.txt +0 -0
  27. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/entry_points.txt +0 -0
  28. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/requires.txt +0 -0
  29. {esrf_data_compressor-0.1.1 → esrf_data_compressor-0.1.2}/src/esrf_data_compressor.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: esrf-data-compressor
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A library to compress ESRF data and reduce their footprint
5
5
  Author-email: ESRF <dau-pydev@esrf.fr>
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "esrf-data-compressor"
7
- version = "0.1.1"
7
+ version = "0.1.2"
8
8
  authors = [{ name = "ESRF", email = "dau-pydev@esrf.fr" }]
9
9
  description = "A library to compress ESRF data and reduce their footprint"
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -46,7 +46,7 @@ def do_compress(args):
46
46
  exit_with_error(f"Failed to read report '{report}': {e}")
47
47
 
48
48
  if not files:
49
- print("Nothing to compress (TO COMPRESS list is empty).")
49
+ print("Nothing to compress (TO COMPRESS list is empty).")
50
50
  return
51
51
 
52
52
  print(
@@ -65,10 +65,9 @@ def do_check(args):
65
65
  exit_with_error(f"Failed to read report '{report}': {e}")
66
66
 
67
67
  if not files:
68
- print("Nothing to check (TO COMPRESS list is empty).")
68
+ print("Nothing to check (TO COMPRESS list is empty).")
69
69
  return
70
70
 
71
- # We reuse run_ssim_check in its 3‑arg form (raw_files, method, report_path)
72
71
  report_fname = f"{os.path.splitext(report)[0]}_{args.method}_ssim_report.txt"
73
72
  report_path = os.path.abspath(report_fname)
74
73
 
@@ -81,9 +80,6 @@ def do_check(args):
81
80
 
82
81
 
83
82
  def do_overwrite(args):
84
- """
85
- Overwrite TO COMPRESS files with their original sources.
86
- """
87
83
  report = args.input or "file_list.txt"
88
84
  try:
89
85
  files = parse_report(report)
@@ -91,13 +87,26 @@ def do_overwrite(args):
91
87
  exit_with_error(f"Failed to read report '{report}': {e}")
92
88
 
93
89
  if not files:
94
- print("Nothing to overwrite (TO COMPRESS list is empty).")
90
+ print("Nothing to process (TO COMPRESS list is empty).")
95
91
  return
96
92
 
97
- print(f"Overwriting {len(files)} file(s) from '{report}' …")
98
93
  mgr = CompressorManager()
94
+
95
+ if args.final:
96
+ print(f"Finalizing overwrite for {len(files)} file(s) from '{report}' …")
97
+ mgr.remove_backups(files)
98
+ print("Finalize step complete.\n")
99
+ return
100
+
101
+ if args.undo:
102
+ print(f"Undoing overwrite for {len(files)} file(s) from '{report}' …")
103
+ mgr.restore_backups(files)
104
+ print("Undo step complete.\n")
105
+ return
106
+
107
+ print(f"Overwriting {len(files)} file(s) from '{report}' …")
99
108
  mgr.overwrite_files(files)
100
- print("Overwrite complete.\n")
109
+ print("Overwrite complete (backups kept).\n")
101
110
 
102
111
 
103
112
  def main():
@@ -106,7 +115,6 @@ def main():
106
115
  )
107
116
  sub = parser.add_subparsers(dest="command", required=True)
108
117
 
109
- # list
110
118
  p = sub.add_parser("list", help="Report VDS sources → TO COMPRESS vs REMAINING")
111
119
  p.add_argument("experiment", help="Experiment ID")
112
120
  p.add_argument("beamline", nargs="?", help="Optional beamline")
@@ -115,13 +123,12 @@ def main():
115
123
  p.add_argument(
116
124
  "--filter",
117
125
  metavar="KEY:VAL[,KEY2:VAL2...]",
118
- help="Datasetlevel attribute substring filters",
126
+ help="Dataset-level attribute substring filters",
119
127
  )
120
128
  p.add_argument("--output", help="Report file (default = file_list.txt)")
121
129
  p.set_defaults(func=do_list)
122
130
 
123
- # compress
124
- p = sub.add_parser("compress", help="Compress only the TO COMPRESS files")
131
+ p = sub.add_parser("compress", help="Compress only the TO COMPRESS files")
125
132
  p.add_argument(
126
133
  "--input",
127
134
  "-i",
@@ -137,8 +144,7 @@ def main():
137
144
  )
138
145
  p.set_defaults(func=do_compress)
139
146
 
140
- # check
141
- p = sub.add_parser("check", help="Generate SSIM report for TO COMPRESS files")
147
+ p = sub.add_parser("check", help="Generate SSIM report for TO COMPRESS files")
142
148
  p.add_argument(
143
149
  "--input", "-i", help="Report file to read (default = file_list.txt)"
144
150
  )
@@ -147,11 +153,24 @@ def main():
147
153
  )
148
154
  p.set_defaults(func=do_check)
149
155
 
150
- # overwrite
151
- p = sub.add_parser("overwrite", help="Overwrite only TO COMPRESS files")
156
+ p = sub.add_parser(
157
+ "overwrite",
158
+ help="Swap in compressed files and keep backups; with --final or --undo, perform cleanup/restore only.",
159
+ )
152
160
  p.add_argument(
153
161
  "--input", "-i", help="Report file to read (default = file_list.txt)"
154
162
  )
163
+ group = p.add_mutually_exclusive_group()
164
+ group.add_argument(
165
+ "--final",
166
+ action="store_true",
167
+ help="Cleanup only: delete existing *.h5.bak backups after confirmation (no overwrite).",
168
+ )
169
+ group.add_argument(
170
+ "--undo",
171
+ action="store_true",
172
+ help="Restore only: move <file>.h5.bak back to <file>.h5 and preserve the current file as <file>_<method>.h5 when needed.",
173
+ )
155
174
  p.set_defaults(func=do_overwrite)
156
175
 
157
176
  args = parser.parse_args()
@@ -34,24 +34,14 @@ class CompressorManager:
34
34
  self, workers: int | None = None, cratio: int = 10, method: str = "jp2k"
35
35
  ):
36
36
  total_cores = os.cpu_count() or 1
37
-
38
- # Determine default threads per worker (4, or fewer if total_cores < 4)
39
- if total_cores >= 4:
40
- default_nthreads = 4
41
- else:
42
- default_nthreads = 1
43
-
44
- # Default worker count
37
+ default_nthreads = 4 if total_cores >= 4 else 1
45
38
  default_workers = max(1, total_cores // default_nthreads)
46
39
 
47
40
  if workers is None:
48
- # Use default workers and default_nthreads
49
41
  w = default_workers
50
42
  nthreads = default_nthreads
51
43
  else:
52
- # Cap workers to total_cores
53
44
  w = min(workers, total_cores)
54
- # Recompute threads per worker so that (w * nthreads) ≤ total_cores, up to 4
55
45
  possible = total_cores // w
56
46
  nthreads = min(possible, 4) if possible >= 1 else 1
57
47
 
@@ -60,7 +50,6 @@ class CompressorManager:
60
50
  self.cratio = cratio
61
51
  self.method = method
62
52
 
63
- # Instantiate compressor based on method
64
53
  if self.method == "jp2k":
65
54
  self.compressor = JP2KCompressorWrapper(
66
55
  cratio=cratio, nthreads=self.nthreads
@@ -122,8 +111,7 @@ class CompressorManager:
122
111
  except Exception as e:
123
112
  print(f"Failed to compress '{pth}': {e}")
124
113
 
125
- t1 = time.time()
126
- elapsed = t1 - t0
114
+ elapsed = time.time() - t0
127
115
  total_mb = total_bytes / (1024 * 1024)
128
116
  rate_mb_s = total_mb / elapsed if elapsed > 0 else float("inf")
129
117
  print(f"\nTotal elapsed time: {elapsed:.3f}s")
@@ -138,7 +126,6 @@ class CompressorManager:
138
126
 
139
127
  After processing all files, removes the backup .h5.bak files.
140
128
  """
141
- backups = []
142
129
  for ipath in file_list:
143
130
  if not ipath.lower().endswith(".h5"):
144
131
  continue
@@ -151,17 +138,76 @@ class CompressorManager:
151
138
  try:
152
139
  os.replace(ipath, backup)
153
140
  os.replace(compressed_path, ipath)
154
- backups.append(backup)
155
141
  print(f"Overwritten '{ipath}' (backup at '{backup}').")
156
142
  except Exception as e:
157
143
  print(f"ERROR overwriting '{ipath}': {e}")
158
144
  else:
159
145
  print(f"SKIP (no compressed file): {ipath}")
160
146
 
161
- # Remove all backup files
162
- for backup in backups:
147
+ def remove_backups(self, file_list: list[str]) -> None:
148
+ candidates = {p + ".bak" for p in file_list if p.lower().endswith(".h5")}
149
+ backups = [b for b in candidates if os.path.exists(b)]
150
+ if not backups:
151
+ print("No backup files to remove.")
152
+ return
153
+
154
+ total_bytes = 0
155
+ for b in backups:
156
+ try:
157
+ total_bytes += os.path.getsize(b)
158
+ except OSError:
159
+ pass
160
+ total_mb = total_bytes / (1024 * 1024)
161
+
162
+ print(
163
+ f"About to remove {len(backups)} backup file(s), ~{total_mb:.2f} MB total."
164
+ )
165
+ ans = input("Proceed? [y/N]: ").strip().lower()
166
+ if ans not in ("y", "yes"):
167
+ print("Backups kept.")
168
+ return
169
+
170
+ removed = 0
171
+ for b in backups:
163
172
  try:
164
- os.remove(backup)
165
- print(f"Deleted backup '{backup}'.")
173
+ os.remove(b)
174
+ removed += 1
166
175
  except Exception as e:
167
- print(f"ERROR deleting backup '{backup}': {e}")
176
+ print(f"ERROR deleting backup '{b}': {e}")
177
+
178
+ print(f"Deleted {removed} backup file(s).")
179
+
180
+ def restore_backups(self, file_list: list[str]) -> None:
181
+ restored = 0
182
+ preserved = 0
183
+ for ipath in file_list:
184
+ if not ipath.lower().endswith(".h5"):
185
+ continue
186
+
187
+ base, _ = os.path.splitext(ipath)
188
+ backup = ipath + ".bak"
189
+ method_path = f"{base}_{self.method}.h5"
190
+
191
+ if not os.path.exists(backup):
192
+ print(f"SKIP (no backup): {ipath}")
193
+ continue
194
+
195
+ if os.path.exists(ipath) and not os.path.exists(method_path):
196
+ try:
197
+ os.replace(ipath, method_path)
198
+ preserved += 1
199
+ print(f"Preserved current file to '{method_path}'.")
200
+ except Exception as e:
201
+ print(f"ERROR preserving current '{ipath}' to '{method_path}': {e}")
202
+ continue
203
+
204
+ try:
205
+ os.replace(backup, ipath)
206
+ restored += 1
207
+ print(f"Restored '{ipath}' from backup.")
208
+ except Exception as e:
209
+ print(f"ERROR restoring '{ipath}' from '{backup}': {e}")
210
+
211
+ print(
212
+ f"Restore complete. Restored: {restored}, preserved compressed copies: {preserved}."
213
+ )
@@ -119,12 +119,14 @@ def test_commands_with_non_empty_list(
119
119
  for f in files:
120
120
  comp = tmp_path / f.replace(".h5", "_jp2k.h5")
121
121
  assert comp.exists()
122
- # For overwrite, verify original replaced and backup removed
122
+ # For overwrite, verify original replaced and backup KEPT
123
123
  if cmd == "overwrite":
124
124
  # f1 was overwritten, f2 was skipped
125
125
  assert (tmp_path / "f1.h5").exists()
126
- # no backup remains
127
- assert not (tmp_path / "f1.h5.bak").exists()
126
+ # backup remains by default
127
+ assert (tmp_path / "f1.h5.bak").exists()
128
+ # f2 had no compressed sibling → no backup
129
+ assert not (tmp_path / "f2.h5.bak").exists()
128
130
 
129
131
 
130
132
  def test_list_success_and_output_file(argv_runner, monkeypatch, capsys, tmp_path):
@@ -150,7 +152,7 @@ def test_list_success_and_output_file(argv_runner, monkeypatch, capsys, tmp_path
150
152
  [
151
153
  ("compress", "Nothing to compress"),
152
154
  ("check", "Nothing to check"),
153
- ("overwrite", "Nothing to overwrite"),
155
+ ("overwrite", "Nothing to process"),
154
156
  ],
155
157
  )
156
158
  def test_empty_reports(argv_runner, monkeypatch, capsys, cmd, empty_msg, tmp_path):
@@ -174,3 +176,37 @@ def test_check_success_writes_report(argv_runner, monkeypatch, capsys, tmp_path)
174
176
  argv_runner(["check", "-i", str(report), "--method", "jp2k"])
175
177
  out = capsys.readouterr().out
176
178
  assert "SSIM report written to" in out
179
+
180
+
181
+ def test_overwrite_final_deletes_backups(argv_runner, monkeypatch, capsys, tmp_path):
182
+ # Prepare a file and its backup
183
+ (tmp_path / "f1.h5").write_text("current")
184
+ (tmp_path / "f1.h5.bak").write_text("backup")
185
+ # parse_report returns the original .h5 path(s)
186
+ monkeypatch.setattr(cli, "parse_report", lambda rpt: [str(tmp_path / "f1.h5")])
187
+ # auto-confirm deletion
188
+ monkeypatch.setattr("builtins.input", lambda *a, **k: "y")
189
+
190
+ argv_runner(["overwrite", "-i", "report.txt", "--final"])
191
+ out = capsys.readouterr().out
192
+ assert "About to remove" in out
193
+ assert not (tmp_path / "f1.h5.bak").exists()
194
+
195
+
196
+ def test_overwrite_undo_restores_and_preserves(
197
+ argv_runner, monkeypatch, capsys, tmp_path
198
+ ):
199
+ # Start with current file and a backup; no <method> file yet
200
+ (tmp_path / "f1.h5").write_text("CUR")
201
+ (tmp_path / "f1.h5.bak").write_text("BAK")
202
+ monkeypatch.setattr(cli, "parse_report", lambda rpt: [str(tmp_path / "f1.h5")])
203
+
204
+ argv_runner(["overwrite", "-i", "report.txt", "--undo"])
205
+ out = capsys.readouterr().out
206
+ assert "Undoing overwrite" in out
207
+ # Backup should have been restored to f1.h5
208
+ assert (tmp_path / "f1.h5").read_text() == "BAK"
209
+ # Previous current should have been preserved as f1_jp2k.h5
210
+ assert (tmp_path / "f1_jp2k.h5").read_text() == "CUR"
211
+ # .bak should be gone after restore (moved)
212
+ assert not (tmp_path / "f1.h5.bak").exists()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: esrf-data-compressor
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A library to compress ESRF data and reduce their footprint
5
5
  Author-email: ESRF <dau-pydev@esrf.fr>
6
6
  License: MIT License