cdo-toolkit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cdo_toolkit/__init__.py +47 -0
- cdo_toolkit/__main__.py +6 -0
- cdo_toolkit/api.py +573 -0
- cdo_toolkit/cli.py +166 -0
- cdo_toolkit/cmip.py +61 -0
- cdo_toolkit/constants.py +9 -0
- cdo_toolkit/errors.py +79 -0
- cdo_toolkit/memory.py +22 -0
- cdo_toolkit/paths.py +30 -0
- cdo_toolkit/pipeline.py +2230 -0
- cdo_toolkit/resolution.py +19 -0
- cdo_toolkit/timing.py +36 -0
- cdo_toolkit/ui.py +650 -0
- cdo_toolkit/workers.py +277 -0
- cdo_toolkit-0.1.0.dist-info/METADATA +78 -0
- cdo_toolkit-0.1.0.dist-info/RECORD +19 -0
- cdo_toolkit-0.1.0.dist-info/WHEEL +4 -0
- cdo_toolkit-0.1.0.dist-info/entry_points.txt +2 -0
- cdo_toolkit-0.1.0.dist-info/licenses/LICENSE +28 -0
cdo_toolkit/ui.py
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Rich UI components for CDO regridding pipeline.
|
|
4
|
+
|
|
5
|
+
This module provides comprehensive progress tracking and status display
|
|
6
|
+
for regridding operations, similar to the download UI system.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
import logging
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Optional, Dict, List, Any
|
|
14
|
+
|
|
15
|
+
from rich.progress import (
|
|
16
|
+
Progress,
|
|
17
|
+
BarColumn,
|
|
18
|
+
TimeElapsedColumn,
|
|
19
|
+
TimeRemainingColumn,
|
|
20
|
+
TextColumn,
|
|
21
|
+
SpinnerColumn,
|
|
22
|
+
TaskProgressColumn,
|
|
23
|
+
MofNCompleteColumn,
|
|
24
|
+
)
|
|
25
|
+
from rich.console import Console
|
|
26
|
+
from rich.table import Table
|
|
27
|
+
from rich.panel import Panel
|
|
28
|
+
|
|
29
|
+
from cdo_toolkit.errors import default_log_dir
|
|
30
|
+
|
|
31
|
+
# Display captions for the phase keys passed to the progress UIs. Lookups use
# ``_PHASE_LABELS.get(phase, phase)``, so a key missing here is shown verbatim.
_PHASE_LABELS = {
    "starting": "Starting",
    "creating": "Creating chunks",
    "regridding": "Regridding chunks",
    "combining": "Combining chunks",
    "prep": "Preparing",
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RegridProgressUI:
    """Progress UI for regridding: one progress bar per file (like download UI).

    Used as a context manager: entering starts the Rich progress display and
    adds the overall task. Per-file bars are added by
    ``start_file_processing`` and hidden about two seconds after the file is
    completed or skipped.
    """

    def __init__(
        self,
        files: List[Path],
        verbose: bool = True,
        verbose_diagnostics: bool = False,
        log_file: Optional[Path] = None,
    ):
        """Store the configuration and build the logger and progress display.

        Args:
            files: Files to regrid; ``len(files)`` is the overall bar's total.
            verbose: Stored as ``self.verbose``; no method of this class reads it.
            verbose_diagnostics: When true, the initial per-file description
                includes the size and grid type taken from ``file_info``.
            log_file: Error-log destination. When ``None``, a timestamped file
                under ``default_log_dir()`` is used.
        """
        self.files = files
        self.verbose = verbose
        self.verbose_diagnostics = verbose_diagnostics
        self.console = Console()
        self.status_counts = {"completed": 0, "skipped": 0, "failed": 0}
        self.file_status: Dict[Path, str] = {}
        self.file_task_ids: Dict[Path, Optional[int]] = {}
        self.failed_files: List[tuple[Path, str]] = []
        self.processing_stats: Dict[str, Any] = {
            "weights_reused": 0,
            "weights_generated": 0,
            "chunks_processed": 0,
            "total_size_gb": 0.0,
            "memory_peak_gb": 0.0,
        }
        self.overall_task: Optional[int] = None
        self.current_files: List[Path] = []
        self._setup_logger(log_file)
        self._setup_progress()

    @staticmethod
    def _percentage(count: int, total: int) -> str:
        """Format ``count`` as a percentage of ``total`` with one decimal.

        Returns ``"0.0%"`` when ``total`` is zero so that a summary for an
        empty file list does not raise ``ZeroDivisionError``.
        """
        if total <= 0:
            return "0.0%"
        return f"{count / total * 100:.1f}%"

    def _file_noun(self) -> str:
        """Return ``"file"`` for exactly one file, otherwise ``"files"``."""
        return "file" if len(self.files) == 1 else "files"

    def _setup_logger(self, log_file: Optional[Path] = None):
        """Set up logging for regridding errors.

        Writes to ``log_file`` if given, else creates a timestamped file under
        ``default_log_dir()``. The chosen path is stored as ``self.log_file``
        and its parent directory is created if missing.
        """
        self.logger = logging.getLogger(f"cdo_toolkit.ui.{id(self)}")
        self.logger.setLevel(logging.ERROR)

        if log_file is None:
            log_dir = default_log_dir()
            log_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
            log_file = log_dir / f"regrid_errors_{timestamp}.log"
        self.log_file = Path(log_file)
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

        # getLogger() returns the same logger for a repeated name, and id()
        # values can repeat once an object is freed. Close any handler left on
        # it instead of merely dropping the reference, which would leak the
        # open log file.
        for stale in list(self.logger.handlers):
            self.logger.removeHandler(stale)
            stale.close()

        handler = logging.FileHandler(self.log_file)
        handler.setFormatter(
            logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
        )
        self.logger.addHandler(handler)

    def _setup_progress(self):
        """Set up compact progress display components."""
        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            MofNCompleteColumn(),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
            transient=False,
            expand=True,
            auto_refresh=True,
            refresh_per_second=2,  # Reduced refresh rate
        )

    def __enter__(self):
        """Enter the progress context.

        Only the overall task is added here; per-file tasks are added when
        each file starts. Every file is marked ``"PENDING"``.
        """
        self.progress.__enter__()
        self.overall_task = self.progress.add_task(
            f"[cyan]Regridding {len(self.files)} {self._file_noun()}[/cyan]",
            total=len(self.files),
        )
        for file_path in self.files:
            self.file_status[file_path] = "PENDING"
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the progress context."""
        self.progress.__exit__(exc_type, exc_val, exc_tb)

    def start_file_processing(
        self,
        file_path: Path,
        file_info: Dict[str, Any],
        regrid_mode: str = "complete",
    ):
        """Add a progress bar for this file (only current batch visible).

        Args:
            file_path: File that is starting; marked ``"PROCESSING"``.
            file_info: Only read when ``verbose_diagnostics`` is set; the keys
                ``file_size_gb`` (default 0) and ``grid_type`` (default
                ``"unknown"``) are used.
            regrid_mode: 'surface', 'seafloor', or 'complete'; shown in the
                bar description.
        """
        self.current_files.append(file_path)
        self.file_status[file_path] = "PROCESSING"
        filename = file_path.name
        mode_str = f" ({regrid_mode})"
        if self.verbose_diagnostics:
            size_gb = file_info.get("file_size_gb", 0)
            grid_type = file_info.get("grid_type", "unknown")
            desc = f"[blue]{filename}{mode_str}[/blue] ({size_gb:.1f}GB, {grid_type})"
        else:
            desc = f"[blue][PENDING]{mode_str} {filename}[/blue]"
        task_id = self.progress.add_task(desc, total=100, visible=True)
        self.file_task_ids[file_path] = task_id

    def update_file_progress(
        self, file_path: Path, progress: int, operation: str = "", regrid_mode: str = "complete"
    ):
        """Update progress for a specific file.

        Does nothing when no bar was registered for ``file_path``. ``progress``
        is written as the bar's completed value, and ``operation``, when
        non-empty, prefixes the description.
        """
        task_id = self.file_task_ids.get(file_path)
        if task_id is None:
            return
        filename = file_path.name
        mode_str = f" ({regrid_mode})"
        if operation:
            desc = f"[blue]{operation}: {filename}{mode_str}[/blue]"
        else:
            desc = f"[blue]{filename}{mode_str}[/blue]"
        self.progress.update(task_id, completed=progress, description=desc)

    def _finish_file(self, file_path: Path, description: str):
        """Finalise a file's bar (if it has one) and advance the overall bar."""
        if file_path in self.current_files:
            self.current_files.remove(file_path)

        task_id = self.file_task_ids.get(file_path)
        if task_id is not None:
            self.progress.update(task_id, completed=100, description=description)
            self._hide_task_after_delay(task_id, delay_seconds=2)

        # Advance even when the file never had its own bar (for example a file
        # skipped before processing started), so the overall count stays right.
        if self.overall_task is not None:
            self.progress.advance(self.overall_task)

        self._update_overall_description()

    def complete_file(self, file_path: Path, success: bool = True, message: str = ""):
        """Mark a file as completed with compact display.

        Args:
            file_path: File that finished.
            success: ``True`` counts the file as completed, ``False`` as failed.
            message: For failures only; when non-empty the failure is recorded
                in ``failed_files`` and written to the error log.
        """
        if success:
            self.file_status[file_path] = "COMPLETED"
            self.status_counts["completed"] += 1
            description = f"[green]✓ {file_path.name}[/green]"
        else:
            self.file_status[file_path] = "FAILED"
            self.status_counts["failed"] += 1
            if message:
                self.failed_files.append((file_path, message))
                self.logger.error(f"File: {file_path.name} - {message}")
            description = f"[red]✗ {file_path.name}[/red]"

        self._finish_file(file_path, description)

    def skip_file(self, file_path: Path, reason: str = "Already exists"):
        """Mark a file as skipped.

        ``reason`` is accepted for interface compatibility; it is not displayed
        or logged.
        """
        self.file_status[file_path] = "SKIPPED"
        self.status_counts["skipped"] += 1
        self._finish_file(file_path, f"[yellow]⏭ {file_path.name}[/yellow]")

    def _hide_task_after_delay(self, task_id: int, delay_seconds: int = 2):
        """Hide a task after a delay so only the current batch remains visible."""
        import threading

        def hide():
            try:
                self.progress.update(task_id, visible=False)
            except Exception:
                # Deliberately best effort: hiding a bar is cosmetic and must
                # never take the run down from a background thread.
                pass

        timer = threading.Timer(delay_seconds, hide)
        timer.daemon = True  # never keep the interpreter alive for a cosmetic update
        timer.start()

    def _update_overall_description(self):
        """Update the overall progress description with current statistics."""
        if self.overall_task is None:
            return

        completed = self.status_counts["completed"]
        skipped = self.status_counts["skipped"]
        failed = self.status_counts["failed"]
        processing = len(self.current_files)

        desc = f"[cyan]Regridding {len(self.files)} {self._file_noun()}[/cyan] - "
        desc += f"[green]✓{completed}[/green] "
        desc += f"[yellow]⏭{skipped}[/yellow] "
        desc += f"[red]✗{failed}[/red]"
        if processing > 0:
            desc += f" [blue]Processing {processing}[/blue]"

        self.progress.update(self.overall_task, description=desc)

    def update_chunk_progress(
        self,
        file_path: Path,
        chunks_done: int,
        total_chunks: int,
        phase: str = "creating",
        regrid_mode: str = "complete",
    ):
        """Update per-file progress from chunk counts (creation or regridding phase).

        Does nothing when no bar was registered for ``file_path``. The bar's
        total becomes ``max(total_chunks, 1)`` and the completed value is
        capped at that total.
        """
        task_id = self.file_task_ids.get(file_path)
        if task_id is None:
            return
        label = _PHASE_LABELS.get(phase, phase)
        mode_str = f" ({regrid_mode})" if regrid_mode != "complete" else ""
        total = max(total_chunks, 1)
        count_str = f" ({chunks_done}/{total_chunks})" if total_chunks > 0 else ""
        self.progress.update(
            task_id,
            total=total,
            completed=min(chunks_done, total),
            description=f"[cyan]{label} {file_path.name}{mode_str}{count_str}[/cyan]",
        )

    def update_chunking_progress(self, file_path: Path, chunk_num: int, total_chunks: int):
        """Backward-compatible alias for chunk-creation progress."""
        self.update_chunk_progress(file_path, chunk_num, total_chunks, phase="creating")

    def update_regridding_progress(self, file_path: Path, operation: str):
        """Update progress for regridding operations (description only)."""
        task_id = self.file_task_ids.get(file_path)
        if task_id is not None:
            self.progress.update(
                task_id, description=f"[magenta]{operation}: {file_path.name}[/magenta]"
            )

    def _update_stats(self, stats: Dict[str, Any]):
        """Update processing statistics; given values overwrite stored ones."""
        self.processing_stats.update(stats)

    def print_summary(self):
        """Print a comprehensive summary of the regridding operation."""
        # Stop the progress display
        self.progress.stop()

        total_files = len(self.files)
        table = Table(show_header=True, header_style="bold magenta")
        for heading in ("Status", "Count", "Percentage"):
            table.add_column(heading, style="bold")
        for caption, key in (
            ("[green]Completed[/green]", "completed"),
            ("[yellow]Skipped[/yellow]", "skipped"),
            ("[red]Failed[/red]", "failed"),
        ):
            count = self.status_counts[key]
            table.add_row(caption, str(count), self._percentage(count, total_files))

        stats = self.processing_stats
        stats_table = Table(show_header=True, header_style="bold cyan")
        stats_table.add_column("Metric", style="bold")
        stats_table.add_column("Value", style="bold")
        stats_table.add_row("Weights Reused", str(stats["weights_reused"]))
        stats_table.add_row("Weights Generated", str(stats["weights_generated"]))
        stats_table.add_row("Chunks Processed", str(stats["chunks_processed"]))
        stats_table.add_row("Total Size (GB)", f"{stats['total_size_gb']:.2f}")
        stats_table.add_row("Peak Memory (GB)", f"{stats['memory_peak_gb']:.2f}")

        # Add timing information if available. str() because _update_stats
        # stores whatever the caller supplied, which need not be a string.
        if "processing_time" in stats:
            stats_table.add_row("Processing Time", str(stats["processing_time"]))

        self.console.print(Panel(table, title="[bold]Regridding Summary[/bold]", border_style="green"))
        self.console.print(Panel(stats_table, title="[bold]Processing Statistics[/bold]", border_style="cyan"))

        if self.failed_files:
            self._display_failed_files()

    def _display_failed_files(self):
        """Display information about failed files.

        File names, paths and error messages are escaped before being placed
        inside Rich markup, so text such as ``[/path]`` in an error message
        cannot raise a markup error.
        """
        from rich.markup import escape

        self.console.print(f"\n[red]Failed Files ({len(self.failed_files)}):[/red]")

        for index, (file_path, error_msg) in enumerate(self.failed_files, start=1):
            self.console.print(f"[red]{index}. {escape(file_path.name)}[/red]")
            self.console.print(f"   [red]Error: {escape(str(error_msg))}[/red]")
            self.console.print(f"   [red]Path: {escape(str(file_path))}[/red]")
            self.console.print("")

        # Show detailed failure info for first failed file
        if self.failed_files:
            file_path, error_msg = self.failed_files[0]
            self.console.print(
                Panel(
                    f"First failed file: [bold]{escape(file_path.name)}[/bold]\n"
                    f"[red]{escape(str(error_msg))}[/red]\n\n"
                    f"Path: {escape(str(file_path))}",
                    title="[red]Detailed Failure Info[/red]",
                    style="red",
                )
            )
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
class BatchRegridUI:
    """Batch regridding UI: only files in the current batch shown at once; overall progress.

    Used as a context manager. A per-file bar appears when
    ``start_file_processing`` or ``update_chunk_progress`` first sees a file,
    and is hidden about two seconds after ``update_file_result`` reports it.
    """

    # Stats that represent a high-water mark: merged with max(), not added.
    _PEAK_STATS = frozenset({"memory_peak_gb"})

    def __init__(
        self,
        files: List[Path],
        max_workers: int = 4,
        verbose: bool = True,
        regrid_mode: str = "complete",
    ):
        """Store the configuration and build the progress display.

        Args:
            files: Files in the batch; ``len(files)`` is the overall total.
            max_workers: Shown in the overall description only.
            verbose: Stored as ``self.verbose``; no method of this class reads it.
            regrid_mode: Mode label shown in descriptions.
        """
        self.files = files
        self.max_workers = max_workers
        self.verbose = verbose
        self.regrid_mode = regrid_mode
        self.console = Console()
        self.overall_progress: Optional[int] = None
        self.completed_files: List[Path] = []
        self.failed_files: List[tuple[Path, str]] = []
        self.skipped_files: List[Path] = []
        self.stats = {
            "files_processed": 0,
            "weights_reused": 0,
            "weights_generated": 0,
            "chunks_processed": 0,
            "errors": 0,
            "total_size_gb": 0.0,
            "memory_peak_gb": 0.0,
            "processing_time": "0s",
        }
        # Keyed by str(path): open bars, paths being worked on, paths already
        # reported through update_file_result.
        self.file_task_ids: Dict[str, int] = {}
        self.active_files: set[str] = set()
        self.finished_paths: set[str] = set()
        self._setup_progress()

    @staticmethod
    def _percentage(count: int, total: int) -> str:
        """Format ``count`` as a percentage of ``total`` with one decimal.

        Returns ``"0.0%"`` when ``total`` is zero so that a summary for an
        empty batch does not raise ``ZeroDivisionError``.
        """
        if total <= 0:
            return "0.0%"
        return f"{count / total * 100:.1f}%"

    @staticmethod
    def _is_number(value: Any) -> bool:
        """Return True for int/float values, excluding bool."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    @staticmethod
    def _bar_total(total_chunks: int) -> int:
        """Return the bar total: the chunk count, or 100 when it is not yet known."""
        return total_chunks if total_chunks > 0 else 100

    def _file_mode_suffix(self) -> str:
        """Return the mode suffix for per-file bars (empty for 'complete')."""
        return f" ({self.regrid_mode})" if self.regrid_mode != "complete" else ""

    def _headline(self) -> str:
        """Return the overall-bar headline shared by ``__enter__`` and later updates."""
        return (
            f"[cyan]Batch Regridding {len(self.files)} files"
            f" ({self.regrid_mode}) ({self.max_workers} workers)[/cyan]"
        )

    def _result_description(self, file_path: Path, result: Dict[str, Any]) -> str:
        """Return the final bar description for a worker result.

        ``result["success"]`` and ``result["skipped"]`` (both defaulting to
        false) select the completed, skipped or failed marker.
        """
        label = f"{file_path.name}{self._file_mode_suffix()}"
        if not result.get("success", False):
            return f"[red]✗ {label}[/red]"
        if result.get("skipped", False):
            return f"[yellow]⏭ {label}[/yellow]"
        return f"[green]✓ {label}[/green]"

    def _setup_progress(self):
        """Set up progress display: overall + one bar per file."""
        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            MofNCompleteColumn(),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
            transient=False,
            expand=True,
            auto_refresh=True,
            refresh_per_second=2,
        )

    def __enter__(self):
        """Enter the batch progress context.

        Only the overall task is added here. Per-file bars are added when a
        file starts processing, so only the current batch is visible.
        """
        self.progress.__enter__()
        self.overall_progress = self.progress.add_task(self._headline(), total=len(self.files))
        return self

    def _hide_task_after_delay(self, task_id: int, delay_seconds: int = 2):
        """Hide a task after a delay so only the current batch remains visible."""
        import threading

        def hide():
            try:
                self.progress.update(task_id, visible=False)
            except Exception:
                # Deliberately best effort: hiding a bar is cosmetic and must
                # never take the run down from a background thread.
                pass

        timer = threading.Timer(delay_seconds, hide)
        timer.daemon = True  # never keep the interpreter alive for a cosmetic update
        timer.start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the batch progress context."""
        self.progress.__exit__(exc_type, exc_val, exc_tb)

    def start_file_processing(self, file_path: Path, total_chunks: int = 0):
        """Show a progress bar when a worker starts processing this file.

        A no-op when the file already has a bar or has already finished.
        """
        path_key = str(file_path)
        if path_key in self.file_task_ids or path_key in self.finished_paths:
            return
        self.active_files.add(path_key)
        desc = f"[blue]Starting {file_path.name}{self._file_mode_suffix()}[/blue]"
        self.file_task_ids[path_key] = self.progress.add_task(
            desc, total=self._bar_total(total_chunks), completed=0, visible=True
        )
        self._update_overall_description()

    def update_chunk_progress(
        self,
        file_path: Path,
        chunks_done: int,
        total_chunks: int,
        phase: str = "creating",
    ):
        """Update in-progress bar from chunk counts reported by a worker.

        Creates the bar first if the file has none yet; does nothing for a
        file that has already finished.
        """
        path_key = str(file_path)
        if path_key not in self.file_task_ids:
            self.start_file_processing(file_path, total_chunks=total_chunks)
        task_id = self.file_task_ids.get(path_key)
        if task_id is None:
            return
        label = _PHASE_LABELS.get(phase, phase)
        total = self._bar_total(total_chunks)
        if total_chunks > 0:
            count_str = f" ({chunks_done}/{total_chunks})"
            completed = min(chunks_done, total)
        else:
            count_str = ""
            completed = chunks_done
        self.progress.update(
            task_id,
            total=total,
            completed=completed,
            description=f"[cyan]{label} {file_path.name}{self._file_mode_suffix()}{count_str}[/cyan]",
        )

    def sync_from_progress_state(self, progress_state: Dict[str, Dict[str, Any]]):
        """Poll shared worker progress and refresh in-progress bars.

        Entries for finished paths, or whose ``phase`` is ``"done"``, are
        ignored. The keys ``chunks_done``, ``chunks_total`` and ``phase`` are
        read from each remaining entry.
        """
        for path_str, info in progress_state.items():
            if path_str in self.finished_paths or info.get("phase") == "done":
                continue
            self.update_chunk_progress(
                Path(path_str),
                int(info.get("chunks_done", 0)),
                int(info.get("chunks_total", 0)),
                phase=info.get("phase", "starting"),
            )
        self._update_overall_description()

    def _complete_in_progress_file(self, file_path: Path, result: Dict[str, Any]) -> Optional[int]:
        """Mark an in-progress file complete and schedule hiding its bar.

        Returns the task id of the bar that was finalised, or ``None`` when
        the file had no bar.
        """
        path_key = str(file_path)
        self.finished_paths.add(path_key)
        self.active_files.discard(path_key)
        task_id = self.file_task_ids.pop(path_key, None)
        if task_id is None:
            return None
        self.progress.update(
            task_id,
            total=1,
            completed=1,
            description=self._result_description(file_path, result),
        )
        self._hide_task_after_delay(task_id, delay_seconds=2)
        return task_id

    def update_file_result(self, file_path: Path, result: Dict[str, Any]):
        """Update progress when a worker finishes.

        Records the file as completed, skipped or failed, completes its bar
        (or briefly flashes a result bar if it never had one), advances the
        overall bar, and merges ``result["stats"]`` when present.
        """
        if not result.get("success", False):
            self.failed_files.append((file_path, result.get("message", "Unknown error")))
        elif result.get("skipped", False):
            self.skipped_files.append(file_path)
        else:
            self.completed_files.append(file_path)

        if self._complete_in_progress_file(file_path, result) is None:
            flash_id = self.progress.add_task(
                self._result_description(file_path, result), total=1, completed=1, visible=True
            )
            self._hide_task_after_delay(flash_id, delay_seconds=2)

        if self.overall_progress is not None:
            self.progress.advance(self.overall_progress)
            self._update_overall_description()

        if "stats" in result:
            self._update_stats(result["stats"])

    def _update_overall_description(self):
        """Update the overall progress description with current statistics."""
        if self.overall_progress is None:
            return

        completed = len(self.completed_files)
        skipped = len(self.skipped_files)
        failed = len(self.failed_files)
        remaining = len(self.files) - completed - skipped - failed
        in_progress = len(self.active_files)

        desc = f"{self._headline()} - "
        desc += f"[green]✓{completed}[/green] "
        desc += f"[yellow]⏭{skipped}[/yellow] "
        desc += f"[red]✗{failed}[/red]"
        if in_progress > 0:
            desc += f" [blue]In progress: {in_progress}[/blue]"
        elif remaining > 0:
            desc += f" [blue]Remaining: {remaining}[/blue]"

        self.progress.update(self.overall_progress, description=desc)

    def _update_stats(self, new_stats: Dict[str, Any]):
        """Merge one worker's statistics into the cumulative totals.

        Numeric values are added to an existing numeric stat, except for the
        keys in ``_PEAK_STATS``, which keep the maximum. In every other case
        (new key, non-numeric value, or a stored value that is not a number,
        such as the initial ``processing_time`` string) the value replaces the
        stored one instead of raising ``TypeError``.
        """
        for key, value in new_stats.items():
            current = self.stats.get(key)
            if self._is_number(value) and self._is_number(current):
                if key in self._PEAK_STATS:
                    self.stats[key] = max(current, value)
                else:
                    self.stats[key] = current + value
            else:
                self.stats[key] = value

    def print_summary(self):
        """Print compact batch processing summary."""
        from rich.markup import escape

        self.progress.stop()

        total = len(self.files)
        table = Table(show_header=True, header_style="bold magenta")
        for heading in ("Status", "Count", "Percentage"):
            table.add_column(heading, style="bold")
        for caption, count in (
            ("[green]Completed[/green]", len(self.completed_files)),
            ("[yellow]Skipped[/yellow]", len(self.skipped_files)),
            ("[red]Failed[/red]", len(self.failed_files)),
        ):
            table.add_row(caption, str(count), self._percentage(count, total))

        stats_table = Table(show_header=True, header_style="bold cyan")
        stats_table.add_column("Metric", style="bold")
        stats_table.add_column("Value", style="bold")
        stats_table.add_row("Weights Reused", str(self.stats["weights_reused"]))
        stats_table.add_row("Weights Generated", str(self.stats["weights_generated"]))
        stats_table.add_row("Chunks Processed", str(self.stats["chunks_processed"]))
        stats_table.add_row("Total Size (GB)", f"{self.stats['total_size_gb']:.2f}")
        stats_table.add_row("Peak Memory (GB)", f"{self.stats['memory_peak_gb']:.2f}")

        # Add timing information if available. str() because a worker may
        # report the time as a number rather than a preformatted string.
        if "processing_time" in self.stats:
            stats_table.add_row("Processing Time", str(self.stats["processing_time"]))

        self.console.print(Panel(table, title="[bold]Batch Regridding Summary[/bold]", border_style="green"))
        self.console.print(Panel(stats_table, title="[bold]Processing Statistics[/bold]", border_style="cyan"))

        # Show failed files if any (compact): first five, then a count of the rest.
        if self.failed_files:
            shown_limit = 5
            self.console.print(f"\n[red]Failed Files ({len(self.failed_files)}):[/red]")
            for index, (file_path, error_msg) in enumerate(self.failed_files[:shown_limit], start=1):
                # Escaped so brackets in a name or message are not parsed as markup.
                self.console.print(
                    f"[red]{index}. {escape(file_path.name)} - {escape(str(error_msg))}[/red]"
                )
            if len(self.failed_files) > shown_limit:
                self.console.print(f"[red]... and {len(self.failed_files) - shown_limit} more[/red]")
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
class ChunkingProgressUI:
    """UI for chunked file processing operations.

    Used as a context manager: entering starts the display and adds a single
    bar whose total is ``total_chunks``. The display is transient.
    """

    # Task id of the chunking bar. It stays None until __enter__ adds the task,
    # so the update methods can be called safely before the context is entered.
    chunking_task: Optional[int] = None

    def __init__(self, file_path: Path, total_chunks: int, verbose: bool = True):
        """Store the configuration and build the progress display.

        Args:
            file_path: File being chunked; its name appears in descriptions.
            total_chunks: Total of the chunking bar.
            verbose: Stored as ``self.verbose``; no method of this class reads it.
        """
        self.file_path = file_path
        self.total_chunks = total_chunks
        self.verbose = verbose
        self.console = Console()

        self._setup_progress()

    def _setup_progress(self):
        """Set up progress display for chunking operations."""
        self.progress = Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
            MofNCompleteColumn(),
            TimeElapsedColumn(),
            TimeRemainingColumn(),
            transient=True,
            expand=True,
            auto_refresh=True,
            refresh_per_second=4,
        )

    def __enter__(self):
        """Enter the chunking progress context and add the chunking task."""
        self.progress.__enter__()
        self.chunking_task = self.progress.add_task(
            f"[cyan]Chunking: {self.file_path.name}[/cyan]",
            total=self.total_chunks,
        )
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Exit the chunking progress context."""
        self.progress.__exit__(exc_type, exc_val, exc_tb)

    def update_chunk_progress(self, chunk_num: int, operation: str = "Processing"):
        """Update progress for chunk processing.

        Sets the bar's completed value to ``chunk_num``. A no-op when the
        context has not been entered yet.
        """
        # Compare against None explicitly: 0 is a valid task id.
        if self.chunking_task is None:
            return
        self.progress.update(
            self.chunking_task,
            completed=chunk_num,
            description=f"[cyan]{operation} chunk {chunk_num}/{self.total_chunks}: {self.file_path.name}[/cyan]",
        )

    def complete_chunking(self):
        """Mark chunking as complete. A no-op when the context has not been entered."""
        if self.chunking_task is None:
            return
        self.progress.update(
            self.chunking_task,
            completed=self.total_chunks,
            description=f"[green]✓ Chunking complete: {self.file_path.name}[/green]",
        )
|