gapless_crypto_clickhouse-7.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. gapless_crypto_clickhouse/__init__.py +147 -0
  2. gapless_crypto_clickhouse/__probe__.py +349 -0
  3. gapless_crypto_clickhouse/api.py +1032 -0
  4. gapless_crypto_clickhouse/clickhouse/__init__.py +17 -0
  5. gapless_crypto_clickhouse/clickhouse/config.py +119 -0
  6. gapless_crypto_clickhouse/clickhouse/connection.py +269 -0
  7. gapless_crypto_clickhouse/clickhouse/schema.sql +98 -0
  8. gapless_crypto_clickhouse/clickhouse/schema_validator.py +312 -0
  9. gapless_crypto_clickhouse/clickhouse_query.py +642 -0
  10. gapless_crypto_clickhouse/collectors/__init__.py +21 -0
  11. gapless_crypto_clickhouse/collectors/binance_public_data_collector.py +1994 -0
  12. gapless_crypto_clickhouse/collectors/clickhouse_bulk_loader.py +446 -0
  13. gapless_crypto_clickhouse/collectors/concurrent_collection_orchestrator.py +407 -0
  14. gapless_crypto_clickhouse/collectors/csv_format_detector.py +123 -0
  15. gapless_crypto_clickhouse/collectors/httpx_downloader.py +395 -0
  16. gapless_crypto_clickhouse/collectors/hybrid_url_generator.py +316 -0
  17. gapless_crypto_clickhouse/exceptions.py +145 -0
  18. gapless_crypto_clickhouse/gap_filling/__init__.py +1 -0
  19. gapless_crypto_clickhouse/gap_filling/safe_file_operations.py +439 -0
  20. gapless_crypto_clickhouse/gap_filling/universal_gap_filler.py +757 -0
  21. gapless_crypto_clickhouse/llms.txt +268 -0
  22. gapless_crypto_clickhouse/probe.py +235 -0
  23. gapless_crypto_clickhouse/py.typed +0 -0
  24. gapless_crypto_clickhouse/query_api.py +374 -0
  25. gapless_crypto_clickhouse/resume/__init__.py +12 -0
  26. gapless_crypto_clickhouse/resume/intelligent_checkpointing.py +383 -0
  27. gapless_crypto_clickhouse/utils/__init__.py +29 -0
  28. gapless_crypto_clickhouse/utils/error_handling.py +202 -0
  29. gapless_crypto_clickhouse/utils/etag_cache.py +194 -0
  30. gapless_crypto_clickhouse/utils/timeframe_constants.py +90 -0
  31. gapless_crypto_clickhouse/utils/timestamp_format_analyzer.py +256 -0
  32. gapless_crypto_clickhouse/utils/timestamp_utils.py +130 -0
  33. gapless_crypto_clickhouse/validation/__init__.py +36 -0
  34. gapless_crypto_clickhouse/validation/csv_validator.py +677 -0
  35. gapless_crypto_clickhouse/validation/models.py +220 -0
  36. gapless_crypto_clickhouse/validation/storage.py +502 -0
  37. gapless_crypto_clickhouse-7.1.0.dist-info/METADATA +1277 -0
  38. gapless_crypto_clickhouse-7.1.0.dist-info/RECORD +40 -0
  39. gapless_crypto_clickhouse-7.1.0.dist-info/WHEEL +4 -0
  40. gapless_crypto_clickhouse-7.1.0.dist-info/licenses/LICENSE +21 -0
gapless_crypto_clickhouse/resume/intelligent_checkpointing.py
@@ -0,0 +1,383 @@
+ """
+ Intelligent Resume System with SOTA Checkpointing
+
+ Provides resume capabilities for large-scale cryptocurrency data collection.
+ Uses simple JSON state persistence with automatic resume from the last successful checkpoint.
+ Eliminates restart frustration for multi-symbol, multi-timeframe, multi-year collections.
+
+ Architecture:
+ - Symbol-level checkpointing: Resume from the last completed symbol
+ - Timeframe-level checkpointing: Resume from the last completed timeframe within a symbol
+ - Collection-level checkpointing: Resume from the last completed collection task
+ - Progress persistence: Maintains collection state across interruptions
+ - Integrity validation: Verifies checkpoint consistency before resume
+ """
+
+ import hashlib
+ import json
+ from datetime import datetime
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Union
+
+ # joblib removed - using simple JSON state persistence
+ from ..utils import GaplessCryptoError, get_standard_logger
+
+
+ class CheckpointError(GaplessCryptoError):
+     """Checkpoint-specific errors."""
+
+     pass
+
+
+ class IntelligentCheckpointManager:
+     """
+     Checkpoint manager using simple JSON state persistence for resume capabilities.
+
+     Provides resume functionality for large-scale cryptocurrency data collection
+     with automatic progress tracking, integrity validation, and efficient storage.
+     """
+
+     def __init__(
+         self,
+         cache_dir: Optional[Union[str, Path]] = None,
+         verbose: int = 1,
+         compress: Union[bool, int] = True,
+     ):
+         """
+         Initialize the checkpoint manager with JSON state persistence.
+
+         Args:
+             cache_dir: Directory for checkpoint cache (default: ./.gapless_checkpoints)
+             verbose: Retained for backward compatibility; ignored (joblib removed)
+             compress: Retained for backward compatibility; ignored (joblib removed)
+         """
+         self.cache_dir = Path(cache_dir or ".gapless_checkpoints").resolve()
+         self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+         # Simple JSON state persistence (joblib removed)
+         # Parameters kept for backward compatibility but ignored
+         self._verbose = verbose
+         self._compress = compress
+
+         self.logger = get_standard_logger("checkpoint_manager")
+         self.session_id = self._generate_session_id()
+         self.checkpoint_file = self.cache_dir / f"session_{self.session_id}.json"
+
+         # Progress tracking
+         self.progress_data: Dict[str, Any] = {
+             "session_id": self.session_id,
+             "created_at": datetime.now().isoformat(),
+             "last_updated": datetime.now().isoformat(),
+             "symbols_completed": [],
+             "symbols_in_progress": {},
+             "total_datasets_collected": 0,
+             "collection_parameters": {},
+             "errors": [],
+         }
+
+         self.logger.info(f"🔄 Checkpoint manager initialized: {self.cache_dir}")
+         self.logger.info(f"📋 Session ID: {self.session_id}")
+
+     def _generate_session_id(self) -> str:
+         """Generate a unique session identifier for checkpoint isolation."""
+         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+         random_suffix = hashlib.md5(str(datetime.now().timestamp()).encode()).hexdigest()[:8]
+         return f"{timestamp}_{random_suffix}"
+
+     def save_checkpoint(self, checkpoint_data: Dict[str, Any]) -> None:
+         """
+         Save checkpoint with atomic operations and integrity validation.
+
+         Args:
+             checkpoint_data: Checkpoint state to persist
+         """
+         try:
+             # Update progress data
+             self.progress_data.update(checkpoint_data)
+             self.progress_data["last_updated"] = datetime.now().isoformat()
+
+             # Atomic write to prevent corruption
+             temp_file = self.checkpoint_file.with_suffix(".tmp")
+             with open(temp_file, "w") as f:
+                 json.dump(self.progress_data, f, indent=2, default=str)
+
+             # Atomic rename for consistency
+             temp_file.replace(self.checkpoint_file)
+
+             self.logger.debug(
+                 f"💾 Checkpoint saved: {checkpoint_data.get('current_symbol', 'unknown')}"
+             )
+
+         except Exception as e:
+             raise CheckpointError(f"Failed to save checkpoint: {e}") from e
+
+     def load_checkpoint(self) -> Optional[Dict[str, Any]]:
+         """
+         Load checkpoint with integrity validation.
+
+         Returns:
+             Checkpoint data if valid, None if no valid checkpoint exists
+         """
+         try:
+             if not self.checkpoint_file.exists():
+                 self.logger.info("📂 No existing checkpoint found")
+                 return None
+
+             with open(self.checkpoint_file, "r") as f:
+                 checkpoint_data = json.load(f)
+
+             # Validate checkpoint integrity
+             if not self._validate_checkpoint(checkpoint_data):
+                 self.logger.warning("⚠️ Invalid checkpoint detected, starting fresh")
+                 return None
+
+             self.progress_data = checkpoint_data
+             self.logger.info(f"📋 Loaded checkpoint: Session {checkpoint_data.get('session_id')}")
+             self.logger.info(
+                 f"✅ Completed symbols: {len(checkpoint_data.get('symbols_completed', []))}"
+             )
+
+             return checkpoint_data
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to load checkpoint: {e}")
+             return None
+
+     def _validate_checkpoint(self, checkpoint_data: Dict[str, Any]) -> bool:
+         """Validate checkpoint data integrity and completeness."""
+         required_fields = [
+             "session_id",
+             "created_at",
+             "symbols_completed",
+             "symbols_in_progress",
+             "collection_parameters",
+         ]
+
+         for field in required_fields:
+             if field not in checkpoint_data:
+                 self.logger.warning(f"❌ Missing checkpoint field: {field}")
+                 return False
+
+         return True
+
+     def get_resume_plan(
+         self,
+         requested_symbols: List[str],
+         requested_timeframes: List[str],
+         collection_params: Dict[str, Any],
+     ) -> Dict[str, Any]:
+         """
+         Generate an intelligent resume plan based on checkpoint state.
+
+         Args:
+             requested_symbols: Symbols to collect
+             requested_timeframes: Timeframes to collect
+             collection_params: Collection parameters (dates, output_dir, etc.)
+
+         Returns:
+             Resume plan with remaining work and progress summary
+         """
+         checkpoint = self.load_checkpoint()
+
+         if not checkpoint:
+             # No checkpoint - start from beginning
+             return {
+                 "resume_required": False,
+                 "remaining_symbols": requested_symbols,
+                 "completed_symbols": [],
+                 "symbols_in_progress": {},
+                 "total_progress": 0.0,
+                 "message": "Starting fresh collection",
+             }
+
+         # Validate parameters match checkpoint
+         checkpoint_params = checkpoint.get("collection_parameters", {})
+         if not self._params_compatible(checkpoint_params, collection_params):
+             self.logger.warning("⚠️ Parameters changed, starting fresh collection")
+             self.clear_checkpoint()
+             return {
+                 "resume_required": False,
+                 "remaining_symbols": requested_symbols,
+                 "completed_symbols": [],
+                 "symbols_in_progress": {},
+                 "total_progress": 0.0,
+                 "message": "Parameters changed - starting fresh",
+             }
+
+         # Calculate remaining work
+         completed_symbols = set(checkpoint.get("symbols_completed", []))
+         symbols_in_progress = checkpoint.get("symbols_in_progress", {})
+         remaining_symbols = [s for s in requested_symbols if s not in completed_symbols]
+
+         # Calculate progress
+         total_tasks = len(requested_symbols) * len(requested_timeframes)
+         completed_tasks = len(completed_symbols) * len(requested_timeframes)
+
+         # Add partial progress for symbols in progress
+         for symbol, progress in symbols_in_progress.items():
+             completed_tasks += len(progress.get("completed_timeframes", []))
+
+         progress_percentage = (completed_tasks / total_tasks * 100) if total_tasks > 0 else 0
+
+         resume_plan = {
+             "resume_required": len(completed_symbols) > 0 or len(symbols_in_progress) > 0,
+             "remaining_symbols": remaining_symbols,
+             "completed_symbols": list(completed_symbols),
+             "symbols_in_progress": symbols_in_progress,
+             "total_progress": progress_percentage,
+             "completed_datasets": checkpoint.get("total_datasets_collected", 0),
+             "message": f"Resuming from {progress_percentage:.1f}% complete",
+         }
+
+         if resume_plan["resume_required"]:
+             self.logger.info(f"🔄 Resume plan: {progress_percentage:.1f}% complete")
+             self.logger.info(f"✅ Completed: {len(completed_symbols)} symbols")
+             self.logger.info(f"⏳ In progress: {len(symbols_in_progress)} symbols")
+             self.logger.info(f"🔵 Remaining: {len(remaining_symbols)} symbols")
+
+         return resume_plan
+
+     def _params_compatible(
+         self, checkpoint_params: Dict[str, Any], current_params: Dict[str, Any]
+     ) -> bool:
+         """Check if collection parameters are compatible for resume."""
+         critical_params = ["start_date", "end_date", "output_dir"]
+
+         for param in critical_params:
+             checkpoint_val = checkpoint_params.get(param)
+             current_val = current_params.get(param)
+
+             if checkpoint_val != current_val:
+                 self.logger.debug(
+                     f"Parameter mismatch: {param} changed from {checkpoint_val} to {current_val}"
+                 )
+                 return False
+
+         return True
+
+     def mark_symbol_start(self, symbol: str, timeframes: List[str]) -> None:
+         """Mark symbol collection as started."""
+         self.progress_data["symbols_in_progress"][symbol] = {
+             "started_at": datetime.now().isoformat(),
+             "timeframes": timeframes,
+             "completed_timeframes": [],
+             "failed_timeframes": [],
+         }
+         self.save_checkpoint({"current_symbol": symbol})
+
+     def mark_timeframe_complete(
+         self, symbol: str, timeframe: str, filepath: Path, file_size_mb: float
+     ) -> None:
+         """Mark timeframe collection as completed."""
+         if symbol in self.progress_data["symbols_in_progress"]:
+             symbol_progress = self.progress_data["symbols_in_progress"][symbol]
+             symbol_progress["completed_timeframes"].append(
+                 {
+                     "timeframe": timeframe,
+                     "completed_at": datetime.now().isoformat(),
+                     "filepath": str(filepath),
+                     "file_size_mb": file_size_mb,
+                 }
+             )
+
+         self.progress_data["total_datasets_collected"] += 1
+         self.save_checkpoint({})
+
+     def mark_symbol_complete(self, symbol: str) -> None:
+         """Mark symbol collection as fully completed."""
+         if symbol in self.progress_data["symbols_in_progress"]:
+             # Move from in_progress to completed
+             self.progress_data["symbols_completed"].append(symbol)
+             del self.progress_data["symbols_in_progress"][symbol]
+
+         self.save_checkpoint({"completed_symbol": symbol})
+         self.logger.info(f"✅ Symbol completed: {symbol}")
+
+     def mark_symbol_failed(self, symbol: str, error: str) -> None:
+         """Mark symbol collection as failed."""
+         self.progress_data["errors"].append(
+             {"symbol": symbol, "error": error, "timestamp": datetime.now().isoformat()}
+         )
+
+         if symbol in self.progress_data["symbols_in_progress"]:
+             del self.progress_data["symbols_in_progress"][symbol]
+
+         self.save_checkpoint({"failed_symbol": symbol})
+
+     def clear_checkpoint(self) -> None:
+         """Clear checkpoint and start fresh."""
+         try:
+             if self.checkpoint_file.exists():
+                 self.checkpoint_file.unlink()
+
+             # Clear cache directory (joblib removed)
+             import shutil
+
+             cache_dir = self.cache_dir / "cache"
+             if cache_dir.exists():
+                 shutil.rmtree(cache_dir)
+
+             self.logger.info("🗑️ Checkpoint cleared - starting fresh")
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to clear checkpoint: {e}")
+
+     def get_cached_collection_function(self, func):
+         """
+         Simple wrapper for collection function (joblib caching removed).
+
+         Args:
+             func: Function to wrap (deterministic functions recommended)
+
+         Returns:
+             Original function (no caching applied)
+         """
+         # Return original function - caching removed for simplicity
+         return func
+
+     def cleanup_old_sessions(self, max_age_days: int = 7) -> None:
+         """Clean up old checkpoint sessions."""
+         try:
+             cutoff_time = datetime.now().timestamp() - (max_age_days * 24 * 3600)
+
+             for checkpoint_file in self.cache_dir.glob("session_*.json"):
+                 if checkpoint_file.stat().st_mtime < cutoff_time:
+                     checkpoint_file.unlink()
+                     self.logger.debug(f"🗑️ Cleaned up old session: {checkpoint_file.name}")
+
+         except Exception as e:
+             self.logger.warning(f"⚠️ Failed to cleanup old sessions: {e}")
+
+     def get_progress_summary(self) -> Dict[str, Any]:
+         """Get current progress summary for display."""
+         return {
+             "session_id": self.session_id,
+             "completed_symbols": len(self.progress_data.get("symbols_completed", [])),
+             "symbols_in_progress": len(self.progress_data.get("symbols_in_progress", {})),
+             "total_datasets": self.progress_data.get("total_datasets_collected", 0),
+             "last_updated": self.progress_data.get("last_updated"),
+             "errors": len(self.progress_data.get("errors", [])),
+         }
+
+     def export_progress_report(self, output_file: Optional[Path] = None) -> Path:
+         """Export detailed progress report for analysis."""
+         if output_file is None:
+             output_file = self.cache_dir / f"progress_report_{self.session_id}.json"
+
+         report = {
+             "progress_summary": self.get_progress_summary(),
+             "detailed_progress": self.progress_data,
+             "cache_info": {
+                 "cache_dir": str(self.cache_dir),
+                 "cache_size_mb": sum(
+                     f.stat().st_size for f in self.cache_dir.rglob("*") if f.is_file()
+                 )
+                 / (1024 * 1024),
+             },
+         }
+
+         with open(output_file, "w") as f:
+             json.dump(report, f, indent=2, default=str)
+
+         self.logger.info(f"📊 Progress report exported: {output_file}")
+         return output_file
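The checkpoint lifecycle implied by the methods above is: build a resume plan, mark each symbol started, record each finished timeframe, then mark the symbol complete. A minimal usage sketch under that reading (the symbols, timeframes, dates, and file paths are hypothetical, not taken from the package's documentation):

    from pathlib import Path

    from gapless_crypto_clickhouse.resume.intelligent_checkpointing import (
        IntelligentCheckpointManager,
    )

    manager = IntelligentCheckpointManager(cache_dir=".gapless_checkpoints")
    symbols, timeframes = ["BTCUSDT", "ETHUSDT"], ["1m", "1h"]  # hypothetical inputs
    params = {"start_date": "2024-01-01", "end_date": "2024-06-30", "output_dir": "./data"}

    plan = manager.get_resume_plan(symbols, timeframes, params)
    print(plan["message"])  # e.g. "Starting fresh collection"

    for symbol in plan["remaining_symbols"]:
        manager.mark_symbol_start(symbol, timeframes)
        for timeframe in timeframes:
            # A collector would produce this file; the path and size are illustrative
            filepath = Path(f"./data/{symbol}_{timeframe}.csv")
            manager.mark_timeframe_complete(symbol, timeframe, filepath, file_size_mb=12.3)
        manager.mark_symbol_complete(symbol)

    print(manager.get_progress_summary())

Note that each manager instance generates a fresh session_id and checkpoint filename, so get_resume_plan resumes within a session's own checkpoint file rather than across arbitrary prior sessions.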
gapless_crypto_clickhouse/utils/__init__.py
@@ -0,0 +1,29 @@
+ """Utility modules for gapless-crypto-clickhouse."""
+
+ from .error_handling import (
+     DataCollectionError,
+     FileOperationError,
+     GapFillingError,
+     GaplessCryptoError,
+     ValidationError,
+     format_user_error,
+     format_user_warning,
+     get_standard_logger,
+     handle_operation_error,
+     safe_operation,
+     validate_file_path,
+ )
+
+ __all__ = [
+     "GaplessCryptoError",
+     "DataCollectionError",
+     "GapFillingError",
+     "FileOperationError",
+     "ValidationError",
+     "get_standard_logger",
+     "handle_operation_error",
+     "safe_operation",
+     "validate_file_path",
+     "format_user_error",
+     "format_user_warning",
+ ]
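These re-exports give downstream modules a flat import surface. A small sketch of how that surface is consumed (assuming the wheel is installed; the logger name and error message are illustrative):

    from gapless_crypto_clickhouse.utils import GaplessCryptoError, get_standard_logger

    logger = get_standard_logger("collectors.example")  # "collectors.example" is illustrative
    try:
        raise GaplessCryptoError("demo failure", context={"stage": "example"})
    except GaplessCryptoError as exc:
        logger.error("caught: %s (context=%s)", exc, exc.context)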
gapless_crypto_clickhouse/utils/error_handling.py
@@ -0,0 +1,202 @@
+ """
+ Standardized error handling framework for gapless-crypto-clickhouse.
+
+ Provides consistent exception handling, logging, and error reporting across all modules.
+ Eliminates duplicate error-handling patterns and ensures a consistent debugging experience.
+ """
+
+ import logging
+ import traceback
+ from pathlib import Path
+ from typing import Any, Callable, Dict, Optional, Union
+
+
+ class GaplessCryptoError(Exception):
+     """Base exception for all gapless-crypto-clickhouse errors."""
+
+     def __init__(self, message: str, context: Optional[Dict[str, Any]] = None):
+         super().__init__(message)
+         self.context = context or {}
+
+
+ class DataCollectionError(GaplessCryptoError):
+     """Errors during data collection from Binance."""
+
+     pass
+
+
+ class GapFillingError(GaplessCryptoError):
+     """Errors during gap detection or filling operations."""
+
+     pass
+
+
+ class FileOperationError(GaplessCryptoError):
+     """Errors during file I/O operations."""
+
+     pass
+
+
+ class ValidationError(GaplessCryptoError):
+     """Errors during data validation."""
+
+     pass
+
+
+ def get_standard_logger(module_name: str) -> logging.Logger:
+     """Get a standardized logger for consistent formatting across modules."""
+     logger = logging.getLogger(f"gapless_crypto_clickhouse.{module_name}")
+
+     if not logger.handlers:
+         handler = logging.StreamHandler()
+         formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+         handler.setFormatter(formatter)
+         logger.addHandler(handler)
+         logger.setLevel(logging.INFO)
+
+     return logger
+
+
+ def handle_operation_error(
+     operation_name: str,
+     exception: Exception,
+     context: Optional[Dict[str, Any]] = None,
+     logger: Optional[logging.Logger] = None,
+     reraise: bool = False,
+     default_return: Any = None,
+ ) -> Any:
+     """
+     Standardized error handling for operations.
+
+     Args:
+         operation_name: Human-readable operation description
+         exception: The caught exception
+         context: Additional context for debugging
+         logger: Logger instance (uses default if None)
+         reraise: Whether to re-raise the exception after logging
+         default_return: Value to return if not re-raising
+
+     Returns:
+         default_return value, or re-raises if reraise=True
+     """
+     if logger is None:
+         logger = get_standard_logger("error_handler")
+
+     # Format context information
+     context_str = ""
+     if context:
+         context_items = [f"{k}={v}" for k, v in context.items()]
+         context_str = f" (Context: {', '.join(context_items)})"
+
+     # Log the error with standard format
+     error_msg = f"❌ {operation_name} failed: {str(exception)}{context_str}"
+     logger.error(error_msg)
+
+     # Optionally log full traceback for debugging
+     if logger.isEnabledFor(logging.DEBUG):
+         logger.debug(f"Full traceback: {traceback.format_exc()}")
+
+     if reraise:
+         # Re-raise the passed-in exception explicitly; a bare `raise` would fail
+         # when this helper is called outside an active except block
+         raise exception
+
+     return default_return
+
+
+ def safe_operation(
+     operation_name: str,
+     func: Callable,
+     context: Optional[Dict[str, Any]] = None,
+     logger: Optional[logging.Logger] = None,
+     exception_types: tuple = (Exception,),
+     default_return: Any = None,
+     reraise: bool = False,
+ ) -> Any:
+     """
+     Execute an operation with standardized error handling.
+
+     Args:
+         operation_name: Human-readable operation description
+         func: Function to execute
+         context: Additional context for debugging
+         logger: Logger instance (uses default if None)
+         exception_types: Tuple of exception types to catch
+         default_return: Value to return on error
+         reraise: Whether to re-raise caught exceptions
+
+     Returns:
+         Function result or default_return on error
+     """
+     try:
+         return func()
+     except exception_types as e:
+         return handle_operation_error(
+             operation_name=operation_name,
+             exception=e,
+             context=context,
+             logger=logger,
+             reraise=reraise,
+             default_return=default_return,
+         )
+
+
+ def validate_file_path(file_path: Union[str, Path], operation: str = "file operation") -> Path:
+     """
+     Validate a file path with standardized error handling.
+
+     Args:
+         file_path: Path to validate
+         operation: Operation description for error messages
+
+     Returns:
+         Validated Path object
+
+     Raises:
+         FileOperationError: If the path is invalid
+     """
+     try:
+         path = Path(file_path)
+         if not path.exists():
+             raise FileOperationError(
+                 f"File not found: {path}", context={"operation": operation, "path": str(path)}
+             )
+         return path
+     except Exception as e:
+         if isinstance(e, FileOperationError):
+             raise
+         raise FileOperationError(
+             f"Invalid file path: {file_path}", context={"operation": operation, "error": str(e)}
+         ) from e
+
+
+ def format_user_error(message: str, suggestion: Optional[str] = None) -> str:
+     """
+     Format a user-facing error message with consistent styling.
+
+     Args:
+         message: Error message
+         suggestion: Optional suggestion for resolution
+
+     Returns:
+         Formatted error message
+     """
+     formatted = f"❌ ERROR: {message}"
+     if suggestion:
+         formatted += f"\n💡 SUGGESTION: {suggestion}"
+     return formatted
+
+
+ def format_user_warning(message: str, suggestion: Optional[str] = None) -> str:
+     """
+     Format a user-facing warning message with consistent styling.
+
+     Args:
+         message: Warning message
+         suggestion: Optional suggestion for resolution
+
+     Returns:
+         Formatted warning message
+     """
+     formatted = f"⚠️ WARNING: {message}"
+     if suggestion:
+         formatted += f"\n💡 SUGGESTION: {suggestion}"
+     return formatted
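To illustrate how these helpers compose: safe_operation catches the failure, handle_operation_error logs it and returns the fallback, and format_user_error renders the final message. A short sketch (the fetch function, its failure, and the symbol are purely illustrative):

    from gapless_crypto_clickhouse.utils.error_handling import (
        format_user_error,
        safe_operation,
    )

    def fetch_prices() -> list:
        # Illustrative failure; a real collector call would go here
        raise TimeoutError("endpoint timed out")

    # Logs "❌ Fetching prices failed: ... (Context: symbol=BTCUSDT)" and returns []
    prices = safe_operation(
        "Fetching prices",
        fetch_prices,
        context={"symbol": "BTCUSDT"},
        default_return=[],
    )

    if not prices:
        print(format_user_error("No price data collected", suggestion="Retry with a longer timeout"))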