gapless_crypto_clickhouse-7.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. gapless_crypto_clickhouse/__init__.py +147 -0
  2. gapless_crypto_clickhouse/__probe__.py +349 -0
  3. gapless_crypto_clickhouse/api.py +1032 -0
  4. gapless_crypto_clickhouse/clickhouse/__init__.py +17 -0
  5. gapless_crypto_clickhouse/clickhouse/config.py +119 -0
  6. gapless_crypto_clickhouse/clickhouse/connection.py +269 -0
  7. gapless_crypto_clickhouse/clickhouse/schema.sql +98 -0
  8. gapless_crypto_clickhouse/clickhouse/schema_validator.py +312 -0
  9. gapless_crypto_clickhouse/clickhouse_query.py +642 -0
  10. gapless_crypto_clickhouse/collectors/__init__.py +21 -0
  11. gapless_crypto_clickhouse/collectors/binance_public_data_collector.py +1994 -0
  12. gapless_crypto_clickhouse/collectors/clickhouse_bulk_loader.py +446 -0
  13. gapless_crypto_clickhouse/collectors/concurrent_collection_orchestrator.py +407 -0
  14. gapless_crypto_clickhouse/collectors/csv_format_detector.py +123 -0
  15. gapless_crypto_clickhouse/collectors/httpx_downloader.py +395 -0
  16. gapless_crypto_clickhouse/collectors/hybrid_url_generator.py +316 -0
  17. gapless_crypto_clickhouse/exceptions.py +145 -0
  18. gapless_crypto_clickhouse/gap_filling/__init__.py +1 -0
  19. gapless_crypto_clickhouse/gap_filling/safe_file_operations.py +439 -0
  20. gapless_crypto_clickhouse/gap_filling/universal_gap_filler.py +757 -0
  21. gapless_crypto_clickhouse/llms.txt +268 -0
  22. gapless_crypto_clickhouse/probe.py +235 -0
  23. gapless_crypto_clickhouse/py.typed +0 -0
  24. gapless_crypto_clickhouse/query_api.py +374 -0
  25. gapless_crypto_clickhouse/resume/__init__.py +12 -0
  26. gapless_crypto_clickhouse/resume/intelligent_checkpointing.py +383 -0
  27. gapless_crypto_clickhouse/utils/__init__.py +29 -0
  28. gapless_crypto_clickhouse/utils/error_handling.py +202 -0
  29. gapless_crypto_clickhouse/utils/etag_cache.py +194 -0
  30. gapless_crypto_clickhouse/utils/timeframe_constants.py +90 -0
  31. gapless_crypto_clickhouse/utils/timestamp_format_analyzer.py +256 -0
  32. gapless_crypto_clickhouse/utils/timestamp_utils.py +130 -0
  33. gapless_crypto_clickhouse/validation/__init__.py +36 -0
  34. gapless_crypto_clickhouse/validation/csv_validator.py +677 -0
  35. gapless_crypto_clickhouse/validation/models.py +220 -0
  36. gapless_crypto_clickhouse/validation/storage.py +502 -0
  37. gapless_crypto_clickhouse-7.1.0.dist-info/METADATA +1277 -0
  38. gapless_crypto_clickhouse-7.1.0.dist-info/RECORD +40 -0
  39. gapless_crypto_clickhouse-7.1.0.dist-info/WHEEL +4 -0
  40. gapless_crypto_clickhouse-7.1.0.dist-info/licenses/LICENSE +21 -0
gapless_crypto_clickhouse/validation/models.py
@@ -0,0 +1,220 @@
+"""Pydantic models for validation report persistence.
+
+This module provides type-safe data models for validation reports with
+OpenAPI 3.1.1 compatibility for AI coding agent consumption.
+"""
+
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class ValidationReport(BaseModel):
+    """Structured validation report with full observability.
+
+    This model provides type-safe representation of CSV validation results
+    with automatic schema generation for OpenAPI/JSON Schema compliance.
+
+    Examples:
+        >>> from datetime import datetime, timezone
+        >>> report = ValidationReport(
+        ...     validation_timestamp=datetime.now(timezone.utc),
+        ...     file_path="/path/to/BTCUSDT-1h.csv",
+        ...     file_size_mb=15.3,
+        ...     total_bars=8760,
+        ...     total_errors=0,
+        ...     total_warnings=2,
+        ...     validation_summary="GOOD - 2 warnings",
+        ...     validation_duration_ms=123.45,
+        ...     structure_validation={},
+        ...     datetime_validation={},
+        ...     ohlcv_validation={},
+        ...     coverage_validation={},
+        ...     anomaly_validation={}
+        ... )
+        >>> report.model_dump_json()
+    """
+
+    model_config = ConfigDict(
+        json_schema_extra={
+            "example": {
+                "validation_timestamp": "2025-10-18T12:00:00Z",
+                "file_path": "/data/BTCUSDT-1h.csv",
+                "validator_version": "3.3.0",
+                "total_errors": 0,
+                "total_warnings": 2,
+                "validation_summary": "GOOD - 2 warnings",
+            }
+        }
+    )
+
+    # Metadata
+    validation_timestamp: datetime = Field(
+        description="ISO 8601 validation timestamp with timezone"
+    )
+    file_path: str = Field(description="Absolute path to validated CSV file")
+    file_size_mb: float = Field(description="File size in megabytes", ge=0)
+    validator_version: str = Field(default="3.3.0", description="Validator version (SemVer)")
+
+    # Extracted context from file path
+    symbol: Optional[str] = Field(
+        default=None,
+        description="Trading pair symbol extracted from filename (e.g., BTCUSDT)",
+    )
+    timeframe: Optional[str] = Field(
+        default=None, description="Timeframe extracted from filename (e.g., 1h)"
+    )
+
+    # Core Results
+    total_bars: int = Field(description="Total number of data bars validated", ge=0)
+    total_errors: int = Field(description="Total validation errors detected", ge=0)
+    total_warnings: int = Field(description="Total validation warnings detected", ge=0)
+    validation_summary: str = Field(description="Summary status: PERFECT | GOOD | FAILED")
+
+    # Performance Metrics
+    validation_duration_ms: float = Field(description="Validation duration in milliseconds", ge=0)
+
+    # Layer Results (detailed validation results as JSON)
+    structure_validation: Dict[str, Any] = Field(
+        description="Layer 1: Structure validation results"
+    )
+    datetime_validation: Dict[str, Any] = Field(description="Layer 2: DateTime validation results")
+    ohlcv_validation: Dict[str, Any] = Field(
+        description="Layer 3: OHLCV quality validation results"
+    )
+    coverage_validation: Dict[str, Any] = Field(description="Layer 4: Coverage validation results")
+    anomaly_validation: Dict[str, Any] = Field(description="Layer 5: Anomaly detection results")
+
+    # Flattened metrics for efficient querying (extracted from layer results)
+    date_range_start: Optional[datetime] = Field(
+        default=None, description="Start of data date range"
+    )
+    date_range_end: Optional[datetime] = Field(default=None, description="End of data date range")
+    duration_days: Optional[float] = Field(default=None, description="Duration of data in days")
+    gaps_found: Optional[int] = Field(default=None, description="Number of timestamp gaps detected")
+    chronological_order: Optional[bool] = Field(
+        default=None, description="Whether timestamps are chronologically ordered"
+    )
+
+    price_min: Optional[float] = Field(default=None, description="Minimum price value")
+    price_max: Optional[float] = Field(default=None, description="Maximum price value")
+    volume_min: Optional[float] = Field(default=None, description="Minimum volume")
+    volume_max: Optional[float] = Field(default=None, description="Maximum volume")
+    volume_mean: Optional[float] = Field(default=None, description="Mean volume")
+    ohlc_errors: Optional[int] = Field(default=None, description="Number of OHLC logic errors")
+    negative_zero_values: Optional[int] = Field(
+        default=None, description="Count of negative or zero price values"
+    )
+
+    expected_bars: Optional[int] = Field(default=None, description="Expected number of bars")
+    actual_bars: Optional[int] = Field(default=None, description="Actual number of bars")
+    coverage_percentage: Optional[float] = Field(
+        default=None, description="Coverage percentage (actual/expected * 100)"
+    )
+
+    price_outliers: Optional[int] = Field(
+        default=None, description="Number of price outliers detected"
+    )
+    volume_outliers: Optional[int] = Field(
+        default=None, description="Number of volume outliers detected"
+    )
+    suspicious_patterns: Optional[int] = Field(
+        default=None, description="Number of suspicious patterns detected"
+    )
+
+    @classmethod
+    def from_legacy_dict(
+        cls,
+        legacy: Dict[str, Any],
+        duration_ms: float = 0,
+        symbol: Optional[str] = None,
+        timeframe: Optional[str] = None,
+    ) -> "ValidationReport":
+        """Convert legacy dict-based validation results to typed report.
+
+        Args:
+            legacy: Legacy validation results dictionary from CSVValidator
+            duration_ms: Validation duration in milliseconds
+            symbol: Optional trading pair symbol (extracted from filename)
+            timeframe: Optional timeframe (extracted from filename)
+
+        Returns:
+            Typed ValidationReport instance
+
+        Examples:
+            >>> legacy_results = {
+            ...     "validation_timestamp": "2025-10-18T12:00:00Z",
+            ...     "file_path": "/data/BTCUSDT-1h.csv",
+            ...     "total_errors": 0,
+            ...     "total_warnings": 2,
+            ...     # ... more fields
+            ... }
+            >>> report = ValidationReport.from_legacy_dict(
+            ...     legacy_results,
+            ...     duration_ms=123.45,
+            ...     symbol="BTCUSDT",
+            ...     timeframe="1h"
+            ... )
+        """
+        # Parse datetime if string
+        validation_ts = legacy["validation_timestamp"]
+        if isinstance(validation_ts, str):
+            validation_ts = datetime.fromisoformat(validation_ts.rstrip("Z"))
+
+        # Extract flattened metrics from layer results
+        datetime_val = legacy.get("datetime_validation", {})
+        ohlcv_val = legacy.get("ohlcv_validation", {})
+        coverage_val = legacy.get("coverage_validation", {})
+        anomaly_val = legacy.get("anomaly_validation", {})
+
+        # Parse date range timestamps
+        date_range = datetime_val.get("date_range", {})
+        date_range_start = None
+        date_range_end = None
+        if date_range:
+            if "start" in date_range:
+                date_range_start = datetime.fromisoformat(date_range["start"].rstrip("Z"))
+            if "end" in date_range:
+                date_range_end = datetime.fromisoformat(date_range["end"].rstrip("Z"))
+
+        # Extract price range
+        price_range = ohlcv_val.get("price_range", {})
+        volume_stats = ohlcv_val.get("volume_stats", {})
+
+        return cls(
+            validation_timestamp=validation_ts,
+            file_path=legacy["file_path"],
+            file_size_mb=legacy.get("file_size_mb", 0.0),
+            symbol=symbol,
+            timeframe=timeframe,
+            total_bars=legacy.get("total_bars", 0),
+            total_errors=legacy["total_errors"],
+            total_warnings=legacy["total_warnings"],
+            validation_summary=legacy["validation_summary"],
+            validation_duration_ms=duration_ms,
+            structure_validation=legacy.get("structure_validation", {}),
+            datetime_validation=datetime_val,
+            ohlcv_validation=ohlcv_val,
+            coverage_validation=coverage_val,
+            anomaly_validation=anomaly_val,
+            # Flattened metrics for SQL queries
+            date_range_start=date_range_start,
+            date_range_end=date_range_end,
+            duration_days=datetime_val.get("duration_days"),
+            gaps_found=datetime_val.get("gaps_found"),
+            chronological_order=datetime_val.get("chronological_order"),
+            price_min=price_range.get("min"),
+            price_max=price_range.get("max"),
+            volume_min=volume_stats.get("min"),
+            volume_max=volume_stats.get("max"),
+            volume_mean=volume_stats.get("mean"),
+            ohlc_errors=ohlcv_val.get("ohlc_errors"),
+            negative_zero_values=ohlcv_val.get("negative_zero_values"),
+            expected_bars=coverage_val.get("expected_bars"),
+            actual_bars=coverage_val.get("actual_bars"),
+            coverage_percentage=coverage_val.get("coverage_percentage"),
+            price_outliers=anomaly_val.get("price_outliers"),
+            volume_outliers=anomaly_val.get("volume_outliers"),
+            suspicious_patterns=anomaly_val.get("suspicious_patterns"),
+        )
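
For readers evaluating the package, a minimal usage sketch (not taken from the package docs) of the round trip this model is built for: converting a legacy CSVValidator dict into a typed report via from_legacy_dict, then emitting the JSON payload and the OpenAPI-compatible schema. The import path follows the file list above; the dict contents are illustrative, and the only calls assumed beyond this diff are Pydantic v2's model_dump_json() and model_json_schema().

    # Hedged sketch: a legacy result dict carrying the keys from_legacy_dict()
    # reads; anything it does not find falls back to the model defaults above.
    from gapless_crypto_clickhouse.validation.models import ValidationReport

    legacy_results = {
        "validation_timestamp": "2025-10-18T12:00:00Z",
        "file_path": "/data/BTCUSDT-1h.csv",
        "file_size_mb": 15.3,
        "total_bars": 8760,
        "total_errors": 0,
        "total_warnings": 2,
        "validation_summary": "GOOD - 2 warnings",
        "datetime_validation": {
            "date_range": {"start": "2024-01-01T00:00:00Z", "end": "2024-12-31T23:00:00Z"},
            "gaps_found": 0,
            "chronological_order": True,
        },
    }

    report = ValidationReport.from_legacy_dict(
        legacy_results, duration_ms=123.45, symbol="BTCUSDT", timeframe="1h"
    )
    print(report.model_dump_json(indent=2))      # serialized report for persistence
    print(ValidationReport.model_json_schema())  # JSON Schema for OpenAPI 3.1 consumers

During this conversion the flattened fields (gaps_found, chronological_order, coverage_percentage, and so on) are populated from the nested layer dicts, so downstream consumers never have to unpack the layer JSON.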
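The inline comment "Flattened metrics for SQL queries" points at the design rationale: once persisted (presumably by validation/storage.py in the file list), reports can be filtered on plain columns rather than nested JSON. A hypothetical sketch, assuming reports land in a ClickHouse table named validation_reports with columns mirroring the flattened fields; the table name and client wiring are my assumptions, not taken from this diff:

    # Hypothetical: filter validation reports on flattened columns in ClickHouse.
    # "validation_reports" is an assumed table name for illustration only.
    import clickhouse_connect

    client = clickhouse_connect.get_client(host="localhost")
    result = client.query(
        """
        SELECT symbol, timeframe, coverage_percentage, gaps_found
        FROM validation_reports
        WHERE total_errors = 0 AND coverage_percentage < 100
        ORDER BY validation_timestamp DESC
        LIMIT 10
        """
    )
    for row in result.result_rows:
        print(row)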