gapless-crypto-clickhouse 7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gapless_crypto_clickhouse/__init__.py +147 -0
- gapless_crypto_clickhouse/__probe__.py +349 -0
- gapless_crypto_clickhouse/api.py +1032 -0
- gapless_crypto_clickhouse/clickhouse/__init__.py +17 -0
- gapless_crypto_clickhouse/clickhouse/config.py +119 -0
- gapless_crypto_clickhouse/clickhouse/connection.py +269 -0
- gapless_crypto_clickhouse/clickhouse/schema.sql +98 -0
- gapless_crypto_clickhouse/clickhouse/schema_validator.py +312 -0
- gapless_crypto_clickhouse/clickhouse_query.py +642 -0
- gapless_crypto_clickhouse/collectors/__init__.py +21 -0
- gapless_crypto_clickhouse/collectors/binance_public_data_collector.py +1994 -0
- gapless_crypto_clickhouse/collectors/clickhouse_bulk_loader.py +446 -0
- gapless_crypto_clickhouse/collectors/concurrent_collection_orchestrator.py +407 -0
- gapless_crypto_clickhouse/collectors/csv_format_detector.py +123 -0
- gapless_crypto_clickhouse/collectors/httpx_downloader.py +395 -0
- gapless_crypto_clickhouse/collectors/hybrid_url_generator.py +316 -0
- gapless_crypto_clickhouse/exceptions.py +145 -0
- gapless_crypto_clickhouse/gap_filling/__init__.py +1 -0
- gapless_crypto_clickhouse/gap_filling/safe_file_operations.py +439 -0
- gapless_crypto_clickhouse/gap_filling/universal_gap_filler.py +757 -0
- gapless_crypto_clickhouse/llms.txt +268 -0
- gapless_crypto_clickhouse/probe.py +235 -0
- gapless_crypto_clickhouse/py.typed +0 -0
- gapless_crypto_clickhouse/query_api.py +374 -0
- gapless_crypto_clickhouse/resume/__init__.py +12 -0
- gapless_crypto_clickhouse/resume/intelligent_checkpointing.py +383 -0
- gapless_crypto_clickhouse/utils/__init__.py +29 -0
- gapless_crypto_clickhouse/utils/error_handling.py +202 -0
- gapless_crypto_clickhouse/utils/etag_cache.py +194 -0
- gapless_crypto_clickhouse/utils/timeframe_constants.py +90 -0
- gapless_crypto_clickhouse/utils/timestamp_format_analyzer.py +256 -0
- gapless_crypto_clickhouse/utils/timestamp_utils.py +130 -0
- gapless_crypto_clickhouse/validation/__init__.py +36 -0
- gapless_crypto_clickhouse/validation/csv_validator.py +677 -0
- gapless_crypto_clickhouse/validation/models.py +220 -0
- gapless_crypto_clickhouse/validation/storage.py +502 -0
- gapless_crypto_clickhouse-7.1.0.dist-info/METADATA +1277 -0
- gapless_crypto_clickhouse-7.1.0.dist-info/RECORD +40 -0
- gapless_crypto_clickhouse-7.1.0.dist-info/WHEEL +4 -0
- gapless_crypto_clickhouse-7.1.0.dist-info/licenses/LICENSE +21 -0
|
"""Pydantic models for validation report persistence.

This module provides type-safe data models for validation reports with
OpenAPI 3.1.1 compatibility for AI coding agent consumption.
"""

from datetime import datetime
from typing import Any, Dict, Optional

from pydantic import BaseModel, ConfigDict, Field
class ValidationReport(BaseModel):
    """Structured validation report with full observability.

    This model provides type-safe representation of CSV validation results
    with automatic schema generation for OpenAPI/JSON Schema compliance.

    Examples:
        >>> from datetime import datetime, timezone
        >>> report = ValidationReport(
        ...     validation_timestamp=datetime.now(timezone.utc),
        ...     file_path="/path/to/BTCUSDT-1h.csv",
        ...     file_size_mb=15.3,
        ...     total_bars=8760,
        ...     total_errors=0,
        ...     total_warnings=2,
        ...     validation_summary="GOOD - 2 warnings",
        ...     validation_duration_ms=123.45,
        ...     structure_validation={},
        ...     datetime_validation={},
        ...     ohlcv_validation={},
        ...     coverage_validation={},
        ...     anomaly_validation={}
        ... )
        >>> report.model_dump_json()
    """

    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "validation_timestamp": "2025-10-18T12:00:00Z",
                "file_path": "/data/BTCUSDT-1h.csv",
                "validator_version": "3.3.0",
                "total_errors": 0,
                "total_warnings": 2,
                "validation_summary": "GOOD - 2 warnings",
            }
        }
    )

    # Metadata
    validation_timestamp: datetime = Field(
        description="ISO 8601 validation timestamp with timezone"
    )
    file_path: str = Field(description="Absolute path to validated CSV file")
    file_size_mb: float = Field(description="File size in megabytes", ge=0)
    validator_version: str = Field(default="3.3.0", description="Validator version (SemVer)")

    # Extracted context from file path
    symbol: Optional[str] = Field(
        default=None,
        description="Trading pair symbol extracted from filename (e.g., BTCUSDT)",
    )
    timeframe: Optional[str] = Field(
        default=None, description="Timeframe extracted from filename (e.g., 1h)"
    )

    # Core Results
    total_bars: int = Field(description="Total number of data bars validated", ge=0)
    total_errors: int = Field(description="Total validation errors detected", ge=0)
    total_warnings: int = Field(description="Total validation warnings detected", ge=0)
    validation_summary: str = Field(description="Summary status: PERFECT | GOOD | FAILED")

    # Performance Metrics
    validation_duration_ms: float = Field(description="Validation duration in milliseconds", ge=0)

    # Layer Results (detailed validation results as JSON)
    structure_validation: Dict[str, Any] = Field(
        description="Layer 1: Structure validation results"
    )
    datetime_validation: Dict[str, Any] = Field(description="Layer 2: DateTime validation results")
    ohlcv_validation: Dict[str, Any] = Field(
        description="Layer 3: OHLCV quality validation results"
    )
    coverage_validation: Dict[str, Any] = Field(description="Layer 4: Coverage validation results")
    anomaly_validation: Dict[str, Any] = Field(description="Layer 5: Anomaly detection results")

    # Flattened metrics for efficient querying (extracted from layer results)
    date_range_start: Optional[datetime] = Field(
        default=None, description="Start of data date range"
    )
    date_range_end: Optional[datetime] = Field(default=None, description="End of data date range")
    duration_days: Optional[float] = Field(default=None, description="Duration of data in days")
    gaps_found: Optional[int] = Field(default=None, description="Number of timestamp gaps detected")
    chronological_order: Optional[bool] = Field(
        default=None, description="Whether timestamps are chronologically ordered"
    )

    price_min: Optional[float] = Field(default=None, description="Minimum price value")
    price_max: Optional[float] = Field(default=None, description="Maximum price value")
    volume_min: Optional[float] = Field(default=None, description="Minimum volume")
    volume_max: Optional[float] = Field(default=None, description="Maximum volume")
    volume_mean: Optional[float] = Field(default=None, description="Mean volume")
    ohlc_errors: Optional[int] = Field(default=None, description="Number of OHLC logic errors")
    negative_zero_values: Optional[int] = Field(
        default=None, description="Count of negative or zero price values"
    )

    expected_bars: Optional[int] = Field(default=None, description="Expected number of bars")
    actual_bars: Optional[int] = Field(default=None, description="Actual number of bars")
    coverage_percentage: Optional[float] = Field(
        default=None, description="Coverage percentage (actual/expected * 100)"
    )

    price_outliers: Optional[int] = Field(
        default=None, description="Number of price outliers detected"
    )
    volume_outliers: Optional[int] = Field(
        default=None, description="Number of volume outliers detected"
    )
    suspicious_patterns: Optional[int] = Field(
        default=None, description="Number of suspicious patterns detected"
    )

    @staticmethod
    def _parse_iso_timestamp(value: str) -> datetime:
        """Parse an ISO 8601 string, accepting a trailing 'Z' UTC designator.

        Maps 'Z' to '+00:00' so the parsed datetime stays timezone-aware.
        The previous rstrip('Z') approach silently discarded the UTC marker
        (yielding a naive datetime despite the field being documented as
        timezone-aware), and datetime.fromisoformat() rejects a bare 'Z'
        suffix on Python < 3.11.
        """
        return datetime.fromisoformat(value.replace("Z", "+00:00"))

    @classmethod
    def from_legacy_dict(
        cls,
        legacy: Dict[str, Any],
        duration_ms: float = 0,
        symbol: Optional[str] = None,
        timeframe: Optional[str] = None,
    ) -> "ValidationReport":
        """Convert legacy dict-based validation results to typed report.

        Args:
            legacy: Legacy validation results dictionary from CSVValidator
            duration_ms: Validation duration in milliseconds
            symbol: Optional trading pair symbol (extracted from filename)
            timeframe: Optional timeframe (extracted from filename)

        Returns:
            Typed ValidationReport instance

        Examples:
            >>> legacy_results = {
            ...     "validation_timestamp": "2025-10-18T12:00:00Z",
            ...     "file_path": "/data/BTCUSDT-1h.csv",
            ...     "total_errors": 0,
            ...     "total_warnings": 2,
            ...     # ... more fields
            ... }
            >>> report = ValidationReport.from_legacy_dict(
            ...     legacy_results,
            ...     duration_ms=123.45,
            ...     symbol="BTCUSDT",
            ...     timeframe="1h"
            ... )
        """
        # Parse datetime if string (legacy reports serialize timestamps as ISO strings)
        validation_ts = legacy["validation_timestamp"]
        if isinstance(validation_ts, str):
            validation_ts = cls._parse_iso_timestamp(validation_ts)

        # Extract flattened metrics from layer results
        datetime_val = legacy.get("datetime_validation", {})
        ohlcv_val = legacy.get("ohlcv_validation", {})
        coverage_val = legacy.get("coverage_validation", {})
        anomaly_val = legacy.get("anomaly_validation", {})

        # Parse date range timestamps (optional keys in the legacy layout)
        date_range = datetime_val.get("date_range", {})
        date_range_start = (
            cls._parse_iso_timestamp(date_range["start"]) if "start" in date_range else None
        )
        date_range_end = (
            cls._parse_iso_timestamp(date_range["end"]) if "end" in date_range else None
        )

        # Extract price range / volume stats sub-dicts used by the flattened fields
        price_range = ohlcv_val.get("price_range", {})
        volume_stats = ohlcv_val.get("volume_stats", {})

        return cls(
            validation_timestamp=validation_ts,
            file_path=legacy["file_path"],
            file_size_mb=legacy.get("file_size_mb", 0.0),
            symbol=symbol,
            timeframe=timeframe,
            total_bars=legacy.get("total_bars", 0),
            total_errors=legacy["total_errors"],
            total_warnings=legacy["total_warnings"],
            validation_summary=legacy["validation_summary"],
            validation_duration_ms=duration_ms,
            structure_validation=legacy.get("structure_validation", {}),
            datetime_validation=datetime_val,
            ohlcv_validation=ohlcv_val,
            coverage_validation=coverage_val,
            anomaly_validation=anomaly_val,
            # Flattened metrics for SQL queries
            date_range_start=date_range_start,
            date_range_end=date_range_end,
            duration_days=datetime_val.get("duration_days"),
            gaps_found=datetime_val.get("gaps_found"),
            chronological_order=datetime_val.get("chronological_order"),
            price_min=price_range.get("min"),
            price_max=price_range.get("max"),
            volume_min=volume_stats.get("min"),
            volume_max=volume_stats.get("max"),
            volume_mean=volume_stats.get("mean"),
            ohlc_errors=ohlcv_val.get("ohlc_errors"),
            negative_zero_values=ohlcv_val.get("negative_zero_values"),
            expected_bars=coverage_val.get("expected_bars"),
            actual_bars=coverage_val.get("actual_bars"),
            coverage_percentage=coverage_val.get("coverage_percentage"),
            price_outliers=anomaly_val.get("price_outliers"),
            volume_outliers=anomaly_val.get("volume_outliers"),
            suspicious_patterns=anomaly_val.get("suspicious_patterns"),
        )