rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,294 @@
1
+ # Issue #19: Gap classification extracted from continuity.py for modularization
2
+ """Gap classification types and presets for range bar validation.
3
+
4
+ This module provides the tiered gap classification system based on empirical
5
+ analysis of 30-month BTC data (Issue #19). It identifies 49 legitimate market
6
+ microstructure events and classifies gaps into severity tiers.
7
+
8
+ Gap Tiers:
9
+ - PRECISION: < 0.001% - Floating-point artifacts (always ignored)
10
+ - NOISE: 0.001% - 0.01% - Tick-level noise (logged, not flagged)
11
+ - MARKET_MOVE: 0.01% - 0.1% - Normal market movement (configurable)
12
+ - MICROSTRUCTURE: > 0.1% - Flash crashes, liquidations (warning/error)
13
+ - SESSION_BOUNDARY: > threshold*2 - Definite session break (always error)
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum, IntEnum
20
+ from typing import Literal
21
+
22
+ from rangebar.constants import _CRYPTO_BASES, _FOREX_CURRENCIES
23
+
24
# Public API of the gap-classification module (sorted: constants first,
# then classes, then functions).
__all__ = [
    "ASSET_CLASS_MULTIPLIERS",
    "VALIDATION_PRESETS",
    "AssetClass",
    "GapTier",
    "TierThresholds",
    "ValidationPreset",
    "detect_asset_class",
]
33
+
34
+
35
+ # ============================================================================
36
+ # Gap Tier Enum
37
+ # ============================================================================
38
+
39
+
40
class GapTier(IntEnum):
    """Severity tier assigned to a gap between consecutive range bars.

    Tier boundaries come from an empirical study of 30 months of BTC
    tick data (Issue #19), which surfaced 49 legitimate market
    microstructure events that must not be flagged as data errors.

    Classification logic, where ``threshold`` is the configured
    tolerance (illustrative, percentages of price):

    - gap < 0.001%         -> PRECISION
    - gap < 0.01%          -> NOISE
    - gap < 0.1%           -> MARKET_MOVE
    - gap < threshold * 2  -> MICROSTRUCTURE
    - otherwise            -> SESSION_BOUNDARY
    """

    # < 0.001% — floating-point artifact, always ignored.
    PRECISION = 1
    # 0.001% – 0.01% — tick-level noise, logged but never flagged.
    NOISE = 2
    # 0.01% – 0.1% — ordinary market movement, handling is configurable.
    MARKET_MOVE = 3
    # > 0.1% — flash crash / liquidation cascade, warning or error.
    MICROSTRUCTURE = 4
    # > threshold * 2 — definite session break, always an error.
    SESSION_BOUNDARY = 5
66
+
67
+
68
+ # ============================================================================
69
+ # Asset Class Enum
70
+ # ============================================================================
71
+
72
+
73
class AssetClass(Enum):
    """Asset class used to calibrate gap-tolerance multipliers.

    Typical gap magnitudes differ by market structure, so each class
    maps to a tolerance multiplier (see ``ASSET_CLASS_MULTIPLIERS``):
    crypto trades 24/7 with occasional flash crashes (baseline), forex
    is session-based with weekend gaps (tighter), and equities see
    overnight gaps and circuit breakers (looser).

    Examples
    --------
    >>> from rangebar import detect_asset_class, AssetClass
    >>> detect_asset_class("BTCUSDT")
    <AssetClass.CRYPTO: 'crypto'>
    >>> detect_asset_class("EURUSD")
    <AssetClass.FOREX: 'forex'>
    """

    # 24/7 markets; flash crashes possible.
    CRYPTO = "crypto"
    # Session-based trading; weekend gaps.
    FOREX = "forex"
    # Overnight gaps; circuit breakers.
    EQUITIES = "equities"
    # Unrecognized symbols fall back to crypto defaults.
    UNKNOWN = "unknown"
94
+
95
+
96
# Per-asset-class tolerance scaling, expressed relative to the crypto
# baseline (1.0).  Applied on top of a preset's base tolerance.
ASSET_CLASS_MULTIPLIERS: dict[AssetClass, float] = {
    AssetClass.CRYPTO: 1.0,    # baseline
    AssetClass.FOREX: 0.5,     # more stable sessions -> tighter
    AssetClass.EQUITIES: 1.5,  # overnight gaps -> looser
    AssetClass.UNKNOWN: 1.0,   # unrecognized symbols use the crypto default
}
103
+
104
+
105
def detect_asset_class(symbol: str) -> AssetClass:
    """Infer the asset class of *symbol* from its naming pattern.

    Detection rules, checked in order:

    1. Crypto — the symbol contains a known crypto base asset
       (``_CRYPTO_BASES``) or ends with a stablecoin quote
       (USDT/BUSD/USDC/TUSD/FDUSD).
    2. Forex — a canonical 6-character currency pair whose halves are
       both in ``_FOREX_CURRENCIES`` (e.g. EURUSD), or a commodity
       symbol quoted through forex brokers (XAU, XAG, BRENT, WTI).
    3. Unknown — anything else.

    Parameters
    ----------
    symbol : str
        Trading symbol (case-insensitive).

    Returns
    -------
    AssetClass
        Detected asset class.

    Examples
    --------
    >>> detect_asset_class("BTCUSDT")
    <AssetClass.CRYPTO: 'crypto'>
    >>> detect_asset_class("EURUSD")
    <AssetClass.FOREX: 'forex'>
    >>> detect_asset_class("AAPL")
    <AssetClass.UNKNOWN: 'unknown'>
    """
    sym = symbol.upper()

    # Rule 1: crypto — known base anywhere in the symbol, or a
    # stablecoin quote suffix.
    stablecoin_suffixes = ("USDT", "BUSD", "USDC", "TUSD", "FDUSD")
    if any(base in sym for base in _CRYPTO_BASES) or sym.endswith(stablecoin_suffixes):
        return AssetClass.CRYPTO

    # Rule 2a: forex — standard 3+3 currency pair such as EURUSD.
    forex_pair_len = 6
    if len(sym) == forex_pair_len:
        base_ccy, quote_ccy = sym[:3], sym[3:]
        if base_ccy in _FOREX_CURRENCIES and quote_ccy in _FOREX_CURRENCIES:
            return AssetClass.FOREX

    # Rule 2b: metals/energy commodities quoted via forex brokers.
    for commodity in ("XAU", "XAG", "BRENT", "WTI"):
        if commodity in sym:
            return AssetClass.FOREX

    # Rule 3: nothing matched.
    return AssetClass.UNKNOWN
154
+
155
+
156
+ # ============================================================================
157
+ # Configuration Dataclasses
158
+ # ============================================================================
159
+
160
+
161
@dataclass(frozen=True)
class TierThresholds:
    """Immutable boundaries between gap tiers, as price percentages.

    A value of ``0.00001`` means 0.001% (percentages are expressed as
    fractions of 1, not of 100).

    Attributes
    ----------
    precision : float
        Tier 1/2 boundary — gaps below this are floating-point artifacts.
    noise : float
        Tier 2/3 boundary — gaps below this are tick-level noise.
    market_move : float
        Tier 3/4 boundary — gaps below this are normal market movement.
    session_factor : float
        Tier 5 multiplier — gaps above ``threshold * session_factor``
        are treated as definite session breaks.

    Examples
    --------
    >>> thresholds = TierThresholds()
    >>> thresholds.precision
    1e-05
    >>> thresholds.noise
    0.0001
    """

    precision: float = 1e-05       # 0.001%  — tier 1/2 boundary
    noise: float = 1e-04           # 0.01%   — tier 2/3 boundary
    market_move: float = 1e-03     # 0.1%    — tier 3/4 boundary
    session_factor: float = 2.0    # tier 5 = threshold * factor
192
+
193
+
194
@dataclass(frozen=True)
class ValidationPreset:
    """Immutable bundle of validation settings for a named use case.

    A preset combines a gap tolerance, a failure mode, and the tier
    boundaries into one named configuration (see
    ``VALIDATION_PRESETS`` for the built-in catalogue).

    Attributes
    ----------
    tolerance_pct : float
        Maximum gap fraction before flagging (e.g. 0.01 = 1%).
    mode : Literal["error", "warn", "skip"]
        What to do when validation fails.
    tier_thresholds : TierThresholds
        Boundaries used to classify gaps into tiers.
    asset_class : AssetClass | None
        When set, overrides symbol-based auto-detection.
    description : str
        Human-readable summary of the preset's intent.

    Examples
    --------
    >>> preset = VALIDATION_PRESETS["research"]
    >>> preset.tolerance_pct
    0.02
    >>> preset.mode
    'warn'
    """

    # Maximum gap fraction before the gap is flagged.
    tolerance_pct: float
    # Behavior on validation failure.
    mode: Literal["error", "warn", "skip"]
    # Tier boundaries; each preset gets its own defaults instance.
    tier_thresholds: TierThresholds = field(default_factory=TierThresholds)
    # None -> auto-detect from the symbol.
    asset_class: AssetClass | None = None
    # Free-form description for docs/logging.
    description: str = ""
228
+
229
+
230
# Catalogue of named validation presets.  Keys are the preset names
# accepted by the validation entry points; values are frozen configs.
VALIDATION_PRESETS: dict[str, ValidationPreset] = {
    # -- General-purpose presets, from loosest to strictest ------------
    "permissive": ValidationPreset(
        mode="warn",
        tolerance_pct=0.05,  # 5%
        description="Accept most microstructure events, warn on extreme gaps",
    ),
    "research": ValidationPreset(
        mode="warn",
        tolerance_pct=0.02,  # 2%
        description="Standard exploratory analysis with monitoring",
    ),
    "standard": ValidationPreset(
        mode="warn",
        tolerance_pct=0.01,  # 1%
        description="Balanced tolerance for production backtesting",
    ),
    "strict": ValidationPreset(
        mode="error",
        tolerance_pct=0.005,  # 0.5%
        description="Strict validation for ML training data",
    ),
    "paranoid": ValidationPreset(
        mode="error",
        tolerance_pct=0.001,  # 0.1%
        description="Maximum strictness (original v6.1.0 behavior)",
    ),
    # -- Asset-class specific presets (pin asset_class explicitly) -----
    "crypto": ValidationPreset(
        mode="warn",
        tolerance_pct=0.02,  # 2%
        asset_class=AssetClass.CRYPTO,
        description="Crypto: Tuned for 24/7 markets with flash crashes",
    ),
    "forex": ValidationPreset(
        mode="warn",
        tolerance_pct=0.01,  # 1%
        asset_class=AssetClass.FOREX,
        description="Forex: Accounts for session boundaries",
    ),
    "equities": ValidationPreset(
        mode="warn",
        tolerance_pct=0.03,  # 3%
        asset_class=AssetClass.EQUITIES,
        description="Equities: Accounts for overnight gaps",
    ),
    # -- Special presets ----------------------------------------------
    "skip": ValidationPreset(
        mode="skip",
        tolerance_pct=0.0,
        description="Disable validation entirely",
    ),
    "audit": ValidationPreset(
        mode="error",
        tolerance_pct=0.002,  # 0.2%
        description="Data quality audit mode",
    ),
}
@@ -0,0 +1,317 @@
1
+ """Tier 0: Post-storage validation for cache integrity (<1 sec).
2
+
3
+ Run after every cache write to verify data was stored correctly.
4
+ This is the fastest validation tier - critical for detecting cache corruption.
5
+
6
+ Issue #39: Post-storage validation to verify cached data matches computed data.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import logging
13
+ from dataclasses import dataclass, field
14
+ from datetime import UTC, datetime
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ if TYPE_CHECKING:
18
+ import pandas as pd
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ @dataclass
24
+ class ValidationResult:
25
+ """Result of post-storage validation.
26
+
27
+ Attributes
28
+ ----------
29
+ passed : bool
30
+ True if all validation checks passed.
31
+ checks : dict[str, bool]
32
+ Individual check results.
33
+ details : dict[str, Any]
34
+ Additional details about validation (counts, timestamps, etc.).
35
+ timestamp : datetime
36
+ When validation was performed.
37
+ duration_ms : float
38
+ How long validation took in milliseconds.
39
+ """
40
+
41
+ passed: bool
42
+ checks: dict[str, bool] = field(default_factory=dict)
43
+ details: dict[str, Any] = field(default_factory=dict)
44
+ timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
45
+ duration_ms: float = 0.0
46
+
47
+ def to_dict(self) -> dict[str, Any]:
48
+ """Convert to dictionary for logging/serialization."""
49
+ return {
50
+ "passed": self.passed,
51
+ "checks": self.checks,
52
+ "details": self.details,
53
+ "timestamp": self.timestamp.isoformat(),
54
+ "duration_ms": self.duration_ms,
55
+ }
56
+
57
+
58
def compute_dataframe_checksum(df: pd.DataFrame) -> str:
    """Compute a checksum over a DataFrame's index and OHLCV columns.

    Hashes the stringified timestamps and OHLCV values with MD5 for a
    fast written-vs-reread equality check.  MD5 is used purely as a
    checksum, not for security; ``usedforsecurity=False`` keeps it
    usable on FIPS-restricted OpenSSL builds, where a plain
    ``hashlib.md5()`` raises ``ValueError``.  The digest itself is
    unchanged, so existing stored checksums remain comparable.

    Parameters
    ----------
    df : pd.DataFrame
        Range bar DataFrame; expected to have a DatetimeIndex and some
        subset of the Open/High/Low/Close/Volume columns.

    Returns
    -------
    str
        Hex digest of the checksum, or the literal string ``"empty"``
        when the frame has no rows or none of the key columns.
    """
    import pandas as pd

    # Only these columns participate in the checksum; extras are ignored.
    key_cols = ["Open", "High", "Low", "Close", "Volume"]
    present_cols = [c for c in key_cols if c in df.columns]

    # Nothing meaningful to hash.
    if not present_cols or df.empty:
        return "empty"

    # Checksum only — not a security hash (see docstring).
    hasher = hashlib.md5(usedforsecurity=False)

    # Fold in the index first (epoch-ns integers for a DatetimeIndex so
    # the representation is stable across pandas display settings).
    if isinstance(df.index, pd.DatetimeIndex):
        index_str = df.index.astype(int).astype(str).str.cat(sep=",")
    else:
        index_str = ",".join(str(x) for x in df.index)
    hasher.update(index_str.encode())

    # Then each present key column, in fixed order.
    for col in present_cols:
        col_str = df[col].astype(str).str.cat(sep=",")
        hasher.update(col_str.encode())

    return hasher.hexdigest()
100
+
101
+
102
def validate_post_storage(
    expected: pd.DataFrame,
    retrieved: pd.DataFrame | None,
    *,
    symbol: str = "",
    threshold_bps: int = 0,
) -> ValidationResult:
    """Verify that data read back from cache matches what was written.

    Fast (<1 sec) integrity check intended to run after every cache
    write.  Performs, in order:

    1. data was retrieved at all,
    2. row counts agree,
    3. first timestamps agree,
    4. last timestamps agree,
    5. OHLCV checksums agree.

    Parameters
    ----------
    expected : pd.DataFrame
        The DataFrame that was written to cache.
    retrieved : pd.DataFrame | None
        The DataFrame read back from cache (None if the read failed).
    symbol : str, optional
        Symbol name, used only for logging context.
    threshold_bps : int, optional
        Threshold, used only for logging context.

    Returns
    -------
    ValidationResult
        Pass/fail verdict with per-check results and diagnostic details.

    Examples
    --------
    >>> from rangebar.validation.post_storage import validate_post_storage
    >>> result = validate_post_storage(computed_df, cached_df, symbol="BTCUSDT")
    >>> if not result.passed:
    ...     logger.error("Post-storage validation FAILED: %s", result.checks)
    """
    import time

    t0 = time.perf_counter()
    checks: dict[str, bool] = {}
    details: dict[str, Any] = {"symbol": symbol, "threshold_bps": threshold_bps}

    # Check 1: the cache read must have produced a frame at all.
    if retrieved is None:
        checks["data_retrieved"] = False
        return ValidationResult(
            passed=False,
            checks=checks,
            details={**details, "error": "No data retrieved from cache"},
            duration_ms=(time.perf_counter() - t0) * 1000,
        )
    checks["data_retrieved"] = True

    # Check 2: row counts agree.
    n_expected = len(expected)
    n_retrieved = len(retrieved)
    checks["row_count_match"] = n_expected == n_retrieved
    details["expected_count"] = n_expected
    details["retrieved_count"] = n_retrieved
    if not checks["row_count_match"]:
        logger.warning(
            "Row count mismatch for %s: expected %d, got %d",
            symbol,
            n_expected,
            n_retrieved,
        )

    # Checks 3 & 4: boundary timestamps agree (or both frames are empty).
    if not expected.empty and not retrieved.empty:
        exp_first = expected.index[0]
        ret_first = retrieved.index[0]
        checks["first_timestamp_match"] = exp_first == ret_first
        details["expected_first_ts"] = str(exp_first)
        details["retrieved_first_ts"] = str(ret_first)

        exp_last = expected.index[-1]
        ret_last = retrieved.index[-1]
        checks["last_timestamp_match"] = exp_last == ret_last
        details["expected_last_ts"] = str(exp_last)
        details["retrieved_last_ts"] = str(ret_last)
    else:
        # At least one frame is empty: pass only when both are.
        same_emptiness = expected.empty == retrieved.empty
        checks["first_timestamp_match"] = same_emptiness
        checks["last_timestamp_match"] = same_emptiness

    # Check 5: OHLCV payload integrity via checksum comparison.
    checksum_expected = compute_dataframe_checksum(expected)
    checksum_retrieved = compute_dataframe_checksum(retrieved)
    checks["checksum_match"] = checksum_expected == checksum_retrieved
    # Truncated digests are enough to eyeball in logs.
    details["expected_checksum"] = checksum_expected[:16]
    details["retrieved_checksum"] = checksum_retrieved[:16]
    if not checks["checksum_match"]:
        logger.warning(
            "Checksum mismatch for %s: data corruption detected",
            symbol,
        )

    # Aggregate verdict and timing.
    passed = all(checks.values())
    duration_ms = (time.perf_counter() - t0) * 1000

    result = ValidationResult(
        passed=passed,
        checks=checks,
        details=details,
        duration_ms=duration_ms,
    )

    if passed:
        logger.debug(
            "Post-storage validation PASSED for %s (%d bars, %.1fms)",
            symbol,
            n_expected,
            duration_ms,
        )
    else:
        logger.warning(
            "Post-storage validation FAILED for %s: %s",
            symbol,
            {name: ok for name, ok in checks.items() if not ok},
        )

    return result
234
+
235
+
236
def validate_ohlc_invariants(df: pd.DataFrame) -> ValidationResult:
    """Check the OHLC price invariants on every bar.

    Invariants:

    - ``High >= max(Open, Close)``
    - ``Low  <= min(Open, Close)``

    These must hold for any valid OHLC data.  An empty frame passes
    trivially; missing OHLC columns fail with a ``columns_present``
    check.

    Parameters
    ----------
    df : pd.DataFrame
        Range bar DataFrame with Open, High, Low, Close columns.

    Returns
    -------
    ValidationResult
        Pass/fail verdict with counts and the first offending timestamp
        for each violated invariant.
    """
    import time

    t0 = time.perf_counter()
    checks: dict[str, bool] = {}
    details: dict[str, Any] = {"bar_count": len(df)}

    # No rows: nothing can violate the invariants.
    if df.empty:
        return ValidationResult(
            passed=True,
            checks={"empty_dataframe": True},
            details=details,
            duration_ms=(time.perf_counter() - t0) * 1000,
        )

    # All four OHLC columns are required.
    required = ["Open", "High", "Low", "Close"]
    missing = [c for c in required if c not in df.columns]
    if missing:
        return ValidationResult(
            passed=False,
            checks={"columns_present": False},
            details={**details, "missing_columns": missing},
            duration_ms=(time.perf_counter() - t0) * 1000,
        )
    checks["columns_present"] = True

    # Bar "body" extremes, computed once for both invariants.
    body_high = df[["Open", "Close"]].max(axis=1)
    body_low = df[["Open", "Close"]].min(axis=1)

    # Invariant A: High caps the bar body.
    high_ok = (df["High"] >= body_high).all()
    checks["high_ge_open_close"] = bool(high_ok)
    if not high_ok:
        offenders = df[df["High"] < body_high]
        details["high_invalid_count"] = len(offenders)
        details["high_invalid_first_ts"] = str(offenders.index[0])

    # Invariant B: Low floors the bar body.
    low_ok = (df["Low"] <= body_low).all()
    checks["low_le_open_close"] = bool(low_ok)
    if not low_ok:
        offenders = df[df["Low"] > body_low]
        details["low_invalid_count"] = len(offenders)
        details["low_invalid_first_ts"] = str(offenders.index[0])

    return ValidationResult(
        passed=all(checks.values()),
        checks=checks,
        details=details,
        duration_ms=(time.perf_counter() - t0) * 1000,
    )
+ )
310
+
311
+
312
# Public API of the post-storage validation module (kept sorted:
# classes before functions).
__all__ = [
    "ValidationResult",
    "compute_dataframe_checksum",
    "validate_ohlc_invariants",
    "validate_post_storage",
]
+ ]