rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/streaming.py ADDED
@@ -0,0 +1,300 @@
+ # ADR: docs/adr/2026-01-31-realtime-streaming-api.md
+ """Real-time streaming API for range bar construction.
+
+ This module provides async Python APIs for constructing range bars from live
+ data sources (Binance WebSocket, Exness tick feeds).
+
+ Architecture:
+ - Low-level: Callback-based Rust bindings (PyBinanceLiveStream)
+ - High-level: Python async generators built on top
+
+ Examples
+ --------
+ Async generator (recommended for most use cases):
+
+ >>> import asyncio
+ >>> from rangebar.streaming import stream_binance_live
+ >>>
+ >>> async def main():
+ ...     async for bar in stream_binance_live("BTCUSDT", threshold_bps=250):
+ ...         print(f"New bar: {bar['close']}")
+ ...
+ >>> asyncio.run(main())
+
+ Low-level callback interface:
+
+ >>> from rangebar.streaming import BinanceLiveStream
+ >>>
+ >>> stream = BinanceLiveStream("BTCUSDT", threshold_decimal_bps=250)
+ >>> stream.connect()
+ >>> while stream.is_connected:
+ ...     bar = stream.next_bar(timeout_ms=5000)
+ ...     if bar:
+ ...         print(f"New bar: {bar['close']}")
+
+ Custom data source with StreamingRangeBarProcessor:
+
+ >>> from rangebar.streaming import StreamingRangeBarProcessor
+ >>>
+ >>> processor = StreamingRangeBarProcessor(250)
+ >>> for trade in my_trade_source():
+ ...     bars = processor.process_trade(trade)
+ ...     for bar in bars:
+ ...         print(f"Completed bar: {bar['close']}")
+ """
+
+ from __future__ import annotations
+
+ import asyncio
+ import random
+ import time
+ from collections.abc import AsyncIterator
+ from dataclasses import dataclass
+ from typing import TYPE_CHECKING
+
+ from ._core import (
+     BinanceLiveStream,
+     StreamingConfig,
+     StreamingMetrics,
+     StreamingRangeBarProcessor,
+ )
+
+ if TYPE_CHECKING:
+     from typing import Any
+
+ __all__ = [
+     "BinanceLiveStream",
+     "ReconnectionConfig",
+     "StreamingConfig",
+     "StreamingError",
+     "StreamingMetrics",
+     "StreamingRangeBarProcessor",
+     "stream_binance_live",
+ ]
+
+
+ class StreamingError(Exception):
+     """Error during streaming operation."""
+
+
+ @dataclass
+ class ReconnectionConfig:
+     """Configuration for automatic reconnection with jitter.
+
+     Attributes:
+         max_retries: Maximum reconnection attempts (0 = infinite)
+         initial_delay_s: Initial delay before first retry
+         max_delay_s: Maximum delay between retries
+         backoff_factor: Multiplier for exponential backoff
+         jitter_factor: Random jitter range (0.5 = ±50% of delay)
+         max_total_duration_s: Maximum total time spent reconnecting (0 = infinite)
+
+     Notes:
+         Jitter is applied to prevent thundering herd when multiple clients
+         reconnect simultaneously. The actual delay is:
+         `delay * (1 - jitter_factor + random() * 2 * jitter_factor)`
+
+         For jitter_factor=0.5, delay varies from 50% to 150% of base delay.
+     """
+
+     max_retries: int = 0  # 0 = infinite
+     initial_delay_s: float = 1.0
+     max_delay_s: float = 60.0
+     backoff_factor: float = 2.0
+     jitter_factor: float = 0.5  # ±50% jitter to prevent thundering herd
+     max_total_duration_s: float = 0.0  # 0 = infinite
+
+
+ async def stream_binance_live(
+     symbol: str,
+     threshold_bps: int = 250,
+     *,
+     reconnect: bool = True,
+     reconnect_config: ReconnectionConfig | None = None,
+ ) -> AsyncIterator[dict[str, Any]]:
+     """Stream range bars from Binance WebSocket in real-time.
+
+     This is an async generator that yields completed range bars as they
+     are constructed from live trade data.
+
+     Args:
+         symbol: Trading pair (e.g., "BTCUSDT")
+         threshold_bps: Range bar threshold in decimal basis points (250 = 0.25%)
+         reconnect: Whether to automatically reconnect on disconnect
+         reconnect_config: Custom reconnection settings
+
+     Yields:
+         Range bar dicts with OHLCV + microstructure features
+
+     Raises:
+         StreamingError: If connection fails and reconnection is disabled
+
+     Example:
+         >>> async for bar in stream_binance_live("BTCUSDT", threshold_bps=250):
+         ...     print(f"New bar: {bar['close']}, OFI: {bar['ofi']}")
+     """
+     if reconnect_config is None:
+         reconnect_config = ReconnectionConfig()
+
+     retry_count = 0
+     current_delay = reconnect_config.initial_delay_s
+     reconnect_start_time: float | None = None
+
+     while True:
+         try:
+             # Create stream and connect
+             stream = BinanceLiveStream(symbol, threshold_bps)
+
+             # Run connect in thread pool to avoid blocking event loop
+             loop = asyncio.get_event_loop()
+             await loop.run_in_executor(None, stream.connect)
+
+             # Reset retry state on successful connection
+             retry_count = 0
+             current_delay = reconnect_config.initial_delay_s
+             reconnect_start_time = None
+
+             # Yield bars as they arrive
+             while stream.is_connected:
+                 # Poll for bar with timeout (non-blocking via thread pool)
+                 # Bind stream as default arg to avoid B023 closure issue
+                 bar = await loop.run_in_executor(
+                     None, lambda s=stream: s.next_bar(timeout_ms=1000)
+                 )
+
+                 if bar is not None:
+                     yield bar
+
+                 # Allow other coroutines to run
+                 await asyncio.sleep(0)
+
+             # Stream disconnected
+             if not reconnect:
+                 break
+
+         except Exception as e:
+             if not reconnect:
+                 msg = f"Stream connection failed: {e}"
+                 raise StreamingError(msg) from e
+
+             # Track reconnection start time
+             if reconnect_start_time is None:
+                 reconnect_start_time = time.monotonic()
+
+             retry_count += 1
+
+             # Check max retries
+             if (
+                 reconnect_config.max_retries > 0
+                 and retry_count > reconnect_config.max_retries
+             ):
+                 msg = f"Max retries ({reconnect_config.max_retries}) exceeded"
+                 raise StreamingError(msg) from e
+
+             # Check max total duration
+             if reconnect_config.max_total_duration_s > 0:
+                 elapsed = time.monotonic() - reconnect_start_time
+                 if elapsed > reconnect_config.max_total_duration_s:
+                     msg = (
+                         f"Max reconnection duration "
+                         f"({reconnect_config.max_total_duration_s:.0f}s) exceeded"
+                     )
+                     raise StreamingError(msg) from e
+
+             # Apply jitter to prevent thundering herd
+             # Jitter range: [1 - jitter_factor, 1 + jitter_factor]
+             jitter_multiplier = 1.0 - reconnect_config.jitter_factor + (
+                 random.random() * 2 * reconnect_config.jitter_factor
+             )
+             jittered_delay = current_delay * jitter_multiplier
+
+             # Log and wait before retry
+             print(
+                 f"Stream disconnected, retrying in {jittered_delay:.1f}s "
+                 f"(attempt {retry_count})"
+             )
+             await asyncio.sleep(jittered_delay)
+
+             # Exponential backoff (applied to base delay, not jittered)
+             current_delay = min(
+                 current_delay * reconnect_config.backoff_factor,
+                 reconnect_config.max_delay_s,
+             )
+
+
+ class AsyncStreamingProcessor:
+     """Async wrapper for StreamingRangeBarProcessor.
+
+     This class provides an async interface for processing trades from
+     any data source into range bars.
+
+     Example:
+         >>> processor = AsyncStreamingProcessor(250)
+         >>> async for trade in my_async_trade_source():
+         ...     bars = await processor.process_trade(trade)
+         ...     for bar in bars:
+         ...         await handle_new_bar(bar)
+     """
+
+     def __init__(self, threshold_decimal_bps: int) -> None:
+         """Create async streaming processor.
+
+         Args:
+             threshold_decimal_bps: Range bar threshold (250 = 0.25%)
+         """
+         self._processor = StreamingRangeBarProcessor(threshold_decimal_bps)
+         self._lock = asyncio.Lock()
+
+     async def process_trade(self, trade: dict[str, Any]) -> list[dict[str, Any]]:
+         """Process a single trade asynchronously.
+
+         Args:
+             trade: Trade dict with timestamp, price, quantity/volume
+
+         Returns:
+             List of completed bar dicts (usually 0 or 1)
+         """
+         async with self._lock:
+             loop = asyncio.get_event_loop()
+             return await loop.run_in_executor(
+                 None, self._processor.process_trade, trade
+             )
+
+     async def process_trades(
+         self, trades: list[dict[str, Any]]
+     ) -> list[dict[str, Any]]:
+         """Process multiple trades asynchronously.
+
+         Args:
+             trades: List of trade dicts
+
+         Returns:
+             List of completed bar dicts
+         """
+         async with self._lock:
+             loop = asyncio.get_event_loop()
+             return await loop.run_in_executor(
+                 None, self._processor.process_trades, trades
+             )
+
+     async def get_incomplete_bar(self) -> dict[str, Any] | None:
+         """Get current incomplete bar asynchronously."""
+         async with self._lock:
+             loop = asyncio.get_event_loop()
+             return await loop.run_in_executor(
+                 None, self._processor.get_incomplete_bar
+             )
+
+     @property
+     def trades_processed(self) -> int:
+         """Number of trades processed."""
+         return self._processor.trades_processed
+
+     @property
+     def bars_generated(self) -> int:
+         """Number of bars generated."""
+         return self._processor.bars_generated
+
+     def get_metrics(self) -> StreamingMetrics:
+         """Get streaming metrics."""
+         return self._processor.get_metrics()
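The ReconnectionConfig docstring above fully determines the retry schedule: the base delay grows by backoff_factor up to max_delay_s, and each actual wait is the base delay scaled by a uniform jitter in [1 - jitter_factor, 1 + jitter_factor]. The following is a minimal standalone sketch of that schedule using the defaults shown in the diff; the helper name jittered_backoff_delays is illustrative and is not part of the wheel.

import random

def jittered_backoff_delays(
    attempts: int,
    initial_delay_s: float = 1.0,
    max_delay_s: float = 60.0,
    backoff_factor: float = 2.0,
    jitter_factor: float = 0.5,
) -> list[float]:
    """Reproduce the delay schedule documented by ReconnectionConfig (sketch)."""
    delays = []
    base = initial_delay_s
    for _ in range(attempts):
        # Jitter multiplier drawn uniformly from [1 - jitter_factor, 1 + jitter_factor]
        multiplier = 1.0 - jitter_factor + random.random() * 2 * jitter_factor
        delays.append(base * multiplier)
        # Exponential backoff applied to the base delay, capped at max_delay_s
        base = min(base * backoff_factor, max_delay_s)
    return delays

# With the defaults, base delays are 1, 2, 4, 8, 16 s (capped at 60 s),
# and each actual wait lands between 50% and 150% of its base value.
print(jittered_backoff_delays(5))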
rangebar/validation/__init__.py ADDED
@@ -0,0 +1,69 @@
+ """Validation framework for microstructure features (Issue #25) and cache integrity (Issue #39).
+
+ Provides tiered validation for market microstructure features and cache operations:
+ - Tier 0: Cache staleness detection (<5ms) - schema evolution support
+ - Tier 0: Post-storage validation after cache operations (<1 sec) - Issue #39
+ - Tier 1: Auto-validation on every precompute (<30 sec)
+ - Tier 2: Statistical validation before production ML (~10 min)
+ - Tier 3: Feature importance and drift analysis (30+ min, on-demand)
+ - Continuity: Tiered gap classification for range bar data (Issue #19)
+ """
+
+ from .cache_staleness import (
+     StalenessResult,
+     detect_staleness,
+     validate_schema_version,
+ )
+ from .continuity import (
+     ASSET_CLASS_MULTIPLIERS,
+     VALIDATION_PRESETS,
+     AssetClass,
+     ContinuityError,
+     ContinuityWarning,
+     GapInfo,
+     GapTier,
+     TieredValidationResult,
+     TierSummary,
+     TierThresholds,
+     ValidationPreset,
+     detect_asset_class,
+     validate_continuity,
+     validate_continuity_tiered,
+     validate_junction_continuity,
+ )
+ from .post_storage import (
+     ValidationResult,
+     compute_dataframe_checksum,
+     validate_ohlc_invariants,
+     validate_post_storage,
+ )
+ from .tier1 import FEATURE_COLS, validate_tier1
+ from .tier2 import validate_tier2
+
+ __all__ = [
+     "ASSET_CLASS_MULTIPLIERS",
+     "FEATURE_COLS",
+     "VALIDATION_PRESETS",
+     "AssetClass",
+     "ContinuityError",
+     "ContinuityWarning",
+     "GapInfo",
+     "GapTier",
+     "StalenessResult",
+     "TierSummary",
+     "TierThresholds",
+     "TieredValidationResult",
+     "ValidationPreset",
+     "ValidationResult",
+     "compute_dataframe_checksum",
+     "detect_asset_class",
+     "detect_staleness",
+     "validate_continuity",
+     "validate_continuity_tiered",
+     "validate_junction_continuity",
+     "validate_ohlc_invariants",
+     "validate_post_storage",
+     "validate_schema_version",
+     "validate_tier1",
+     "validate_tier2",
+ ]
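The module docstring above describes Tier 0 as a check that runs on every cache read. A minimal sketch of how such a read guard could look, assuming only detect_staleness and StalenessResult from this package; load_bars_with_tier0_check and read_cached are hypothetical names standing in for the caller's own cache plumbing.

from collections.abc import Callable

import pandas as pd

from rangebar.validation import detect_staleness

def load_bars_with_tier0_check(read_cached: Callable[[], pd.DataFrame]) -> pd.DataFrame | None:
    """Return cached bars only if Tier 0 staleness checks pass (illustrative sketch)."""
    df = read_cached()  # however the cache is actually read in your code
    result = detect_staleness(df, require_microstructure=True)
    if result.is_stale:
        # e.g. "All VWAP values are zero (pre-v7.0 cache data)"
        print(f"Stale cache ({result.confidence} confidence): {result.reason}")
        for rec in result.recommendations:
            print(f"  -> {rec}")
        return None  # signal the caller to recompute instead of using stale data
    return df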
rangebar/validation/cache_staleness.py ADDED
@@ -0,0 +1,277 @@
+ # polars-exception: ClickHouse cache returns Pandas for backtesting.py
+ """Cache staleness detection for schema evolution.
+
+ This module provides content-based validation to detect stale cached data
+ that was computed with older versions lacking microstructure features.
+
+ Tier 0 validation: Fast staleness detection (<5ms for 100K bars).
+ Run on every cache read when microstructure features are requested.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from dataclasses import dataclass, field
+ from typing import TYPE_CHECKING, Literal
+
+ if TYPE_CHECKING:
+     import pandas as pd
+
+ from rangebar.constants import MICROSTRUCTURE_COLUMNS
+
+ logger = logging.getLogger(__name__)
+
+ # Semantic version has 3 parts
+ _VERSION_PARTS = 3
+
+
+ @dataclass
+ class StalenessResult:
+     """Result of cache staleness detection.
+
+     Attributes
+     ----------
+     is_stale : bool
+         True if cached data is detected as stale and should be invalidated.
+     reason : str | None
+         Human-readable description of why data is stale (None if not stale).
+     confidence : Literal["high", "medium", "low"]
+         Confidence level of staleness detection.
+     checks_passed : dict[str, bool]
+         Individual validation checks and their results.
+     recommendations : list[str]
+         Suggested actions to resolve staleness.
+     """
+
+     is_stale: bool
+     reason: str | None = None
+     confidence: Literal["high", "medium", "low"] = "high"
+     checks_passed: dict[str, bool] = field(default_factory=dict)
+     recommendations: list[str] = field(default_factory=list)
+
+
+ def _check_vwap(
+     df: pd.DataFrame,
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> None:
+     """Validate VWAP is within [Low, High] and not all zeros."""
+     if "vwap" not in df.columns:
+         return
+
+     vwap_all_zero = (df["vwap"] == 0).all()
+     checks["vwap_not_all_zero"] = not vwap_all_zero
+
+     if vwap_all_zero:
+         reasons.append("All VWAP values are zero (pre-v7.0 cache data)")
+     else:
+         vwap_valid = (df["vwap"] >= df["Low"]) & (df["vwap"] <= df["High"])
+         checks["vwap_bounded"] = vwap_valid.all()
+         if not vwap_valid.all():
+             invalid_count = (~vwap_valid).sum()
+             reasons.append(f"VWAP outside [Low, High] for {invalid_count} bars")
+
+
+ def _check_bounded_columns(
+     df: pd.DataFrame,
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> None:
+     """Validate bounded microstructure columns are within expected ranges."""
+     # OFI in [-1, 1]
+     if "ofi" in df.columns:
+         ofi_bounded = df["ofi"].between(-1, 1).all()
+         checks["ofi_bounded"] = ofi_bounded
+         if not ofi_bounded:
+             reasons.append("OFI values outside [-1, 1] range")
+
+     # Turnover imbalance in [-1, 1]
+     if "turnover_imbalance" in df.columns:
+         ti_bounded = df["turnover_imbalance"].between(-1, 1).all()
+         checks["turnover_imbalance_bounded"] = ti_bounded
+         if not ti_bounded:
+             reasons.append("Turnover imbalance outside [-1, 1] range")
+
+     # Duration non-negative
+     if "duration_us" in df.columns:
+         duration_valid = (df["duration_us"] >= 0).all()
+         checks["duration_non_negative"] = duration_valid
+         if not duration_valid:
+             reasons.append("Negative duration values detected")
+
+     # Aggregation density >= 1
+     if "aggregation_density" in df.columns:
+         agg_valid = (df["aggregation_density"] >= 1).all()
+         checks["aggregation_density_valid"] = agg_valid
+         if not agg_valid:
+             reasons.append("Aggregation density < 1 detected")
+
+
+ def _check_volume_consistency(
+     df: pd.DataFrame,
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> None:
+     """Validate buy_volume + sell_volume == Volume."""
+     required_cols = {"buy_volume", "sell_volume", "Volume"}
+     if not required_cols.issubset(df.columns):
+         return
+
+     vol_sum = df["buy_volume"] + df["sell_volume"]
+     vol_diff = (vol_sum - df["Volume"]).abs()
+     vol_match = (vol_diff < 1e-6 * df["Volume"].abs().clip(lower=1e-10)).all()
+     checks["volume_consistency"] = vol_match
+     if not vol_match:
+         reasons.append("buy_volume + sell_volume != Volume")
+
+
+ def _check_trade_counts(
+     df: pd.DataFrame,
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> None:
+     """Validate trade count columns have valid values."""
+     if "individual_trade_count" in df.columns:
+         counts_valid = (df["individual_trade_count"] >= 1).all()
+         checks["trade_counts_valid"] = counts_valid
+         if not counts_valid:
+             reasons.append("Invalid trade count values (< 1)")
+
+
+ def _check_all_microstructure_zero(
+     df: pd.DataFrame,
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> None:
+     """Check if all microstructure columns are zero (indicates stale data)."""
+     micro_cols_present = [c for c in MICROSTRUCTURE_COLUMNS if c in df.columns]
+     if not micro_cols_present:
+         return
+
+     all_micro_zero = all((df[col] == 0).all() for col in micro_cols_present)
+     checks["microstructure_not_all_zero"] = not all_micro_zero
+     if all_micro_zero:
+         reasons.append("All microstructure columns are zero (pre-v7.0 cache data)")
+
+
+ def _determine_staleness(
+     checks: dict[str, bool],
+     reasons: list[str],
+ ) -> StalenessResult:
+     """Determine final staleness result from individual checks."""
+     high_confidence_checks = [
+         "vwap_bounded",
+         "vwap_not_all_zero",
+         "ofi_bounded",
+         "turnover_imbalance_bounded",
+         "duration_non_negative",
+         "aggregation_density_valid",
+         "trade_counts_valid",
+         "microstructure_not_all_zero",
+     ]
+
+     high_conf_failures = [
+         k for k in high_confidence_checks if k in checks and not checks[k]
+     ]
+
+     is_stale = len(high_conf_failures) > 0
+
+     # Determine confidence level
+     confidence: Literal["high", "medium", "low"] = "high"
+     if is_stale and not high_conf_failures:
+         confidence = "medium"
+
+     # Build recommendations
+     recommendations: list[str] = []
+     if is_stale:
+         recommendations.append(
+             "Invalidate cache entry and recompute with current version"
+         )
+         if "vwap_not_all_zero" in high_conf_failures:
+             recommendations.append("Data appears to be from pre-v7.0")
+         if "microstructure_not_all_zero" in high_conf_failures:
+             recommendations.append("Data appears to be from pre-v7.0")
+         recommendations.append("Run: get_range_bars(..., use_cache=False)")
+
+     return StalenessResult(
+         is_stale=is_stale,
+         reason="; ".join(reasons) if reasons else None,
+         confidence=confidence,
+         checks_passed=checks,
+         recommendations=recommendations,
+     )
+
+
+ def detect_staleness(
+     df: pd.DataFrame,
+     require_microstructure: bool = True,
+ ) -> StalenessResult:
+     """Detect stale cached data using content-based validation.
+
+     This is Tier 0 validation: fast staleness detection (<5ms for 100K bars).
+     Run on every cache read before returning data to caller.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Cached range bar DataFrame, possibly with microstructure columns.
+     require_microstructure : bool, default=True
+         If True, check for valid microstructure columns.
+
+     Returns
+     -------
+     StalenessResult
+         Detection result with confidence level and specific failures.
+     """
+     checks: dict[str, bool] = {}
+     reasons: list[str] = []
+
+     if require_microstructure:
+         _check_vwap(df, checks, reasons)
+         _check_bounded_columns(df, checks, reasons)
+         _check_volume_consistency(df, checks, reasons)
+         _check_trade_counts(df, checks, reasons)
+         _check_all_microstructure_zero(df, checks, reasons)
+
+     return _determine_staleness(checks, reasons)
+
+
+ def validate_schema_version(
+     cached_version: str | None,
+     min_version: str,
+ ) -> bool:
+     """Check if cached data meets minimum schema version requirement.
+
+     Parameters
+     ----------
+     cached_version : str | None
+         Version string from cached data (e.g., "7.0.0").
+     min_version : str
+         Minimum required version (e.g., "7.0.0").
+
+     Returns
+     -------
+     bool
+         True if cached_version >= min_version.
+     """
+     if not cached_version:
+         return False
+
+     try:
+         cached_parts = [int(x) for x in cached_version.split(".")[:_VERSION_PARTS]]
+         min_parts = [int(x) for x in min_version.split(".")[:_VERSION_PARTS]]
+
+         # Pad to 3 parts
+         while len(cached_parts) < _VERSION_PARTS:
+             cached_parts.append(0)
+         while len(min_parts) < _VERSION_PARTS:
+             min_parts.append(0)
+
+         return tuple(cached_parts) >= tuple(min_parts)
+     except (ValueError, AttributeError):
+         logger.warning(
+             "Invalid version format: cached=%r, min=%r",
+             cached_version,
+             min_version,
+         )
+         return False
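As a quick illustration of the checks defined above, a cached frame whose vwap column is all zeros trips _check_vwap and is reported stale, while validate_schema_version zero-pads short version strings before comparing. This is a short illustrative snippet, not code shipped in the wheel; the sample DataFrame values are made up.

import pandas as pd

from rangebar.validation.cache_staleness import detect_staleness, validate_schema_version

# A pre-v7.0-style cache entry: OHLCV present, vwap zeroed out.
df = pd.DataFrame(
    {
        "Open": [100.0, 101.0],
        "High": [101.5, 102.0],
        "Low": [99.5, 100.5],
        "Close": [101.0, 101.8],
        "Volume": [10.0, 12.0],
        "vwap": [0.0, 0.0],  # trips the vwap_not_all_zero check
    }
)

result = detect_staleness(df)
print(result.is_stale)         # True
print(result.reason)           # mentions "All VWAP values are zero (pre-v7.0 cache data)"
print(result.recommendations)  # includes "Run: get_range_bars(..., use_cache=False)"

# Version gate: short versions are zero-padded to three parts before comparison.
print(validate_schema_version("7.2", "7.0.0"))    # True  (7.2.0 >= 7.0.0)
print(validate_schema_version("6.9.9", "7.0.0"))  # False
print(validate_schema_version(None, "7.0.0"))     # False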