rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,664 @@
1
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
2
+ # Issue #19: Gap classification modularized to gap_classification.py
3
+ """Continuity validation for range bar data (Issue #19, Issue #5).
4
+
5
+ This module provides validation functions for range bar continuity,
6
+ using the tiered gap classification system from gap_classification.py.
7
+
8
+ Validation functions:
9
+ - validate_junction_continuity: Check continuity between two DataFrames
10
+ - validate_continuity_tiered: Full tiered validation with gap classification
11
+ - validate_continuity: Simple legacy validation (threshold-based)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import warnings
17
+ from dataclasses import dataclass
18
+ from typing import TYPE_CHECKING
19
+
20
+ import pandas as pd
21
+
22
+ from rangebar.constants import CONTINUITY_TOLERANCE_PCT
23
+
24
+ # Import gap classification types from extracted module
25
+ from .gap_classification import (
26
+ ASSET_CLASS_MULTIPLIERS,
27
+ VALIDATION_PRESETS,
28
+ AssetClass,
29
+ GapTier,
30
+ TierThresholds,
31
+ ValidationPreset,
32
+ detect_asset_class,
33
+ )
34
+
35
+ if TYPE_CHECKING:
36
+ pass
37
+
38
+
39
__all__ = [
    # Names marked "re-exported" come from .gap_classification (see the
    # import above); all other names are defined in this module.
    "ASSET_CLASS_MULTIPLIERS",  # re-exported from gap_classification
    "VALIDATION_PRESETS",  # re-exported from gap_classification
    "AssetClass",  # re-exported from gap_classification
    "ContinuityError",
    "ContinuityWarning",
    "GapInfo",
    "GapTier",  # re-exported from gap_classification
    "TierSummary",
    "TierThresholds",  # re-exported from gap_classification
    "TieredValidationResult",
    "ValidationPreset",  # re-exported from gap_classification
    "detect_asset_class",  # re-exported from gap_classification
    "validate_continuity",
    "validate_continuity_tiered",
    "validate_junction_continuity",
]
58
+
59
+
60
+ # ============================================================================
61
+ # Exceptions (kept in __init__.py for backward compatibility, imported here)
62
+ # ============================================================================
63
+
64
+
65
+ class ContinuityError(Exception):
66
+ """Raised when range bar continuity validation fails.
67
+
68
+ This indicates bars from different processing sessions were combined,
69
+ which can happen when:
70
+ 1. Data was fetched in multiple chunks without processor state continuity
71
+ 2. Cached data from different runs was concatenated
72
+ 3. There are actual gaps in the source tick data
73
+
74
+ Attributes
75
+ ----------
76
+ message : str
77
+ Human-readable error message
78
+ discontinuities : list[dict]
79
+ List of discontinuity details with bar_index, prev_close, curr_open, gap_pct
80
+ """
81
+
82
+ def __init__(self, message: str, discontinuities: list[dict] | None = None) -> None:
83
+ super().__init__(message)
84
+ self.discontinuities = discontinuities or []
85
+
86
+
87
class ContinuityWarning(UserWarning):
    """Warning for non-fatal continuity issues.

    Issued via ``warnings.warn`` by ``validate_continuity_tiered`` when a
    validation preset's mode is ``"warn"`` and the continuity tolerance is
    exceeded.
    """
89
+
90
+
91
+ # ============================================================================
92
+ # Result Dataclasses
93
+ # ============================================================================
94
+
95
+
96
@dataclass
class GapInfo:
    """Details of a single gap between consecutive bars.

    Attributes
    ----------
    bar_index : int
        Index of the bar with the gap (0-based)
    prev_close : float
        Close price of the previous bar
    curr_open : float
        Open price of the current bar
    gap_pct : float
        Gap magnitude as percentage (e.g., 0.01 = 1%)
    tier : GapTier
        Severity classification of this gap
    timestamp : pd.Timestamp | None
        Timestamp of the bar (if available from DataFrame index)
    """

    bar_index: int  # 0-based index of the bar on the right-hand side of the gap
    prev_close: float  # Close of the bar before the gap
    curr_open: float  # Open of the bar after the gap
    gap_pct: float  # |curr_open - prev_close| / prev_close (fraction: 0.01 = 1%)
    tier: GapTier  # severity tier assigned by _classify_gap
    timestamp: pd.Timestamp | None = None  # only set when df.index is a DatetimeIndex
122
+
123
+
124
@dataclass
class TierSummary:
    """Per-tier statistics for gap analysis.

    Attributes
    ----------
    count : int
        Number of gaps in this tier
    max_gap_pct : float
        Maximum gap percentage in this tier
    avg_gap_pct : float
        Average gap percentage in this tier (0 if count == 0)
    """

    count: int = 0  # number of gaps classified into this tier
    max_gap_pct: float = 0.0  # largest gap in the tier (0.0 when tier is empty)
    avg_gap_pct: float = 0.0  # mean gap in the tier (0.0 when tier is empty)
141
+
142
+
143
@dataclass
class TieredValidationResult:
    """Validation outcome with gaps broken down by severity tier.

    Gaps are grouped by :class:`GapTier`, so callers can treat small
    (noise-level) gaps differently from session-boundary breaks.

    Attributes
    ----------
    is_valid : bool
        True if no SESSION_BOUNDARY gaps (tier 5) detected
    bar_count : int
        Total number of bars validated
    gaps_by_tier : dict[GapTier, TierSummary]
        Per-tier statistics
    all_gaps : list[GapInfo]
        All gaps above PRECISION tier (detailed list)
    threshold_used_pct : float
        Range bar threshold used for validation (as percentage)
    asset_class_detected : AssetClass
        Auto-detected or overridden asset class
    preset_used : str | None
        Name of preset used, or None for custom config
    """

    is_valid: bool
    bar_count: int
    gaps_by_tier: dict[GapTier, TierSummary]
    all_gaps: list[GapInfo]
    threshold_used_pct: float
    asset_class_detected: AssetClass
    preset_used: str | None

    @property
    def has_session_breaks(self) -> bool:
        """Whether at least one SESSION_BOUNDARY gap was found."""
        return bool(self.gaps_by_tier[GapTier.SESSION_BOUNDARY].count)

    @property
    def has_microstructure_events(self) -> bool:
        """Whether at least one MICROSTRUCTURE gap was found."""
        return bool(self.gaps_by_tier[GapTier.MICROSTRUCTURE].count)

    def summary_dict(self) -> dict[str, int]:
        """Map each tier name to its gap count (convenient for logging).

        Returns
        -------
        dict[str, int]
            e.g. ``{'PRECISION': 0, 'NOISE': 5, 'MARKET_MOVE': 10, ...}``
        """
        counts: dict[str, int] = {}
        for tier, stats in self.gaps_by_tier.items():
            counts[tier.name] = stats.count
        return counts
212
+
213
+
214
+ # ============================================================================
215
+ # Validation Functions
216
+ # ============================================================================
217
+
218
+
219
def validate_junction_continuity(
    older_bars: pd.DataFrame,
    newer_bars: pd.DataFrame,
    tolerance_pct: float = CONTINUITY_TOLERANCE_PCT,
) -> tuple[bool, float | None]:
    """Check close/open continuity where two bar DataFrames meet.

    The junction is continuous when ``older_bars[-1].Close`` and
    ``newer_bars[0].Open`` differ by at most ``tolerance_pct`` (relative
    to the close).

    Parameters
    ----------
    older_bars : pd.DataFrame
        Chronologically earlier bars.
    newer_bars : pd.DataFrame
        Chronologically later bars.
    tolerance_pct : float
        Maximum allowed relative difference (0.0001 = 0.01%).

    Returns
    -------
    tuple[bool, float | None]
        ``(True, None)`` when continuous (also when either frame is empty
        or the boundary close is zero); ``(False, gap_pct)`` otherwise,
        where ``gap_pct`` is the relative difference.
    """
    if older_bars.empty or newer_bars.empty:
        # Nothing to compare across the junction.
        return True, None

    boundary_close = older_bars["Close"].iloc[-1]
    boundary_open = newer_bars["Open"].iloc[0]

    if boundary_close == 0:
        # A zero close makes the relative gap undefined; treat as continuous.
        return True, None

    relative_gap = abs(boundary_open - boundary_close) / abs(boundary_close)
    if relative_gap > tolerance_pct:
        return False, relative_gap
    return True, None
258
+
259
+
260
+ def validate_continuity(
261
+ df: pd.DataFrame,
262
+ tolerance_pct: float | None = None,
263
+ threshold_decimal_bps: int = 250,
264
+ ) -> dict:
265
+ """Validate that range bars come from continuous single-session processing.
266
+
267
+ Range bars do NOT guarantee bar[i].close == bar[i+1].open. The next bar
268
+ opens at the first tick AFTER the previous bar closes, not at the close
269
+ price. This is by design - range bars capture actual market movements.
270
+
271
+ What this function validates:
272
+ 1. OHLC invariants hold (High >= max(Open, Close), Low <= min(Open, Close))
273
+ 2. Price gaps between bars don't exceed threshold + tolerance
274
+ (gaps larger than threshold indicate bars from different sessions)
275
+ 3. Timestamps are monotonically increasing
276
+
277
+ Parameters
278
+ ----------
279
+ df : pd.DataFrame
280
+ Range bar DataFrame with OHLC columns
281
+ tolerance_pct : float, optional
282
+ Additional tolerance beyond threshold for gap detection.
283
+ Default is 0.5% (0.005) to account for floating-point precision.
284
+ threshold_decimal_bps : int, default=250
285
+ Range bar threshold used to generate these bars (250 = 0.25%).
286
+ Gaps larger than this indicate session boundaries.
287
+
288
+ Returns
289
+ -------
290
+ dict
291
+ Validation result with keys:
292
+ - is_valid: bool - True if bars appear from single session
293
+ - bar_count: int - Total number of bars
294
+ - discontinuity_count: int - Number of session boundaries found
295
+ - discontinuities: list[dict] - Details of each discontinuity
296
+
297
+ Notes
298
+ -----
299
+ A "discontinuity" here means bars from different processing sessions
300
+ were combined. Within a single session, the gap between bar[i].close
301
+ and bar[i+1].open should never exceed the threshold (since a bar only
302
+ closes when price moves by threshold from open).
303
+
304
+ The tolerance parameter accounts for:
305
+ - Floating-point precision in price calculations
306
+ - Minor price movements between close tick and next tick
307
+ """
308
+ if tolerance_pct is None:
309
+ tolerance_pct = 0.005 # 0.5% default tolerance
310
+
311
+ if df.empty:
312
+ return {
313
+ "is_valid": True,
314
+ "bar_count": 0,
315
+ "discontinuity_count": 0,
316
+ "discontinuities": [],
317
+ }
318
+
319
+ required_cols = {"Open", "High", "Low", "Close"}
320
+ if not required_cols.issubset(df.columns):
321
+ msg = f"DataFrame must have columns: {required_cols}"
322
+ raise ValueError(msg)
323
+
324
+ discontinuities = []
325
+
326
+ # Convert threshold to percentage (250 dbps = 0.25% = 0.0025)
327
+ threshold_pct = threshold_decimal_bps / 100000.0
328
+
329
+ # Maximum allowed gap = threshold + tolerance
330
+ # Within single session, gap should never exceed this
331
+ max_gap_pct = threshold_pct + tolerance_pct
332
+
333
+ close_prices = df["Close"].to_numpy()[:-1]
334
+ open_prices = df["Open"].to_numpy()[1:]
335
+
336
+ for i, (prev_close, curr_open) in enumerate(
337
+ zip(close_prices, open_prices, strict=False)
338
+ ):
339
+ if prev_close == 0:
340
+ continue
341
+
342
+ gap_pct = abs(curr_open - prev_close) / abs(prev_close)
343
+ if gap_pct > max_gap_pct:
344
+ discontinuities.append(
345
+ {
346
+ "bar_index": i + 1,
347
+ "prev_close": float(prev_close),
348
+ "curr_open": float(curr_open),
349
+ "gap_pct": float(gap_pct),
350
+ }
351
+ )
352
+
353
+ return {
354
+ "is_valid": len(discontinuities) == 0,
355
+ "bar_count": len(df),
356
+ "discontinuity_count": len(discontinuities),
357
+ "discontinuities": discontinuities,
358
+ }
359
+
360
+
361
def _resolve_validation(
    validation: str | dict | ValidationPreset,
    symbol: str | None = None,
) -> tuple[ValidationPreset, AssetClass, str | None]:
    """Normalize a ``validation`` argument into (preset, asset_class, name).

    Parameters
    ----------
    validation : str, dict, or ValidationPreset
        Accepted forms:
        - "auto": auto-detect asset class from symbol
        - str: a preset name ("research", "strict", "crypto", ...)
        - dict: custom config, e.g. {"tolerance_pct": 0.01, "mode": "warn"}
        - ValidationPreset: used as-is
    symbol : str, optional
        Symbol used for asset-class auto-detection.

    Returns
    -------
    tuple[ValidationPreset, AssetClass, str | None]
        Resolved preset, detected asset class, and the preset name
        (None for dict or ValidationPreset inputs).

    Raises
    ------
    ValueError
        For an unrecognized preset name.
    TypeError
        For an unsupported ``validation`` type.
    """

    def detected_class() -> AssetClass:
        # Fall back to UNKNOWN when no symbol is available for detection.
        return detect_asset_class(symbol) if symbol else AssetClass.UNKNOWN

    # "auto" picks the preset named after the detected asset class.
    if validation == "auto":
        asset_class = detected_class()
        preset_name = (
            asset_class.value if asset_class != AssetClass.UNKNOWN else "standard"
        )
        return VALIDATION_PRESETS[preset_name], asset_class, preset_name

    # Any other string must be a known preset name.
    if isinstance(validation, str):
        if validation not in VALIDATION_PRESETS:
            valid_presets = ", ".join(sorted(VALIDATION_PRESETS.keys()))
            msg = (
                f"Unknown validation preset: {validation!r}. "
                f"Valid presets: {valid_presets}"
            )
            raise ValueError(msg)
        named = VALIDATION_PRESETS[validation]
        return named, named.asset_class or detected_class(), validation

    # A dict builds a one-off "Custom" preset.
    if isinstance(validation, dict):
        raw_tiers = validation.get("tier_thresholds", TierThresholds())
        tiers = (
            TierThresholds(**raw_tiers) if isinstance(raw_tiers, dict) else raw_tiers
        )
        custom = ValidationPreset(
            tolerance_pct=validation["tolerance_pct"],
            mode=validation["mode"],
            tier_thresholds=tiers,
            description="Custom",
        )
        return custom, detected_class(), None

    # A ValidationPreset instance is used directly.
    if isinstance(validation, ValidationPreset):
        return validation, validation.asset_class or detected_class(), None

    msg = (
        f"Invalid validation type: {type(validation).__name__}. "
        "Expected str, dict, or ValidationPreset"
    )
    raise TypeError(msg)
439
+
440
+
441
def _classify_gap(
    gap_pct: float,
    tier_thresholds: TierThresholds,
    session_threshold_pct: float,
) -> GapTier:
    """Classify a gap into a severity tier.

    Checks the tier boundaries in ascending order and returns the first
    tier whose upper bound the gap falls below; anything at or beyond the
    session threshold is a SESSION_BOUNDARY.

    Parameters
    ----------
    gap_pct : float
        Gap magnitude as percentage (absolute value)
    tier_thresholds : TierThresholds
        Boundaries between tiers
    session_threshold_pct : float
        Session boundary threshold (threshold * session_factor)

    Returns
    -------
    GapTier
        Severity classification
    """
    ladder = (
        (tier_thresholds.precision, GapTier.PRECISION),
        (tier_thresholds.noise, GapTier.NOISE),
        (tier_thresholds.market_move, GapTier.MARKET_MOVE),
        (session_threshold_pct, GapTier.MICROSTRUCTURE),
    )
    for upper_bound, tier in ladder:
        if gap_pct < upper_bound:
            return tier
    return GapTier.SESSION_BOUNDARY
471
+
472
+
473
def validate_continuity_tiered(
    df: pd.DataFrame,
    threshold_decimal_bps: int = 250,
    *,
    validation: str | dict | ValidationPreset = "standard",
    symbol: str | None = None,
) -> TieredValidationResult:
    """Validate range bar continuity with tiered gap classification.

    This function categorizes gaps by severity tier, enabling nuanced
    handling of different gap magnitudes. It's the opt-in v6.2.0 API
    that will become the default in v7.0.

    Parameters
    ----------
    df : pd.DataFrame
        Range bar DataFrame with OHLCV columns
    threshold_decimal_bps : int, default=250
        Range bar threshold (250 = 0.25% = 25 basis points)
    validation : str, dict, or ValidationPreset, default="standard"
        Validation configuration:
        - "auto": Auto-detect asset class from symbol
        - str: Preset name ("research", "strict", "crypto", etc.)
        - dict: Custom config {"tolerance_pct": 0.01, "mode": "warn"}
        - ValidationPreset: Direct preset instance
    symbol : str, optional
        Symbol for asset class auto-detection. If None and validation is
        "auto", uses "standard" preset.

    Returns
    -------
    TieredValidationResult
        Comprehensive result with per-tier statistics

    Raises
    ------
    ContinuityError
        If validation mode is "error" and tolerance exceeded
    ContinuityWarning
        If validation mode is "warn" and tolerance exceeded (via warnings)

    Examples
    --------
    >>> result = validate_continuity_tiered(df, validation="research")
    >>> print(f"Valid: {result.is_valid}")
    Valid: True

    >>> # Custom configuration
    >>> result = validate_continuity_tiered(
    ...     df,
    ...     validation={"tolerance_pct": 0.015, "mode": "warn"},
    ...     symbol="BTCUSDT",
    ... )

    >>> # Auto-detect asset class
    >>> result = validate_continuity_tiered(df, validation="auto", symbol="EURUSD")
    >>> result.asset_class_detected
    <AssetClass.FOREX: 'forex'>
    """
    min_bars_for_gap = 2  # Need at least 2 bars to have a gap
    if len(df) < min_bars_for_gap:
        # No gaps possible with fewer than 2 bars.
        # NOTE(review): this early return skips _resolve_validation, so an
        # unknown preset name passes silently for 0/1-bar frames — confirm
        # this is intended.
        return TieredValidationResult(
            is_valid=True,
            bar_count=len(df),
            gaps_by_tier={tier: TierSummary() for tier in GapTier},
            all_gaps=[],
            threshold_used_pct=threshold_decimal_bps / 10000.0,
            asset_class_detected=(
                detect_asset_class(symbol) if symbol else AssetClass.UNKNOWN
            ),
            preset_used=validation if isinstance(validation, str) else None,
        )

    # Resolve validation configuration
    preset, asset_class, preset_name = _resolve_validation(validation, symbol)

    # Skip validation if mode is "skip"
    if preset.mode == "skip":
        return TieredValidationResult(
            is_valid=True,
            bar_count=len(df),
            gaps_by_tier={tier: TierSummary() for tier in GapTier},
            all_gaps=[],
            threshold_used_pct=threshold_decimal_bps / 10000.0,
            asset_class_detected=asset_class,
            preset_used=preset_name,
        )

    # Calculate session boundary threshold.
    # NOTE(review): this converts dbps with /10000.0 (250 -> 0.025) while
    # validate_continuity uses /100000.0 (250 -> 0.0025 = 0.25%) for the
    # same parameter — one of the two conversions looks wrong; confirm the
    # intended units against gap_classification's TierThresholds.
    threshold_pct = threshold_decimal_bps / 10000.0
    session_threshold_pct = threshold_pct * preset.tier_thresholds.session_factor

    # Analyze gaps
    all_gaps: list[GapInfo] = []
    tier_gaps: dict[GapTier, list[float]] = {tier: [] for tier in GapTier}

    # Accept either capitalized or lowercase OHLC column names.
    close_col = "Close" if "Close" in df.columns else "close"
    open_col = "Open" if "Open" in df.columns else "open"

    closes = df[close_col].to_numpy()
    opens = df[open_col].to_numpy()
    index = df.index

    for i in range(1, len(df)):
        prev_close = float(closes[i - 1])
        curr_open = float(opens[i])

        if prev_close == 0:
            continue  # Skip division by zero

        gap_pct = abs(curr_open - prev_close) / prev_close
        tier = _classify_gap(gap_pct, preset.tier_thresholds, session_threshold_pct)

        tier_gaps[tier].append(gap_pct)

        # Record non-PRECISION gaps for detailed list
        if tier != GapTier.PRECISION:
            timestamp = index[i] if isinstance(index, pd.DatetimeIndex) else None
            all_gaps.append(
                GapInfo(
                    bar_index=i,
                    prev_close=prev_close,
                    curr_open=curr_open,
                    gap_pct=gap_pct,
                    tier=tier,
                    timestamp=timestamp,
                )
            )

    # Build tier summaries
    gaps_by_tier: dict[GapTier, TierSummary] = {}
    for tier in GapTier:
        gaps = tier_gaps[tier]
        if gaps:
            gaps_by_tier[tier] = TierSummary(
                count=len(gaps),
                max_gap_pct=max(gaps),
                avg_gap_pct=sum(gaps) / len(gaps),
            )
        else:
            gaps_by_tier[tier] = TierSummary()

    # Determine validity: no SESSION_BOUNDARY gaps
    is_valid = gaps_by_tier[GapTier.SESSION_BOUNDARY].count == 0

    # Tolerance check: only MARKET_MOVE-and-above gaps count.
    # Bug fix: the reported "violating" list previously dropped the tier
    # filter applied during detection, so lower-tier gaps above the
    # tolerance could inflate the reported count and max. Compute the list
    # once with BOTH conditions and derive the flag from it.
    violating_gaps = [
        g
        for g in all_gaps
        if g.tier >= GapTier.MARKET_MOVE and g.gap_pct > preset.tolerance_pct
    ]

    result = TieredValidationResult(
        is_valid=is_valid,
        bar_count=len(df),
        gaps_by_tier=gaps_by_tier,
        all_gaps=all_gaps,
        threshold_used_pct=threshold_pct,
        asset_class_detected=asset_class,
        preset_used=preset_name,
    )

    # Handle tolerance violations based on mode
    if violating_gaps:
        max_gap = max(violating_gaps, key=lambda g: g.gap_pct)

        msg = (
            f"Continuity tolerance exceeded: {len(violating_gaps)} gap(s) > "
            f"{preset.tolerance_pct:.4%}. Max gap: {max_gap.gap_pct:.4%} at bar "
            f"{max_gap.bar_index}. Tier breakdown: {result.summary_dict()}"
        )

        if preset.mode == "error":
            discontinuities = [
                {
                    "bar_index": g.bar_index,
                    "prev_close": g.prev_close,
                    "curr_open": g.curr_open,
                    "gap_pct": g.gap_pct,
                    "tier": g.tier.name,
                }
                for g in violating_gaps
            ]
            raise ContinuityError(msg, discontinuities)

        if preset.mode == "warn":
            warnings.warn(msg, ContinuityWarning, stacklevel=2)

    return result