rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/__init__.pyi
ADDED
|
@@ -0,0 +1,1089 @@
|
|
|
1
|
+
"""Type stubs for rangebar package.
|
|
2
|
+
|
|
3
|
+
Public API
|
|
4
|
+
----------
|
|
5
|
+
get_range_bars : Get range bars with automatic data fetching and caching (date-bounded).
|
|
6
|
+
get_n_range_bars : Get exactly N range bars (count-bounded, deterministic).
|
|
7
|
+
precompute_range_bars : Pre-compute continuous range bars for a date range (single-pass).
|
|
8
|
+
validate_continuity_tiered : Validate range bar continuity with tiered gap classification.
|
|
9
|
+
ContinuityError : Exception raised when range bar continuity is violated.
|
|
10
|
+
ContinuityWarning : Warning issued when range bar discontinuities are detected.
|
|
11
|
+
PrecomputeProgress : Progress update for precomputation.
|
|
12
|
+
PrecomputeResult : Result of precomputation.
|
|
13
|
+
GapTier : Gap severity classification enum.
|
|
14
|
+
AssetClass : Asset class enum for tolerance calibration.
|
|
15
|
+
TierThresholds : Configurable boundaries between gap tiers.
|
|
16
|
+
ValidationPreset : Immutable validation configuration preset.
|
|
17
|
+
GapInfo : Details of a single gap between consecutive bars.
|
|
18
|
+
TierSummary : Per-tier statistics for gap analysis.
|
|
19
|
+
TieredValidationResult : Comprehensive validation result with tier breakdown.
|
|
20
|
+
TIER1_SYMBOLS : High-liquidity symbols available on all Binance markets.
|
|
21
|
+
THRESHOLD_PRESETS : Named threshold presets (micro, tight, standard, etc.).
|
|
22
|
+
VALIDATION_PRESETS : Named validation presets (research, strict, crypto, etc.).
|
|
23
|
+
THRESHOLD_DECIMAL_MIN : Minimum valid threshold (1 = 0.1bps).
|
|
24
|
+
THRESHOLD_DECIMAL_MAX : Maximum valid threshold (100,000 = 10,000bps).
|
|
25
|
+
__version__ : Package version string.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from collections.abc import Callable, Iterator
|
|
29
|
+
from dataclasses import dataclass
|
|
30
|
+
from enum import Enum, IntEnum
|
|
31
|
+
from typing import Any, Literal, overload
|
|
32
|
+
|
|
33
|
+
import pandas as pd
|
|
34
|
+
import polars as pl
|
|
35
|
+
|
|
36
|
+
# ============================================================================
|
|
37
|
+
# Exceptions and Warnings
|
|
38
|
+
# ============================================================================
|
|
39
|
+
|
|
40
|
+
class ContinuityError(Exception):
    """Raised when range bar continuity is violated.

    The bar[i+1].open == bar[i].close invariant is broken, indicating
    discontinuities in the range bar sequence.
    """

    discontinuities: list[dict]
    """List of discontinuity details (bar_index, prev_close, next_open, gap_pct)."""

    # NOTE(review): when `discontinuities` is None, presumably the attribute is
    # stored as an empty list — confirm against the implementation module.
    def __init__(
        self, message: str, discontinuities: list[dict] | None = None
    ) -> None: ...
|
|
53
|
+
|
|
54
|
+
class ContinuityWarning(UserWarning):
    """Emitted when bar-to-bar discontinuities are found but treated as non-fatal."""
|
|
56
|
+
|
|
57
|
+
# ============================================================================
|
|
58
|
+
# Data Classes
|
|
59
|
+
# ============================================================================
|
|
60
|
+
|
|
61
|
+
@dataclass
class PrecomputeProgress:
    """Progress update for precomputation.

    Passed to progress_callback during precompute_range_bars() execution.
    The bar and tick counters are cumulative across months, not per-month.
    """

    phase: Literal["fetching", "processing", "caching"]
    """Current processing phase."""
    current_month: str
    """Current month being processed (YYYY-MM format)."""
    months_completed: int
    """Number of months already processed."""
    months_total: int
    """Total number of months to process."""
    bars_generated: int
    """Cumulative bars generated so far."""
    ticks_processed: int
    """Cumulative ticks processed so far."""
    elapsed_seconds: float
    """Seconds elapsed since precomputation started."""
|
|
82
|
+
|
|
83
|
+
@dataclass
class PrecomputeResult:
    """Result of precomputation.

    Returned by precompute_range_bars() after successful execution.
    Summarizes what was computed and where it was cached.
    """

    symbol: str
    """Trading symbol that was precomputed."""
    threshold_decimal_bps: int
    """Threshold used for bar construction."""
    start_date: str
    """Start date of precomputed range (YYYY-MM-DD)."""
    end_date: str
    """End date of precomputed range (YYYY-MM-DD)."""
    total_bars: int
    """Total number of bars generated."""
    total_ticks: int
    """Total number of ticks processed."""
    elapsed_seconds: float
    """Total time taken for precomputation."""
    continuity_valid: bool | None
    """Whether all bars pass continuity validation. None if validation was skipped."""
    cache_key: str
    """Cache key for the stored bars."""
|
|
108
|
+
|
|
109
|
+
# Version string of the installed rangebar package (e.g. "11.6.1").
__version__: str
|
|
110
|
+
|
|
111
|
+
# ============================================================================
|
|
112
|
+
# Ouroboros: Cyclical Reset Boundaries for Reproducibility
|
|
113
|
+
# Plan: /Users/terryli/.claude/plans/sparkling-coalescing-dijkstra.md
|
|
114
|
+
# ============================================================================
|
|
115
|
+
|
|
116
|
+
from datetime import date, datetime
|
|
117
|
+
|
|
118
|
+
class OuroborosMode(str, Enum):
    """Granularity of ouroboros reset boundaries.

    Ouroboros (Greek: οὐροβόρος) names the cyclical reset points that make
    range bar construction reproducible across segments.
    """

    YEAR = "year"
    """Yearly cycle: reset at 00:00:00 UTC on January 1."""
    MONTH = "month"
    """Monthly cycle: reset at 00:00:00 UTC on the 1st of each month."""
    WEEK = "week"
    """Weekly cycle: Sunday 00:00:00 UTC (crypto) or first tick after market open (forex)."""
|
|
131
|
+
|
|
132
|
+
@dataclass(frozen=True)
class OuroborosBoundary:
    """A single ouroboros reset boundary.

    Represents a specific timestamp where the range bar processor should reset
    its state to enable reproducible bar construction across segments.
    Frozen: instances are immutable and hashable.
    """

    timestamp: datetime
    """UTC datetime of the boundary."""
    mode: OuroborosMode
    """Which granularity created this boundary."""
    reason: str
    """Human-readable reason (e.g., 'year_boundary', 'month_boundary')."""

    @property
    def timestamp_ms(self) -> int:
        """Timestamp in milliseconds (for comparison with trade data)."""

    @property
    def timestamp_us(self) -> int:
        """Timestamp in microseconds."""
|
|
154
|
+
|
|
155
|
+
@dataclass
class OrphanedBarMetadata:
    """Metadata for orphaned bars at ouroboros boundaries.

    Orphaned bars are incomplete bars that existed when the processor
    was reset at an ouroboros boundary. They can be included or excluded
    from results based on the `include_orphaned_bars` parameter.
    """

    is_orphan: bool = True
    """Always True for orphaned bars."""
    ouroboros_boundary: datetime | None = None
    """Which boundary caused the orphan."""
    reason: str | None = None
    """Reason string: 'year_boundary', 'month_boundary', 'week_boundary'."""
    expected_duration_us: int | None = None
    """Expected duration if bar had completed normally."""
|
|
172
|
+
|
|
173
|
+
def get_ouroboros_boundaries(
    start: date,
    end: date,
    mode: Literal["year", "month", "week"],
) -> list[OuroborosBoundary]:
    """Return all ouroboros reset points within the date range.

    Parameters
    ----------
    start : date
        Start date (inclusive)
    end : date
        End date (inclusive)
    mode : {"year", "month", "week"}
        Ouroboros granularity

    Returns
    -------
    list[OuroborosBoundary]
        Sorted list of boundaries within the date range

    Examples
    --------
    >>> from datetime import date
    >>> from rangebar import get_ouroboros_boundaries
    >>> boundaries = get_ouroboros_boundaries(date(2024, 1, 1), date(2024, 3, 31), "month")
    >>> len(boundaries)
    3
    >>> boundaries[0].reason
    'month_boundary'
    """
|
|
204
|
+
|
|
205
|
+
# ============================================================================
|
|
206
|
+
# Configuration Constants
|
|
207
|
+
# ============================================================================
|
|
208
|
+
|
|
209
|
+
TIER1_SYMBOLS: tuple[str, ...]
"""18 high-liquidity symbols available on ALL Binance markets.

AAVE, ADA, AVAX, BCH, BNB, BTC, DOGE, ETH, FIL,
LINK, LTC, NEAR, SOL, SUI, UNI, WIF, WLD, XRP
"""

THRESHOLD_DECIMAL_MIN: int
"""Minimum valid threshold: 1 (0.1bps = 0.001%)"""

THRESHOLD_DECIMAL_MAX: int
"""Maximum valid threshold: 100,000 (10,000bps = 100%)"""

THRESHOLD_PRESETS: dict[str, int]
"""Named threshold presets (in 0.1bps units).

Preset names are also accepted directly by the `threshold_decimal_bps`
parameter of get_range_bars().

- "micro": 10 (1bps = 0.01%) - scalping
- "tight": 50 (5bps = 0.05%) - day trading
- "standard": 100 (10bps = 0.1%) - swing trading
- "medium": 250 (25bps = 0.25%) - default
- "wide": 500 (50bps = 0.5%) - position trading
- "macro": 1000 (100bps = 1%) - long-term
"""
|
|
232
|
+
|
|
233
|
+
# Issue #59: Inter-bar microstructure features (enabled via the
# `inter_bar_lookback_count` parameter of get_range_bars).
INTER_BAR_FEATURE_COLUMNS: tuple[str, ...]
"""16 inter-bar microstructure feature column names (Issue #59).

Tier 1 - Core (7 features):
- lookback_trade_count: Trade count in lookback window
- lookback_ofi: Order flow imbalance [-1, 1]
- lookback_duration_us: Lookback window duration (microseconds)
- lookback_intensity: Trade intensity (trades/second)
- lookback_vwap_raw: Volume-weighted average price (raw i64)
- lookback_vwap_position: VWAP position in price range [0, 1]
- lookback_count_imbalance: Trade count imbalance [-1, 1]

Tier 2 - Statistical (5 features):
- lookback_kyle_lambda: Kyle's lambda (price impact)
- lookback_burstiness: Goh-Barabási burstiness [-1, 1]
- lookback_volume_skew: Volume distribution skewness
- lookback_volume_kurt: Volume distribution kurtosis
- lookback_price_range: Price range / first price [0, +inf)

Tier 3 - Advanced (4 features):
- lookback_kaufman_er: Kaufman efficiency ratio [0, 1]
- lookback_garman_klass_vol: Garman-Klass volatility [0, 1)
- lookback_hurst: Hurst exponent [0, 1]
- lookback_permutation_entropy: Permutation entropy [0, 1]

All inter-bar features are Optional - None when no lookback data available.
"""
|
|
261
|
+
|
|
262
|
+
# ============================================================================
|
|
263
|
+
# Tiered Validation System (Issue #19 - v6.2.0+)
|
|
264
|
+
# ============================================================================
|
|
265
|
+
|
|
266
|
+
class GapTier(IntEnum):
    """Severity tier for a close-to-open gap between consecutive range bars.

    Tier boundaries are based on empirical analysis of 30-month BTC data
    which identified 49 legitimate market microstructure events. Higher
    values indicate more severe gaps.
    """

    PRECISION = 1
    """< 0.001% - Floating-point artifacts (always ignored)"""
    NOISE = 2
    """0.001% - 0.01% - Tick-level noise (logged, not flagged)"""
    MARKET_MOVE = 3
    """0.01% - 0.1% - Normal market movement (configurable)"""
    MICROSTRUCTURE = 4
    """> 0.1% - Flash crashes, liquidations (warning/error)"""
    SESSION_BOUNDARY = 5
    """> threshold*2 - Definite session break (always error)"""
|
|
283
|
+
|
|
284
|
+
class AssetClass(Enum):
    """Asset class used to calibrate gap tolerances.

    Typical gap magnitudes differ by asset class, so validation
    tolerances are scaled accordingly.
    """

    CRYPTO = "crypto"
    """24/7 markets, flash crashes possible"""
    FOREX = "forex"
    """Session-based, weekend gaps"""
    EQUITIES = "equities"
    """Overnight gaps, circuit breakers"""
    UNKNOWN = "unknown"
    """Fallback to crypto defaults"""
|
|
298
|
+
|
|
299
|
+
# Consumed by the tiered validation machinery to scale tolerances per asset class.
ASSET_CLASS_MULTIPLIERS: dict[AssetClass, float]
"""Tolerance multipliers by asset class (relative to baseline)."""
|
|
301
|
+
|
|
302
|
+
def detect_asset_class(symbol: str) -> AssetClass:
    """Auto-detect asset class from symbol pattern.

    Detection Rules:
    - Crypto: Contains common crypto bases (BTC, ETH, etc.) or ends with USDT/BUSD
    - Forex: Standard 6-char pairs (EURUSD) or commodities (XAU, XAG)
    - Unknown: Fallback for unrecognized patterns

    Parameters
    ----------
    symbol : str
        Trading symbol (case-insensitive)

    Returns
    -------
    AssetClass
        Detected asset class; never raises for unrecognized symbols
        (they map to AssetClass.UNKNOWN).

    Examples
    --------
    >>> detect_asset_class("BTCUSDT")
    <AssetClass.CRYPTO: 'crypto'>
    >>> detect_asset_class("EURUSD")
    <AssetClass.FOREX: 'forex'>
    """
|
|
327
|
+
|
|
328
|
+
@dataclass(frozen=True)
class TierThresholds:
    """Boundaries separating gap severity tiers (as percentages).

    Gaps are binned into tiers by comparing their magnitude against these
    boundaries. Each value is a percentage expressed as a fraction, e.g.
    0.00001 means 0.001%. Immutable once constructed.
    """

    precision: float = ...
    """Tier 1/2 boundary (default: 0.00001 = 0.001%)"""
    noise: float = ...
    """Tier 2/3 boundary (default: 0.0001 = 0.01%)"""
    market_move: float = ...
    """Tier 3/4 boundary (default: 0.001 = 0.1%)"""
    session_factor: float = ...
    """Tier 5 multiplier (default: 2.0)"""
|
|
344
|
+
|
|
345
|
+
@dataclass(frozen=True)
class ValidationPreset:
    """Immutable validation configuration preset.

    Presets bundle tolerance, behavior mode, and tier thresholds into
    named configurations for common use cases. See VALIDATION_PRESETS
    for the built-in named instances.
    """

    tolerance_pct: float
    """Maximum gap percentage before flagging (e.g., 0.01 = 1%)"""
    mode: Literal["error", "warn", "skip"]
    """Behavior on validation failure"""
    tier_thresholds: TierThresholds = ...
    """Boundaries for gap tier classification"""
    asset_class: AssetClass | None = ...
    """Override auto-detection if set"""
    description: str = ...
    """Human-readable description of the preset"""
|
|
363
|
+
|
|
364
|
+
VALIDATION_PRESETS: dict[str, ValidationPreset]
"""Named validation presets for common scenarios.

Keys are the preset names accepted by the `validation` parameter of
validate_continuity_tiered().

General-purpose:
- "permissive": 5% tolerance, warn mode
- "research": 2% tolerance, warn mode (exploratory analysis)
- "standard": 1% tolerance, warn mode (production backtesting)
- "strict": 0.5% tolerance, error mode (ML training data)
- "paranoid": 0.1% tolerance, error mode (original v6.1.0 behavior)

Asset-class specific:
- "crypto": 2% tolerance, crypto asset class
- "forex": 1% tolerance, forex asset class
- "equities": 3% tolerance, equities asset class

Special:
- "skip": Disable validation entirely
- "audit": 0.2% tolerance, error mode (data quality audit)
"""
|
|
383
|
+
|
|
384
|
+
@dataclass
class GapInfo:
    """Details of a single gap between consecutive bars.

    Collected into TieredValidationResult.all_gaps for gaps above the
    PRECISION tier.
    """

    bar_index: int
    """Index of the bar with the gap (0-based)"""
    prev_close: float
    """Close price of the previous bar"""
    curr_open: float
    """Open price of the current bar"""
    gap_pct: float
    """Gap magnitude as percentage (e.g., 0.01 = 1%)"""
    tier: GapTier
    """Severity classification of this gap"""
    timestamp: pd.Timestamp | None = ...
    """Timestamp of the bar (if available from DataFrame index)"""
|
|
400
|
+
|
|
401
|
+
@dataclass
class TierSummary:
    """Aggregate statistics for the gaps falling into one severity tier."""

    count: int = ...
    """Number of gaps in this tier"""
    max_gap_pct: float = ...
    """Maximum gap percentage in this tier"""
    avg_gap_pct: float = ...
    """Average gap percentage in this tier (0 if count == 0)"""
|
|
411
|
+
|
|
412
|
+
@dataclass
class TieredValidationResult:
    """Comprehensive validation result with tier breakdown.

    This result provides detailed gap analysis categorized by severity tier,
    enabling nuanced handling of different gap magnitudes. Returned by
    validate_continuity_tiered().
    """

    is_valid: bool
    """True if no SESSION_BOUNDARY gaps (tier 5) detected"""
    bar_count: int
    """Total number of bars validated"""
    gaps_by_tier: dict[GapTier, TierSummary]
    """Per-tier statistics"""
    all_gaps: list[GapInfo]
    """All gaps above PRECISION tier (detailed list)"""
    threshold_used_pct: float
    """Range bar threshold used for validation (as percentage)"""
    asset_class_detected: AssetClass
    """Auto-detected or overridden asset class"""
    preset_used: str | None
    """Name of preset used, or None for custom config"""

    @property
    def has_session_breaks(self) -> bool:
        """True if any SESSION_BOUNDARY gaps detected."""

    @property
    def has_microstructure_events(self) -> bool:
        """True if any MICROSTRUCTURE gaps detected."""

    def summary_dict(self) -> dict[str, int]:
        """Return gap counts by tier name for logging.

        Returns
        -------
        dict[str, int]
            Mapping of tier name to gap count
        """
|
|
451
|
+
|
|
452
|
+
def validate_continuity_tiered(
    df: pd.DataFrame,
    threshold_decimal_bps: int = 250,
    *,
    validation: str | dict | ValidationPreset = "standard",
    symbol: str | None = None,
) -> TieredValidationResult:
    """Validate range bar continuity with tiered gap classification.

    This function categorizes gaps by severity tier, enabling nuanced
    handling of different gap magnitudes. It's the opt-in v6.2.0 API
    that will become the default in v7.0.

    Parameters
    ----------
    df : pd.DataFrame
        Range bar DataFrame with OHLCV columns
    threshold_decimal_bps : int, default=250
        Range bar threshold (250 = 0.25% = 25 basis points)
    validation : str, dict, or ValidationPreset, default="standard"
        Validation configuration:
        - "auto": Auto-detect asset class from symbol
        - str: Preset name ("research", "strict", "crypto", etc.) —
          see VALIDATION_PRESETS for the full list
        - dict: Custom config {"tolerance_pct": 0.01, "mode": "warn"}
        - ValidationPreset: Direct preset instance
    symbol : str, optional
        Symbol for asset class auto-detection

    Returns
    -------
    TieredValidationResult
        Comprehensive result with per-tier statistics

    Raises
    ------
    ContinuityError
        If validation mode is "error" and tolerance exceeded
    ContinuityWarning
        If validation mode is "warn" and tolerance exceeded (via warnings module)

    Examples
    --------
    >>> result = validate_continuity_tiered(df, validation="research")
    >>> print(f"Valid: {result.is_valid}")
    Valid: True
    """
|
|
498
|
+
|
|
499
|
+
# ============================================================================
|
|
500
|
+
# Main API
|
|
501
|
+
# ============================================================================
|
|
502
|
+
|
|
503
|
+
# Overload: materialize=True (the default) — the full result is collected
# into a single pandas DataFrame.
@overload
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    ouroboros: Literal["year", "month", "week"] = ...,
    include_orphaned_bars: bool = ...,
    materialize: Literal[True] = ...,
    batch_size: int = ...,
    source: Literal["binance", "exness"] = ...,
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = ...,
    validation: Literal["permissive", "strict", "paranoid"] = ...,
    include_incomplete: bool = ...,
    include_microstructure: bool = ...,
    include_exchange_sessions: bool = ...,  # Issue #8
    prevent_same_timestamp_close: bool = ...,
    verify_checksum: bool = ...,
    use_cache: bool = ...,
    fetch_if_missing: bool = ...,
    cache_dir: str | None = ...,
    max_memory_mb: int | None = ...,  # Issue #49
    inter_bar_lookback_count: int | None = ...,  # Issue #59
) -> pd.DataFrame: ...
|
|
530
|
+
# Overload: materialize=False — bars are streamed as an iterator of polars
# DataFrame batches (batch_size bars each) for memory-efficient processing.
@overload
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    ouroboros: Literal["year", "month", "week"] = ...,
    include_orphaned_bars: bool = ...,
    materialize: Literal[False],
    batch_size: int = ...,
    source: Literal["binance", "exness"] = ...,
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = ...,
    validation: Literal["permissive", "strict", "paranoid"] = ...,
    include_incomplete: bool = ...,
    include_microstructure: bool = ...,
    include_exchange_sessions: bool = ...,  # Issue #8
    prevent_same_timestamp_close: bool = ...,
    verify_checksum: bool = ...,
    use_cache: bool = ...,
    fetch_if_missing: bool = ...,
    cache_dir: str | None = ...,
    max_memory_mb: int | None = ...,  # Issue #49
    inter_bar_lookback_count: int | None = ...,  # Issue #59
) -> Iterator[pl.DataFrame]: ...
|
|
557
|
+
def get_range_bars(
|
|
558
|
+
symbol: str,
|
|
559
|
+
start_date: str,
|
|
560
|
+
end_date: str,
|
|
561
|
+
threshold_decimal_bps: (
|
|
562
|
+
int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
|
|
563
|
+
) = 250,
|
|
564
|
+
*,
|
|
565
|
+
# Ouroboros: Cyclical reset boundaries (v11.0+)
|
|
566
|
+
ouroboros: Literal["year", "month", "week"] = "year",
|
|
567
|
+
include_orphaned_bars: bool = False,
|
|
568
|
+
# Streaming options (v8.0+)
|
|
569
|
+
materialize: bool = True,
|
|
570
|
+
batch_size: int = 10_000,
|
|
571
|
+
# Data source configuration
|
|
572
|
+
source: Literal["binance", "exness"] = "binance",
|
|
573
|
+
market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
|
|
574
|
+
# Exness-specific options
|
|
575
|
+
validation: Literal["permissive", "strict", "paranoid"] = "strict",
|
|
576
|
+
# Processing options
|
|
577
|
+
include_incomplete: bool = False,
|
|
578
|
+
include_microstructure: bool = False,
|
|
579
|
+
include_exchange_sessions: bool = False, # Issue #8: Exchange session flags
|
|
580
|
+
prevent_same_timestamp_close: bool = True,
|
|
581
|
+
# Data integrity (Issue #43)
|
|
582
|
+
verify_checksum: bool = True,
|
|
583
|
+
# Caching options
|
|
584
|
+
use_cache: bool = True,
|
|
585
|
+
fetch_if_missing: bool = True,
|
|
586
|
+
cache_dir: str | None = None,
|
|
587
|
+
# Memory guards (Issue #49)
|
|
588
|
+
max_memory_mb: int | None = None,
|
|
589
|
+
# Inter-bar features (Issue #59)
|
|
590
|
+
inter_bar_lookback_count: int | None = None,
|
|
591
|
+
) -> pd.DataFrame | Iterator[pl.DataFrame]:
|
|
592
|
+
"""Get range bars for a symbol with automatic data fetching and caching.
|
|
593
|
+
|
|
594
|
+
This is the single entry point for all range bar generation. It supports
|
|
595
|
+
multiple data sources (Binance crypto, Exness forex), all market types,
|
|
596
|
+
and exposes the full configurability of the underlying Rust engine.
|
|
597
|
+
|
|
598
|
+
Parameters
|
|
599
|
+
----------
|
|
600
|
+
symbol : str
|
|
601
|
+
Trading symbol (uppercase).
|
|
602
|
+
- Binance: "BTCUSDT", "ETHUSDT", etc.
|
|
603
|
+
- Exness: "EURUSD", "GBPUSD", "XAUUSD", etc.
|
|
604
|
+
start_date : str
|
|
605
|
+
Start date in YYYY-MM-DD format.
|
|
606
|
+
end_date : str
|
|
607
|
+
End date in YYYY-MM-DD format.
|
|
608
|
+
threshold_decimal_bps : int or str, default=250
|
|
609
|
+
Threshold in 0.1bps units. Can be:
|
|
610
|
+
- Integer: Direct value (250 = 25bps = 0.25%)
|
|
611
|
+
- String preset: "micro" (1bps), "tight" (5bps), "standard" (10bps),
|
|
612
|
+
"medium" (25bps), "wide" (50bps), "macro" (100bps)
|
|
613
|
+
Valid range: 1-100,000 (0.001% to 100%)
|
|
614
|
+
materialize : bool, default=True
|
|
615
|
+
If True, return a single pd.DataFrame (legacy behavior).
|
|
616
|
+
If False, return an Iterator[pl.DataFrame] that yields batches
|
|
617
|
+
of bars for memory-efficient streaming (v8.0+).
|
|
618
|
+
batch_size : int, default=10_000
|
|
619
|
+
Number of bars per batch when materialize=False.
|
|
620
|
+
Each batch is ~500 KB. Only used in streaming mode.
|
|
621
|
+
|
|
622
|
+
source : str, default="binance"
|
|
623
|
+
Data source: "binance" or "exness"
|
|
624
|
+
market : str, default="spot"
|
|
625
|
+
Market type (Binance only):
|
|
626
|
+
- "spot": Spot market
|
|
627
|
+
- "futures-um" or "um": USD-M perpetual futures
|
|
628
|
+
- "futures-cm" or "cm": COIN-M perpetual futures
|
|
629
|
+
validation : str, default="strict"
|
|
630
|
+
Validation strictness (Exness only):
|
|
631
|
+
- "permissive": Basic checks (bid > 0, ask > 0, bid < ask)
|
|
632
|
+
- "strict": + Spread < 10% (catches obvious errors)
|
|
633
|
+
- "paranoid": + Spread < 1% (flags suspicious data)
|
|
634
|
+
include_incomplete : bool, default=False
|
|
635
|
+
Include the final incomplete bar (useful for analysis).
|
|
636
|
+
If False (default), only completed bars are returned.
|
|
637
|
+
include_microstructure : bool, default=False
|
|
638
|
+
Include market microstructure columns:
|
|
639
|
+
- buy_volume, sell_volume: Volume by aggressor side
|
|
640
|
+
- vwap: Volume-weighted average price
|
|
641
|
+
- trade_count: Number of trades in bar
|
|
642
|
+
- (Exness) spread_min, spread_max, spread_avg: Spread statistics
|
|
643
|
+
- (Issue #25) duration_us: Bar duration in microseconds
|
|
644
|
+
- (Issue #25) ofi: Order Flow Imbalance [-1, 1]
|
|
645
|
+
- (Issue #25) vwap_close_deviation: (close - vwap) / (high - low)
|
|
646
|
+
- (Issue #25) price_impact: Amihud-style illiquidity
|
|
647
|
+
- (Issue #25) kyle_lambda_proxy: Market depth proxy
|
|
648
|
+
- (Issue #25) trade_intensity: Trades per second
|
|
649
|
+
- (Issue #25) volume_per_trade: Average trade size
|
|
650
|
+
- (Issue #25) aggression_ratio: Buy/sell trade count ratio
|
|
651
|
+
- (Issue #25) aggregation_density: Trade fragmentation proxy
|
|
652
|
+
- (Issue #25) turnover_imbalance: Dollar-weighted OFI [-1, 1]
|
|
653
|
+
prevent_same_timestamp_close : bool, default=True
|
|
654
|
+
Prevent consecutive bars from having identical timestamps.
|
|
655
|
+
verify_checksum : bool, default=True
|
|
656
|
+
Verify SHA-256 checksum of downloaded data (Issue #43).
|
|
657
|
+
Enabled by default for data integrity. Set to False for
|
|
658
|
+
faster downloads when data integrity is verified elsewhere.
|
|
659
|
+
use_cache : bool, default=True
|
|
660
|
+
Cache tick data locally in Parquet format.
|
|
661
|
+
cache_dir : str or None, default=None
|
|
662
|
+
Custom cache directory. If None, uses platform default:
|
|
663
|
+
- macOS: ~/Library/Caches/rangebar/
|
|
664
|
+
- Linux: ~/.cache/rangebar/
|
|
665
|
+
- Windows: %LOCALAPPDATA%/terrylica/rangebar/Cache/
|
|
666
|
+
max_memory_mb : int or None, default=None
|
|
667
|
+
Memory budget in MB for tick data loading (Issue #49).
|
|
668
|
+
If estimated in-memory size exceeds this limit, raises MemoryError.
|
|
669
|
+
If None, uses automatic detection (80% of available RAM).
|
|
670
|
+
Set to 0 to disable all memory guards.
|
|
671
|
+
inter_bar_lookback_count : int or None, default=None
|
|
672
|
+
Number of trades to keep in lookback buffer for inter-bar feature
|
|
673
|
+
computation (Issue #59). If set, enables 16 inter-bar features
|
|
674
|
+
computed from trades BEFORE each bar opens. Recommended: 100-500.
|
|
675
|
+
If None (default), inter-bar features are disabled.
|
|
676
|
+
|
|
677
|
+
Returns
|
|
678
|
+
-------
|
|
679
|
+
pd.DataFrame or Iterator[pl.DataFrame]
|
|
680
|
+
If materialize=True (default): Single pd.DataFrame ready for
|
|
681
|
+
backtesting.py, with DatetimeIndex and OHLCV columns.
|
|
682
|
+
|
|
683
|
+
If materialize=False: Iterator yielding pl.DataFrame batches
|
|
684
|
+
(batch_size bars each) for memory-efficient streaming.
|
|
685
|
+
|
|
686
|
+
Columns: Open, High, Low, Close, Volume
|
|
687
|
+
(if include_microstructure) Additional columns
|
|
688
|
+
|
|
689
|
+
Raises
|
|
690
|
+
------
|
|
691
|
+
ValueError
|
|
692
|
+
- Invalid threshold (outside 1-100,000 range)
|
|
693
|
+
- Invalid dates or date format
|
|
694
|
+
- Unknown source, market, or validation level
|
|
695
|
+
- Unknown threshold preset name
|
|
696
|
+
RuntimeError
|
|
697
|
+
- Data fetching failed
|
|
698
|
+
- No data available for date range
|
|
699
|
+
- Feature not enabled (e.g., Exness without exness feature)
|
|
700
|
+
|
|
701
|
+
Examples
|
|
702
|
+
--------
|
|
703
|
+
Basic usage - Binance spot:
|
|
704
|
+
|
|
705
|
+
>>> from rangebar import get_range_bars
|
|
706
|
+
>>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-06-30")
|
|
707
|
+
|
|
708
|
+
Using threshold presets:
|
|
709
|
+
|
|
710
|
+
>>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", threshold_decimal_bps="tight")
|
|
711
|
+
|
|
712
|
+
Binance USD-M Futures:
|
|
713
|
+
|
|
714
|
+
>>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", market="futures-um")
|
|
715
|
+
|
|
716
|
+
Exness forex with spread monitoring:
|
|
717
|
+
|
|
718
|
+
>>> df = get_range_bars(
|
|
719
|
+
... "EURUSD", "2024-01-01", "2024-01-31",
|
|
720
|
+
... source="exness",
|
|
721
|
+
... threshold_decimal_bps="standard",
|
|
722
|
+
... include_microstructure=True,
|
|
723
|
+
... )
|
|
724
|
+
|
|
725
|
+
Use with backtesting.py:
|
|
726
|
+
|
|
727
|
+
>>> from backtesting import Backtest, Strategy
|
|
728
|
+
>>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-12-31")
|
|
729
|
+
>>> bt = Backtest(df, MyStrategy, cash=10000, commission=0.0002)
|
|
730
|
+
>>> stats = bt.run()
|
|
731
|
+
|
|
732
|
+
Notes
|
|
733
|
+
-----
|
|
734
|
+
Threshold units (0.1bps):
|
|
735
|
+
The threshold is specified in tenths of basis points for precision.
|
|
736
|
+
Common conversions:
|
|
737
|
+
- 10 = 1bps = 0.01%
|
|
738
|
+
- 100 = 10bps = 0.1%
|
|
739
|
+
- 250 = 25bps = 0.25%
|
|
740
|
+
- 1000 = 100bps = 1%
|
|
741
|
+
|
|
742
|
+
Tier-1 symbols:
|
|
743
|
+
18 high-liquidity symbols available on ALL Binance markets:
|
|
744
|
+
AAVE, ADA, AVAX, BCH, BNB, BTC, DOGE, ETH, FIL,
|
|
745
|
+
LINK, LTC, NEAR, SOL, SUI, UNI, WIF, WLD, XRP
|
|
746
|
+
|
|
747
|
+
Non-lookahead guarantee:
|
|
748
|
+
- Threshold computed from bar OPEN price only
|
|
749
|
+
- Breaching trade included in closing bar
|
|
750
|
+
- No future information used in bar construction
|
|
751
|
+
"""
|
|
752
|
+
|
|
753
|
+
def get_range_bars_pandas(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    **kwargs: Any,  # noqa: ANN401
) -> pd.DataFrame:
    """Get range bars as pandas DataFrame (deprecated compatibility shim).

    .. deprecated:: 8.0
        Use ``get_range_bars(materialize=True)`` directly instead.
        This function will be removed in v9.0.

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT")
    start_date : str
        Start date in YYYY-MM-DD format
    end_date : str
        End date in YYYY-MM-DD format
    threshold_decimal_bps : int or str, default=250
        Threshold in decimal basis points
    **kwargs
        Additional keyword arguments forwarded to ``get_range_bars()``

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame ready for backtesting.py
    """
|
|
786
|
+
|
|
787
|
+
def get_n_range_bars(
    symbol: str,
    n_bars: int,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    end_date: str | None = None,
    source: Literal["binance", "exness"] = "binance",
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
    include_microstructure: bool = False,
    use_cache: bool = True,
    fetch_if_missing: bool = True,
    max_lookback_days: int = 90,
    warn_if_fewer: bool = True,
    validate_on_return: bool = False,
    continuity_action: Literal["warn", "raise", "log"] = "warn",
    chunk_size: int = 100_000,
    cache_dir: str | None = None,
) -> pd.DataFrame:
    """Get exactly N range bars ending at or before a given date.

    Unlike `get_range_bars()` which uses date bounds (producing variable bar
    counts), this function returns a deterministic number of bars. This is
    useful for:
    - ML training (exactly 10,000 samples)
    - Walk-forward optimization (fixed window sizes)
    - Consistent backtest comparisons

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT")
    n_bars : int
        Number of bars to retrieve. Must be > 0.
    threshold_decimal_bps : int or str, default=250
        Threshold in decimal basis points. Can be:
        - Integer: Direct value (250 = 25bps = 0.25%)
        - String preset: "micro", "tight", "standard", "medium", "wide", "macro"
    end_date : str or None, default=None
        End date in YYYY-MM-DD format. If None, uses most recent available data.
    source : str, default="binance"
        Data source: "binance" or "exness"
    market : str, default="spot"
        Market type (Binance only): "spot", "futures-um", or "futures-cm"
    include_microstructure : bool, default=False
        Include microstructure columns (vwap, buy_volume, sell_volume,
        plus Issue #25 features: ofi, duration_us, price_impact, etc.)
    use_cache : bool, default=True
        Use ClickHouse cache for bar retrieval/storage
    fetch_if_missing : bool, default=True
        Fetch and process new data if cache doesn't have enough bars
    max_lookback_days : int, default=90
        Safety limit: maximum days to look back when fetching missing data.
        Prevents runaway fetches on empty caches.
    warn_if_fewer : bool, default=True
        Emit UserWarning if returning fewer bars than requested.
    validate_on_return : bool, default=False
        If True, validate bar continuity before returning.
        Uses continuity_action to determine behavior on failure.
    continuity_action : str, default="warn"
        Action when discontinuity found during validation:
        - "warn": Log warning but return data
        - "raise": Raise ContinuityError
        - "log": Silent logging only
    chunk_size : int, default=100_000
        Number of ticks per processing chunk for memory efficiency.
        Larger values = faster processing, more memory.
        Default 100K = ~15MB memory overhead.
    cache_dir : str or None, default=None
        Custom cache directory for tick data (Tier 1).

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame with exactly n_bars rows (or fewer if not enough
        data), sorted chronologically (oldest first). Columns:
        - Open, High, Low, Close, Volume
        - (if include_microstructure) vwap, buy_volume, sell_volume

    Raises
    ------
    ValueError
        - n_bars <= 0
        - Invalid threshold
        - Invalid date format
    RuntimeError
        - ClickHouse not available when use_cache=True
        - Data fetching failed

    Examples
    --------
    Get last 10,000 bars for ML training:

    >>> from rangebar import get_n_range_bars
    >>> df = get_n_range_bars("BTCUSDT", n_bars=10000)
    >>> assert len(df) == 10000

    Get 5,000 bars ending at specific date for walk-forward:

    >>> df = get_n_range_bars("BTCUSDT", n_bars=5000, end_date="2024-06-01")

    With safety limit (won't fetch more than 30 days of data):

    >>> df = get_n_range_bars("BTCUSDT", n_bars=1000, max_lookback_days=30)

    Notes
    -----
    Cache behavior:
    - Fast path: If cache has >= n_bars, returns immediately (~50ms)
    - Slow path: If cache has < n_bars and fetch_if_missing=True,
      fetches additional data, computes bars, stores in cache, returns

    Gap-filling algorithm:
    Uses adaptive exponential backoff to estimate how many ticks to fetch.
    Learns compression ratio (ticks/bar) for each (symbol, threshold) pair.

    See Also
    --------
    get_range_bars : Date-bounded bar retrieval (variable bar count)
    precompute_range_bars : Pre-compute continuous bars for WFO workflows
    THRESHOLD_PRESETS : Named threshold values
    """
|
|
909
|
+
|
|
910
|
+
def precompute_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: (
        int | Literal["micro", "tight", "standard", "medium", "wide", "macro"]
    ) = 250,
    *,
    source: Literal["binance", "exness"] = "binance",
    market: Literal["spot", "futures-um", "futures-cm", "um", "cm"] = "spot",
    chunk_size: int = 100_000,
    invalidate_existing: Literal["overlap", "full", "none", "smart"] = "smart",
    progress_callback: Callable[[PrecomputeProgress], None] | None = None,
    include_microstructure: bool = False,
    validate_on_complete: Literal["error", "warn", "skip"] = "error",
    continuity_tolerance_pct: float = 0.001,
    cache_dir: str | None = None,
) -> PrecomputeResult:
    """Precompute continuous range bars for a date range (single-pass, guaranteed continuity).

    Designed for ML workflows requiring continuous bar sequences for
    training/validation. Uses single-pass processing to guarantee the
    bar[i+1].open == bar[i].close invariant.

    Parameters
    ----------
    symbol : str
        Trading pair (e.g., "BTCUSDT")
    start_date : str
        Start date (inclusive) "YYYY-MM-DD"
    end_date : str
        End date (inclusive) "YYYY-MM-DD"
    threshold_decimal_bps : int or str, default=250
        Range bar threshold. Can be integer (250 = 0.25%) or preset name.
    source : str, default="binance"
        Data source: "binance" or "exness"
    market : str, default="spot"
        Market type for Binance: "spot", "futures-um"/"um", or "futures-cm"/"cm"
    chunk_size : int, default=100_000
        Ticks per processing chunk (~15MB memory per 100K ticks)
    invalidate_existing : str, default="smart"
        Cache invalidation strategy:
        - "overlap": Invalidate only bars in date range
        - "full": Invalidate ALL bars for symbol/threshold
        - "none": Skip if any cached bars exist in range
        - "smart": Invalidate overlapping + validate junction continuity
    progress_callback : callable, optional
        Callback for progress updates. Receives PrecomputeProgress dataclass.
    include_microstructure : bool, default=False
        Include order flow metrics (vwap, buy_volume, sell_volume)
    validate_on_complete : str, default="error"
        Continuity validation mode after precomputation:
        - "error": Raise ContinuityError if discontinuities found
        - "warn": Log warning but continue (sets continuity_valid=False)
        - "skip": Skip validation entirely (continuity_valid=None)
    continuity_tolerance_pct : float, default=0.001
        Maximum allowed price gap percentage for continuity validation.
        Default 0.1% (0.001) accommodates market microstructure events.
        The total allowed gap is threshold_pct + continuity_tolerance_pct.
    cache_dir : str or None, optional
        Custom cache directory for tick data.

    Returns
    -------
    PrecomputeResult
        Dataclass with statistics: total_bars, total_ticks, elapsed_seconds,
        continuity_valid, cache_key

    Raises
    ------
    ValueError
        Invalid parameters (dates, threshold, symbol)
    RuntimeError
        Fetch or processing failure
    ContinuityError
        If validate_on_complete="error" and discontinuities found

    Examples
    --------
    Basic precomputation:

    >>> from rangebar import precompute_range_bars
    >>> result = precompute_range_bars("BTCUSDT", "2024-01-01", "2024-03-31")
    >>> print(f"Generated {result.total_bars} bars in {result.elapsed_seconds:.1f}s")

    With progress callback:

    >>> def on_progress(p):
    ...     print(f"{p.phase}: {p.months_completed}/{p.months_total} months")
    >>> precompute_range_bars("BTCUSDT", "2024-01-01", "2024-06-30",
    ...                       progress_callback=on_progress)

    See Also
    --------
    get_n_range_bars : Count-bounded bar retrieval (uses precomputed cache)
    get_range_bars : Date-bounded bar retrieval
    """
|
|
1006
|
+
|
|
1007
|
+
def process_trades_polars(
    trades: pl.DataFrame | pl.LazyFrame,
    threshold_decimal_bps: int = 250,
) -> pd.DataFrame:
    """Process trades from Polars DataFrame (optimized pipeline).

    This is the recommended API for Polars users. Uses lazy evaluation
    and minimal dict conversion for best performance.

    Parameters
    ----------
    trades : polars.DataFrame or polars.LazyFrame
        Trade data with columns:
        - timestamp: int64 (milliseconds since epoch)
        - price: float
        - quantity (or volume): float
    threshold_decimal_bps : int, default=250
        Threshold in decimal basis points (250 = 25bps = 0.25%)

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame ready for backtesting.py, with:
        - DatetimeIndex (timestamp)
        - Capitalized columns: Open, High, Low, Close, Volume

    Examples
    --------
    With LazyFrame (predicate pushdown):

    >>> import polars as pl
    >>> from rangebar import process_trades_polars
    >>> lazy_df = pl.scan_parquet("trades.parquet")
    >>> lazy_filtered = lazy_df.filter(pl.col("timestamp") >= 1704067200000)
    >>> df = process_trades_polars(lazy_filtered, threshold_decimal_bps=250)

    With DataFrame:

    >>> df = pl.read_parquet("trades.parquet")
    >>> bars = process_trades_polars(df)

    Notes
    -----
    Performance optimization:
    - Only required columns are extracted (timestamp, price, quantity)
    - Lazy evaluation: predicates pushed to I/O layer
    - 2-3x faster than process_trades_to_dataframe() for Polars inputs

    See Also
    --------
    process_trades_to_dataframe : Process trades from pandas DataFrame or dict list
    get_range_bars : Full pipeline with data fetching and caching
    """
|
|
1060
|
+
|
|
1061
|
+
def process_trades_to_dataframe(
    trades: list[dict] | pd.DataFrame,
    threshold_decimal_bps: int = 250,
    include_microstructure: bool = False,
) -> pd.DataFrame:
    """Process trades into range bars from pandas DataFrame or dict list.

    Parameters
    ----------
    trades : list[dict] or pd.DataFrame
        Trade data. If list[dict], each dict needs:
        - timestamp: int (milliseconds since epoch)
        - price: float
        - quantity: float
    threshold_decimal_bps : int, default=250
        Threshold in decimal basis points (250 = 25bps = 0.25%)
    include_microstructure : bool, default=False
        Include microstructure columns (vwap, buy_volume, sell_volume)

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame ready for backtesting.py

    See Also
    --------
    process_trades_polars : Faster alternative for Polars inputs
    get_range_bars : Full pipeline with data fetching and caching
    """
|