rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54):
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,736 @@
1
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
2
+ # Issue #46: Modularization M4 - Extract get_range_bars from __init__.py
3
+ """Date-bounded range bar generation.
4
+
5
+ Provides get_range_bars() - the single entry point for all range bar generation
6
+ with automatic data fetching, caching, and ouroboros boundary handling.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from collections.abc import Iterator
12
+ from typing import TYPE_CHECKING, Any, Literal
13
+
14
+ import pandas as pd
15
+
16
+ from rangebar.constants import (
17
+ THRESHOLD_DECIMAL_MAX,
18
+ THRESHOLD_DECIMAL_MIN,
19
+ THRESHOLD_PRESETS,
20
+ )
21
+ from rangebar.processors.core import RangeBarProcessor
22
+ from rangebar.validation.cache_staleness import detect_staleness
23
+
24
+ from .helpers import (
25
+ _fetch_binance,
26
+ _fetch_exness,
27
+ _process_binance_trades,
28
+ _process_exness_ticks,
29
+ _stream_range_bars_binance,
30
+ )
31
+
32
+ if TYPE_CHECKING:
33
+ import polars as pl
34
+
35
+
36
def get_range_bars(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: int | str = 250,
    *,
    # Ouroboros: Cyclical reset boundaries (v11.0+)
    ouroboros: Literal["year", "month", "week"] = "year",
    include_orphaned_bars: bool = False,
    # Streaming options (v8.0+)
    materialize: bool = True,
    batch_size: int = 10_000,
    # Data source configuration
    source: str = "binance",
    market: str = "spot",
    # Exness-specific options
    validation: str = "strict",
    # Processing options
    include_incomplete: bool = False,
    include_microstructure: bool = False,
    include_exchange_sessions: bool = False,  # Issue #8: Exchange session flags
    # Timestamp gating (Issue #36)
    prevent_same_timestamp_close: bool = True,
    # Data integrity (Issue #43)
    verify_checksum: bool = True,
    # Caching options
    use_cache: bool = True,
    fetch_if_missing: bool = True,
    cache_dir: str | None = None,
    # Memory guards (Issue #49)
    max_memory_mb: int | None = None,
    # Inter-bar features (Issue #59)
    inter_bar_lookback_count: int | None = None,
) -> pd.DataFrame | Iterator[pl.DataFrame]:
    """Get range bars for a symbol with automatic data fetching and caching.

    This is the single entry point for all range bar generation. It supports
    multiple data sources (Binance crypto, Exness forex), all market types,
    and exposes the full configurability of the underlying Rust engine.

    Parameters
    ----------
    symbol : str
        Trading symbol (uppercase).
        - Binance: "BTCUSDT", "ETHUSDT", etc.
        - Exness: "EURUSD", "GBPUSD", "XAUUSD", etc.
    start_date : str
        Start date in YYYY-MM-DD format.
    end_date : str
        End date in YYYY-MM-DD format.
    threshold_decimal_bps : int or str, default=250
        Threshold in decimal basis points. Can be:
        - Integer: Direct value (250 dbps = 0.25%)
        - String preset: "micro" (10 dbps), "tight" (50 dbps), "standard" (100 dbps),
          "medium" (250 dbps), "wide" (500 dbps), "macro" (1000 dbps)
        Valid range: 1-100,000 dbps (0.001% to 100%)
    ouroboros : {"year", "month", "week"}, default="year"
        Cyclical reset boundary for reproducible bar construction (v11.0+).
        Processor state resets at each boundary for deterministic results.
        - "year" (default): Reset at January 1st 00:00:00 UTC (cryptocurrency)
        - "month": Reset at 1st of each month 00:00:00 UTC
        - "week": Reset at Sunday 00:00:00 UTC (required for Forex)
        Named after the Greek serpent eating its tail (οὐροβόρος).
    include_orphaned_bars : bool, default=False
        Include incomplete bars from ouroboros boundaries.
        If True, orphaned bars are included with ``is_orphan=True`` column.
        Useful for analysis; filter with ``df[~df.get('is_orphan', False)]``.
    materialize : bool, default=True
        If True, return a single pd.DataFrame (legacy behavior).
        If False, return an Iterator[pl.DataFrame] that yields batches
        of bars for memory-efficient streaming (v8.0+).
    batch_size : int, default=10_000
        Number of bars per batch when materialize=False.
        Each batch is ~500 KB. Only used in streaming mode.

    source : str, default="binance"
        Data source: "binance" or "exness"
    market : str, default="spot"
        Market type (Binance only):
        - "spot": Spot market
        - "futures-um" or "um": USD-M perpetual futures
        - "futures-cm" or "cm": COIN-M perpetual futures
    validation : str, default="strict"
        Validation strictness (Exness only):
        - "permissive": Basic checks (bid > 0, ask > 0, bid < ask)
        - "strict": + Spread < 10% (catches obvious errors)
        - "paranoid": + Spread < 1% (flags suspicious data)
    include_incomplete : bool, default=False
        Include the final incomplete bar (useful for analysis).
        If False (default), only completed bars are returned.
    include_microstructure : bool, default=False
        Include market microstructure columns:
        - buy_volume, sell_volume: Volume by aggressor side
        - vwap: Volume-weighted average price
        - trade_count: Number of trades in bar
        - (Exness) spread_min, spread_max, spread_avg: Spread statistics
    include_exchange_sessions : bool, default=False
        Include traditional exchange market session flags (Issue #8).
        When True, adds boolean columns indicating active sessions at bar close:
        - exchange_session_sydney: ASX (10:00-16:00 Sydney time)
        - exchange_session_tokyo: TSE (09:00-15:00 Tokyo time)
        - exchange_session_london: LSE (08:00-17:00 London time)
        - exchange_session_newyork: NYSE (10:00-16:00 New York time)
        Useful for analyzing crypto/forex behavior during traditional market hours.
    prevent_same_timestamp_close : bool, default=True
        Timestamp gating for flash crash prevention (Issue #36).
        If True (default): A bar cannot close on the same timestamp it opened.
        This prevents flash crash scenarios from creating thousands of bars
        at identical timestamps. If False: Legacy v8 behavior where bars can
        close immediately on breach regardless of timestamp. Use False for
        comparative analysis between old and new behavior.
    verify_checksum : bool, default=True
        Verify SHA-256 checksum of downloaded data (Issue #43).
        If True (default): Verify downloaded ZIP files against Binance-provided
        checksums to detect data corruption early. If verification fails,
        raises RuntimeError. If False: Skip checksum verification for faster
        downloads (use when data integrity is verified elsewhere).
    use_cache : bool, default=True
        Cache tick data locally in Parquet format.
    fetch_if_missing : bool, default=True
        If True (default), fetch tick data from source when not available
        in cache. If False, return only cached data (may return empty
        DataFrame if no cached data exists for the date range).
    cache_dir : str or None, default=None
        Custom cache directory. If None, uses platform default:
        - macOS: ~/Library/Caches/rangebar/
        - Linux: ~/.cache/rangebar/
        - Windows: %LOCALAPPDATA%/terrylica/rangebar/Cache/
    max_memory_mb : int or None, default=None
        Memory budget in MB for tick data loading. If the estimated
        in-memory size exceeds this limit, raises MemoryError. If None,
        uses automatic detection (80% of available RAM). Set to 0 to
        disable all memory guards.
    inter_bar_lookback_count : int or None, default=None
        Number of trades to keep in lookback buffer for inter-bar feature
        computation (Issue #59). If set, enables 16 inter-bar features
        computed from trades BEFORE each bar opens. Recommended: 100-500.
        If None (default), inter-bar features are disabled.

    Returns
    -------
    pd.DataFrame or Iterator[pl.DataFrame]
        If materialize=True (default): Single pd.DataFrame ready for
        backtesting.py, with DatetimeIndex and OHLCV columns.

        If materialize=False: Iterator yielding pl.DataFrame batches
        (batch_size bars each) for memory-efficient streaming. Convert
        to pandas with: ``pl.concat(list(iterator)).to_pandas()``

        Columns: Open, High, Low, Close, Volume
        (if include_microstructure) Additional columns

    Raises
    ------
    ValueError
        - Invalid threshold (outside 1-100,000 range)
        - Invalid dates or date format
        - Unknown source, market, or validation level
        - Unknown threshold preset name
    RuntimeError
        - Data fetching failed
        - No data available for date range
        - Feature not enabled (e.g., Exness without exness feature)

    Examples
    --------
    Basic usage - Binance spot:

    >>> from rangebar import get_range_bars
    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-06-30")

    Using threshold presets:

    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", "tight")

    Binance USD-M Futures:

    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-03-31", market="futures-um")

    Exness forex with spread monitoring:

    >>> df = get_range_bars(
    ...     "EURUSD", "2024-01-01", "2024-01-31",
    ...     source="exness",
    ...     threshold_decimal_bps="standard",
    ...     include_microstructure=True,  # includes spread stats
    ... )

    Include incomplete bar for analysis:

    >>> df = get_range_bars(
    ...     "ETHUSDT", "2024-01-01", "2024-01-07",
    ...     include_incomplete=True,
    ... )

    Use with backtesting.py:

    >>> from backtesting import Backtest, Strategy
    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-12-31")
    >>> bt = Backtest(df, MyStrategy, cash=10000, commission=0.0002)
    >>> stats = bt.run()

    Streaming mode for large datasets (v8.0+):

    >>> import polars as pl
    >>> # Memory-efficient: yields ~500 KB batches
    >>> for batch in get_range_bars(
    ...     "BTCUSDT", "2024-01-01", "2024-06-30",
    ...     materialize=False,
    ...     batch_size=10_000,
    ... ):
    ...     process_batch(batch)  # batch is pl.DataFrame
    ...
    >>> # Or collect to single DataFrame:
    >>> batches = list(get_range_bars(
    ...     "BTCUSDT", "2024-01-01", "2024-03-31",
    ...     materialize=False,
    ... ))
    >>> df = pl.concat(batches).to_pandas()

    Notes
    -----
    Threshold units (decimal basis points):
        The threshold is specified in decimal basis points (0.1bps) for precision.
        Common conversions:
        - 10 = 1bps = 0.01%
        - 100 = 10bps = 0.1%
        - 250 = 25bps = 0.25%
        - 1000 = 100bps = 1%

    Tier-1 symbols:
        18 high-liquidity symbols available on ALL Binance markets:
        AAVE, ADA, AVAX, BCH, BNB, BTC, DOGE, ETH, FIL,
        LINK, LTC, NEAR, SOL, SUI, UNI, WIF, WLD, XRP

    Non-lookahead guarantee:
        - Threshold computed from bar OPEN price only
        - Breaching trade included in closing bar
        - No future information used in bar construction

    See Also
    --------
    TIER1_SYMBOLS : Tuple of high-liquidity symbols
    THRESHOLD_PRESETS : Dictionary of named threshold values
    """
    from datetime import datetime
    from pathlib import Path

    from rangebar.storage.parquet import TickStorage

    # -------------------------------------------------------------------------
    # Resolve threshold (support presets)
    # -------------------------------------------------------------------------
    if isinstance(threshold_decimal_bps, str):
        if threshold_decimal_bps not in THRESHOLD_PRESETS:
            msg = (
                f"Unknown threshold preset: {threshold_decimal_bps!r}. "
                f"Valid presets: {list(THRESHOLD_PRESETS.keys())}"
            )
            raise ValueError(msg)
        threshold_decimal_bps = THRESHOLD_PRESETS[threshold_decimal_bps]

    if not THRESHOLD_DECIMAL_MIN <= threshold_decimal_bps <= THRESHOLD_DECIMAL_MAX:
        msg = (
            f"threshold_decimal_bps must be between {THRESHOLD_DECIMAL_MIN} and {THRESHOLD_DECIMAL_MAX}, "
            f"got {threshold_decimal_bps}"
        )
        raise ValueError(msg)

    # -------------------------------------------------------------------------
    # Validate ouroboros mode (v11.0+)
    # -------------------------------------------------------------------------
    from rangebar.ouroboros import validate_ouroboros_mode

    ouroboros = validate_ouroboros_mode(ouroboros)

    # -------------------------------------------------------------------------
    # Validate source and market
    # -------------------------------------------------------------------------
    source = source.lower()
    if source not in ("binance", "exness"):
        msg = f"Unknown source: {source!r}. Must be 'binance' or 'exness'"
        raise ValueError(msg)

    # Normalize market type
    market_map = {
        "spot": "spot",
        "futures-um": "um",
        "futures-cm": "cm",
        "um": "um",
        "cm": "cm",
    }
    market = market.lower()
    if source == "binance" and market not in market_map:
        msg = (
            f"Unknown market: {market!r}. "
            "Must be 'spot', 'futures-um'/'um', or 'futures-cm'/'cm'"
        )
        raise ValueError(msg)
    # For non-binance sources the market string passes through unvalidated.
    market_normalized = market_map.get(market, market)

    # Validate Exness validation strictness
    validation = validation.lower()
    if source == "exness" and validation not in ("permissive", "strict", "paranoid"):
        msg = (
            f"Unknown validation: {validation!r}. "
            "Must be 'permissive', 'strict', or 'paranoid'"
        )
        raise ValueError(msg)

    # -------------------------------------------------------------------------
    # Parse and validate dates
    # -------------------------------------------------------------------------
    try:
        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
    except ValueError as e:
        msg = f"Invalid date format. Use YYYY-MM-DD: {e}"
        raise ValueError(msg) from e

    if start_dt > end_dt:
        msg = "start_date must be <= end_date"
        raise ValueError(msg)

    # Convert to milliseconds for cache lookup
    # NOTE(review): strptime() yields naive datetimes, so .timestamp() applies
    # the LOCAL timezone; confirm the cache keys are intended to be local-time
    # based rather than UTC.
    start_ts = int(start_dt.timestamp() * 1000)
    end_ts = int((end_dt.timestamp() + 86399) * 1000)  # End of day

    # -------------------------------------------------------------------------
    # Streaming mode (v8.0+): Return generator instead of materializing
    # -------------------------------------------------------------------------
    if not materialize:
        if source == "exness":
            msg = (
                "Streaming mode (materialize=False) is not yet supported for Exness. "
                "Use materialize=True or use Binance source."
            )
            raise ValueError(msg)

        # Binance streaming: yields batches directly from network
        return _stream_range_bars_binance(
            symbol=symbol,
            start_date=start_date,
            end_date=end_date,
            threshold_decimal_bps=threshold_decimal_bps,
            market=market_normalized,
            batch_size=batch_size,
            include_microstructure=include_microstructure,
            include_incomplete=include_incomplete,
            prevent_same_timestamp_close=prevent_same_timestamp_close,
            verify_checksum=verify_checksum,
            inter_bar_lookback_count=inter_bar_lookback_count,
        )

    # -------------------------------------------------------------------------
    # Check ClickHouse bar cache first (Issue #21: fast path for precomputed bars)
    # -------------------------------------------------------------------------
    if use_cache:
        try:
            from rangebar.clickhouse import RangeBarCache

            with RangeBarCache() as cache:
                # Ouroboros mode filter ensures cache isolation (Plan: sparkling-coalescing-dijkstra.md)
                cached_bars = cache.get_bars_by_timestamp_range(
                    symbol=symbol,
                    threshold_decimal_bps=threshold_decimal_bps,
                    start_ts=start_ts,
                    end_ts=end_ts,
                    include_microstructure=include_microstructure,
                    ouroboros_mode=ouroboros,
                )
                if cached_bars is not None and len(cached_bars) > 0:
                    # Tier 0 validation: Content-based staleness detection (Issue #39)
                    # This catches stale cached data from pre-v7.0 (e.g., VWAP=0)
                    if include_microstructure:
                        staleness = detect_staleness(
                            cached_bars, require_microstructure=True
                        )
                        if staleness.is_stale:
                            import logging

                            logger = logging.getLogger(__name__)
                            logger.warning(
                                "Stale cache data detected for %s: %s. "
                                "Falling through to recompute.",
                                symbol,
                                staleness.reason,
                            )
                            # Fall through to tick processing path
                        else:
                            # Fast path: return validated bars from ClickHouse (~50ms)
                            return cached_bars
                    else:
                        # Fast path: return precomputed bars from ClickHouse (~50ms)
                        return cached_bars
        except ImportError:
            # ClickHouse not available, fall through to tick processing
            pass
        except ConnectionError:
            # ClickHouse connection failed, fall through to tick processing
            pass

    # -------------------------------------------------------------------------
    # Initialize storage (Tier 1: local Parquet ticks)
    # -------------------------------------------------------------------------
    storage = TickStorage(cache_dir=Path(cache_dir) if cache_dir else None)

    # Cache key includes source and market to avoid collisions
    cache_symbol = f"{source}_{market_normalized}_{symbol}".upper()

    # -------------------------------------------------------------------------
    # Determine tick data source (cache or network)
    # -------------------------------------------------------------------------
    has_cached_ticks = use_cache and storage.has_ticks(cache_symbol, start_ts, end_ts)

    if not has_cached_ticks and not fetch_if_missing:
        # Caller opted out of network fetch: return an empty OHLCV frame.
        return pd.DataFrame(columns=["Open", "High", "Low", "Close", "Volume"])

    # For Exness, load all ticks upfront (smaller datasets)
    if source == "exness":
        if has_cached_ticks:
            tick_data = storage.read_ticks(cache_symbol, start_ts, end_ts)
        else:
            tick_data = _fetch_exness(symbol, start_date, end_date, validation)
            if use_cache and not tick_data.is_empty():
                storage.write_ticks(cache_symbol, tick_data)
        if tick_data.is_empty():
            msg = f"No data available for {symbol} from {start_date} to {end_date}"
            raise RuntimeError(msg)
        return _process_exness_ticks(
            tick_data,
            symbol,
            threshold_decimal_bps,
            validation,
            include_incomplete,
            include_microstructure,
            inter_bar_lookback_count=inter_bar_lookback_count,
        )

    # -------------------------------------------------------------------------
    # MEM-010: Pre-flight memory estimation (Issue #49)
    # Check if cached tick data would fit in memory before loading.
    # max_memory_mb == 0 disables the guard entirely.
    # -------------------------------------------------------------------------
    if has_cached_ticks and max_memory_mb != 0:
        import warnings

        from rangebar.resource_guard import estimate_tick_memory

        estimate = estimate_tick_memory(
            storage, cache_symbol, start_ts, end_ts
        )
        if estimate.recommendation == "will_oom":
            msg = (
                f"Loading {symbol} ({start_date} -> {end_date}) would "
                f"require ~{estimate.estimated_memory_mb} MB "
                f"(available: {estimate.system_available_mb} MB). "
                f"Use precompute_range_bars() for streaming processing."
            )
            if max_memory_mb is not None:
                # Explicit budget: delegate the raise decision to the estimate.
                estimate.check_or_raise(max_mb=max_memory_mb)
            else:
                raise MemoryError(msg)
        elif estimate.recommendation == "streaming_recommended":
            warnings.warn(
                f"Large tick dataset for {symbol} "
                f"(~{estimate.estimated_memory_mb} MB). "
                f"Consider precompute_range_bars() for memory-safe "
                f"processing.",
                ResourceWarning,
                stacklevel=2,
            )

    # -------------------------------------------------------------------------
    # Binance: Process with ouroboros segment iteration (Issue #51)
    # Load ticks per-segment to avoid OOM on large date ranges.
    # Each segment loads only the ticks within its boundaries (~1 year max).
    # NOTE(review): verify_checksum is not forwarded to _fetch_binance on this
    # materialized path (only the streaming path uses it) — confirm intended.
    # -------------------------------------------------------------------------
    from rangebar.ouroboros import iter_ouroboros_segments

    all_bars: list[pd.DataFrame] = []
    processor: RangeBarProcessor | None = None
    any_data_found = False

    for segment_start, segment_end, boundary in iter_ouroboros_segments(
        start_dt.date(), end_dt.date(), ouroboros
    ):
        # Reset processor at ouroboros boundary
        if boundary is not None and processor is not None:
            orphaned_bar = processor.reset_at_ouroboros()
            if include_orphaned_bars and orphaned_bar is not None:
                # Add orphan metadata
                orphaned_bar["is_orphan"] = True
                orphaned_bar["ouroboros_boundary"] = boundary.timestamp
                orphaned_bar["ouroboros_reason"] = boundary.reason
                orphan_df = pd.DataFrame([orphaned_bar])
                # Convert timestamp to datetime index
                if "timestamp" in orphan_df.columns:
                    orphan_df["timestamp"] = pd.to_datetime(
                        orphan_df["timestamp"], unit="us", utc=True
                    )
                    orphan_df = orphan_df.set_index("timestamp")
                all_bars.append(orphan_df)

        # Load tick data scoped to this segment (not the full range)
        segment_start_ms = int(segment_start.timestamp() * 1_000)
        segment_end_ms = int(segment_end.timestamp() * 1_000)

        if has_cached_ticks:
            segment_ticks = storage.read_ticks(
                cache_symbol, segment_start_ms, segment_end_ms
            )
        else:
            # Fetch from network for this segment only
            seg_start_str = segment_start.strftime("%Y-%m-%d")
            seg_end_str = segment_end.strftime("%Y-%m-%d")
            segment_ticks = _fetch_binance(
                symbol, seg_start_str, seg_end_str, market_normalized
            )
            # Cache segment ticks
            if use_cache and not segment_ticks.is_empty():
                storage.write_ticks(cache_symbol, segment_ticks)

        if segment_ticks.is_empty():
            continue

        any_data_found = True

        # Process segment (reuse processor for state continuity within segment)
        segment_bars, processor = _process_binance_trades(
            segment_ticks,
            threshold_decimal_bps,
            include_incomplete,
            include_microstructure,
            processor=processor,
            symbol=symbol,
            prevent_same_timestamp_close=prevent_same_timestamp_close,
            inter_bar_lookback_count=inter_bar_lookback_count,
        )

        if segment_bars is not None and not segment_bars.empty:
            all_bars.append(segment_bars)

    if not any_data_found:
        msg = f"No data available for {symbol} from {start_date} to {end_date}"
        raise RuntimeError(msg)

    # Concatenate all segments
    if not all_bars:
        bars_df = pd.DataFrame(columns=["Open", "High", "Low", "Close", "Volume"])
    elif len(all_bars) == 1:
        bars_df = all_bars[0]
    else:
        bars_df = pd.concat(all_bars, axis=0)
        bars_df = bars_df.sort_index()

    # -------------------------------------------------------------------------
    # Add exchange session flags (Issue #8)
    # -------------------------------------------------------------------------
    # Session flags indicate which traditional market sessions were active
    # at bar close time. Useful for analyzing crypto/forex behavior.
    if include_exchange_sessions and not bars_df.empty:
        import warnings

        from rangebar.ouroboros import get_active_exchange_sessions

        # Compute session flags for each bar based on close timestamp (index)
        session_data = {
            "exchange_session_sydney": [],
            "exchange_session_tokyo": [],
            "exchange_session_london": [],
            "exchange_session_newyork": [],
        }
        for ts in bars_df.index:
            # Ensure timezone-aware UTC timestamp
            if ts.tzinfo is None:
                ts_utc = ts.tz_localize("UTC")
            else:
                ts_utc = ts.tz_convert("UTC")
            # Suppress nanosecond warning - session detection is hour-granularity
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", "Discarding nonzero nanoseconds")
                flags = get_active_exchange_sessions(ts_utc.to_pydatetime())
            session_data["exchange_session_sydney"].append(flags.sydney)
            session_data["exchange_session_tokyo"].append(flags.tokyo)
            session_data["exchange_session_london"].append(flags.london)
            session_data["exchange_session_newyork"].append(flags.newyork)

        # Add columns to DataFrame
        for col, values in session_data.items():
            bars_df[col] = values

    # -------------------------------------------------------------------------
    # Write computed bars to ClickHouse cache (Issue #37)
    # -------------------------------------------------------------------------
    # Cache write is non-blocking: failures don't affect the return value.
    # The computation succeeded, so we return bars even if caching fails.
    if use_cache and bars_df is not None and not bars_df.empty:
        try:
            from rangebar.clickhouse import RangeBarCache
            from rangebar.exceptions import CacheError

            with RangeBarCache() as cache:
                # Use store_bars_bulk for bars computed without exact CacheKey
                # Ouroboros mode determines cache key (Plan: sparkling-coalescing-dijkstra.md)
                written = cache.store_bars_bulk(
                    symbol=symbol,
                    threshold_decimal_bps=threshold_decimal_bps,
                    bars=bars_df,
                    version="",  # Version tracked elsewhere
                    ouroboros_mode=ouroboros,
                )
            import logging

            logger = logging.getLogger(__name__)
            logger.info(
                "Cached %d bars for %s @ %d dbps",
                written,
                symbol,
                threshold_decimal_bps,
            )
        except ImportError:
            # ClickHouse not available - skip caching
            pass
        except ConnectionError:
            # ClickHouse connection failed - skip caching
            pass
        except (CacheError, OSError, RuntimeError) as e:
            # Log but don't fail - cache is optimization layer
            # CacheError: All cache-specific errors
            # OSError: Network/disk errors
            # RuntimeError: ClickHouse driver errors
            import logging

            logger = logging.getLogger(__name__)
            logger.warning("Cache write failed (non-fatal): %s", e)

    return bars_df
673
+
674
+
675
def get_range_bars_pandas(
    symbol: str,
    start_date: str,
    end_date: str,
    threshold_decimal_bps: int | str = 250,
    **kwargs: Any,
) -> pd.DataFrame:
    """Return range bars as a pandas DataFrame (deprecated compatibility shim).

    .. deprecated:: 8.0
        Call ``get_range_bars(materialize=True)`` directly instead.
        Scheduled for removal in v9.0.

    Kept for callers written before the streaming API landed: it emits a
    DeprecationWarning and then delegates to ``get_range_bars()`` with
    ``materialize=True``, forwarding every other argument untouched.

    Parameters
    ----------
    symbol : str
        Trading symbol (e.g., "BTCUSDT")
    start_date : str
        Start date in YYYY-MM-DD format
    end_date : str
        End date in YYYY-MM-DD format
    threshold_decimal_bps : int or str, default=250
        Threshold in decimal basis points
    **kwargs
        Additional arguments passed to ``get_range_bars()``

    Returns
    -------
    pd.DataFrame
        OHLCV DataFrame ready for backtesting.py

    Examples
    --------
    Instead of:

    >>> df = get_range_bars_pandas("BTCUSDT", "2024-01-01", "2024-06-30")

    Use:

    >>> df = get_range_bars("BTCUSDT", "2024-01-01", "2024-06-30", materialize=True)
    """
    import warnings

    deprecation_message = (
        "get_range_bars_pandas() is deprecated. "
        "Use get_range_bars(materialize=True) instead. "
        "This function will be removed in v9.0."
    )
    # stacklevel=2 points the warning at the caller, not this shim.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    return get_range_bars(
        symbol,
        start_date,
        end_date,
        threshold_decimal_bps,
        materialize=True,
        **kwargs,
    )