rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff shows the content of a publicly available package version as released to one of the supported registries. It is provided for informational purposes only and reflects the package as it appears in its public registry.
Files changed (54)
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
rangebar/orchestration/helpers.py
@@ -0,0 +1,412 @@
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
+ # Issue #46: Modularization M4 - Extract shared helpers from __init__.py
+ """Shared helper functions for orchestration modules.
+
+ These internal functions handle data fetching, trade processing,
+ and streaming for both Binance and Exness data sources.
+ """
+
+ from __future__ import annotations
+
+ from collections.abc import Iterator
+ from typing import TYPE_CHECKING
+
+ import pandas as pd
+
+ from rangebar.processors.core import RangeBarProcessor
+
+ if TYPE_CHECKING:
+     import polars as pl
+
+
+ def _stream_range_bars_binance(
+     symbol: str,
+     start_date: str,
+     end_date: str,
+     threshold_decimal_bps: int,
+     market: str,
+     batch_size: int = 10_000,
+     include_microstructure: bool = False,
+     include_incomplete: bool = False,
+     prevent_same_timestamp_close: bool = True,
+     verify_checksum: bool = True,
+     inter_bar_lookback_count: int | None = None,
+ ) -> Iterator[pl.DataFrame]:
+     """Stream range bars in batches using memory-efficient chunked processing.
+
+     This is the internal generator for the Phase 4 streaming API. It:
+     1. Streams trades in 6-hour chunks from Binance via stream_binance_trades()
+     2. Processes each chunk to bars via process_trades_streaming_arrow()
+     3. Yields batches of bars as Polars DataFrames
+
+     Parameters
+     ----------
+     symbol : str
+         Trading symbol (e.g., "BTCUSDT")
+     start_date : str
+         Start date "YYYY-MM-DD"
+     end_date : str
+         End date "YYYY-MM-DD"
+     threshold_decimal_bps : int
+         Range bar threshold (250 = 0.25%)
+     market : str
+         Normalized market type: "spot", "um", or "cm"
+     batch_size : int, default=10_000
+         Number of bars per yielded DataFrame (~500 KB each)
+     include_microstructure : bool, default=False
+         Include microstructure columns in output
+     include_incomplete : bool, default=False
+         Include the final incomplete bar
+     prevent_same_timestamp_close : bool, default=True
+         Timestamp gating for flash crash prevention
+     verify_checksum : bool, default=True
+         Verify SHA-256 checksum of downloaded data
+     inter_bar_lookback_count : int, optional
+         Lookback trade count for inter-bar features (Issue #59)
+
+     Yields
+     ------
+     pl.DataFrame
+         Batches of range bars (OHLCV format, backtesting.py compatible)
+
+     Memory Usage
+     ------------
+     Peak: ~50 MB (6-hour trade chunk + bar buffer)
+     Per yield: ~500 KB (10,000 bars)
+     """
+     import polars as pl
+
+     from rangebar.conversion import _bars_list_to_polars
+
+     try:
+         from rangebar._core import MarketType, stream_binance_trades
+     except ImportError as e:
+         msg = (
+             "Streaming requires the 'data-providers' feature. "
+             "Rebuild with: maturin develop --features data-providers"
+         )
+         raise RuntimeError(msg) from e
+
+     # Map market string to enum
+     market_enum = {
+         "spot": MarketType.Spot,
+         "um": MarketType.FuturesUM,
+         "cm": MarketType.FuturesCM,
+     }[market]
+
+     # Create processor with symbol for checkpoint support
+     processor = RangeBarProcessor(
+         threshold_decimal_bps,
+         symbol=symbol,
+         prevent_same_timestamp_close=prevent_same_timestamp_close,
+         inter_bar_lookback_count=inter_bar_lookback_count,
+     )
+     bar_buffer: list[dict] = []
+
+     # Stream trades in 6-hour chunks
+     for trade_batch in stream_binance_trades(
+         symbol,
+         start_date,
+         end_date,
+         chunk_hours=6,
+         market_type=market_enum,
+         verify_checksum=verify_checksum,
+     ):
+         # Process to bars via Arrow (zero-copy to Polars)
+         arrow_batch = processor.process_trades_streaming_arrow(trade_batch)
+         bars_df = pl.from_arrow(arrow_batch)
+
+         if not bars_df.is_empty():
+             # Add to buffer
+             bar_buffer.extend(bars_df.to_dicts())
+
+             # Yield when buffer reaches batch_size
+             while len(bar_buffer) >= batch_size:
+                 batch = bar_buffer[:batch_size]
+                 bar_buffer = bar_buffer[batch_size:]
+                 yield _bars_list_to_polars(batch, include_microstructure)
+
+     # Handle incomplete bar at end
+     if include_incomplete:
+         incomplete = processor.get_incomplete_bar()
+         if incomplete:
+             bar_buffer.append(incomplete)
+
+     # Yield remaining bars
+     if bar_buffer:
+         yield _bars_list_to_polars(bar_buffer, include_microstructure)
+
+
+ def _fetch_binance(
+     symbol: str,
+     start_date: str,
+     end_date: str,
+     market: str,
+ ) -> pl.DataFrame:
+     """Fetch Binance aggTrades data (internal).
+
+     DEPRECATED: Use stream_binance_trades() for memory-efficient streaming.
+     This function loads all trades into memory at once.
+     """
+     import warnings
+     from datetime import datetime
+
+     import polars as pl
+
+     # MEM-007: Guard deprecated batch path with date range limit (Issue #49)
+     # This function loads ALL trades into a single DataFrame. For high-volume
+     # symbols (e.g., BTCUSDT), a single month can be ~6 GB. Limit to 30 days.
+     max_days = 30
+     days = (
+         datetime.strptime(end_date, "%Y-%m-%d")
+         - datetime.strptime(start_date, "%Y-%m-%d")
+     ).days
+     if days > max_days:
+         msg = (
+             f"_fetch_binance() cannot safely load {days} days of data. "
+             f"This deprecated path loads all trades into memory at once "
+             f"(limit: {max_days} days). Use precompute_range_bars() or "
+             f"get_range_bars() with per-segment loading instead."
+         )
+         raise MemoryError(msg)
+
+     warnings.warn(
+         "_fetch_binance() is deprecated. Use stream_binance_trades() for "
+         "memory-efficient streaming. This function will be removed in v9.0.",
+         DeprecationWarning,
+         stacklevel=2,
+     )
+
+     try:
+         from rangebar._core import MarketType, fetch_binance_aggtrades
+
+         market_enum = {
+             "spot": MarketType.Spot,
+             "um": MarketType.FuturesUM,
+             "cm": MarketType.FuturesCM,
+         }[market]
+
+         trades_list = fetch_binance_aggtrades(symbol, start_date, end_date, market_enum)
+         return pl.DataFrame(trades_list)
+
+     except ImportError as e:
+         msg = (
+             "Binance data fetching requires the 'data-providers' feature. "
+             "Rebuild with: maturin develop --features data-providers"
+         )
+         raise RuntimeError(msg) from e
+
+
+ def _fetch_exness(
+     symbol: str,
+     start_date: str,
+     end_date: str,
+     validation: str,
+ ) -> pl.DataFrame:
+     """Fetch Exness tick data (internal)."""
+     try:
+         from rangebar.exness import fetch_exness_ticks
+
+         return fetch_exness_ticks(symbol, start_date, end_date)
+
+     except ImportError as e:
+         msg = (
+             "Exness data fetching requires the 'exness' feature. "
+             "Rebuild with: maturin develop --features data-providers"
+         )
+         raise RuntimeError(msg) from e
+
+
+ def _process_binance_trades(
+     trades: pl.DataFrame,
+     threshold_decimal_bps: int,
+     include_incomplete: bool,
+     include_microstructure: bool,
+     *,
+     processor: RangeBarProcessor | None = None,
+     symbol: str | None = None,
+     prevent_same_timestamp_close: bool = True,
+     inter_bar_lookback_count: int | None = None,
+ ) -> tuple[pd.DataFrame, RangeBarProcessor]:
+     """Process Binance trades to range bars (internal).
+
+     Parameters
+     ----------
+     trades : pl.DataFrame
+         Polars DataFrame (or LazyFrame) with tick data
+     threshold_decimal_bps : int
+         Threshold in decimal basis points
+     include_incomplete : bool
+         Include incomplete bar (not yet implemented)
+     include_microstructure : bool
+         Include microstructure columns
+     processor : RangeBarProcessor, optional
+         Existing processor with state (for cross-file continuity).
+         If None, creates a new processor.
+     symbol : str, optional
+         Symbol for checkpoint creation
+     prevent_same_timestamp_close : bool, default=True
+         Timestamp gating for flash crash prevention
+     inter_bar_lookback_count : int, optional
+         Lookback trade count for inter-bar features (Issue #59)
+
+     Returns
+     -------
+     tuple[pd.DataFrame, RangeBarProcessor]
+         (bars DataFrame, processor with updated state)
+         The processor can be used to create a checkpoint for the next file.
+     """
+     import polars as pl
+
+     # MEM-003: Apply column selection BEFORE collecting LazyFrame
+     # This enables predicate pushdown and avoids materializing unused columns
+     # Memory impact: 10-100x reduction depending on filter selectivity
+
+     # Determine volume column name (works for both DataFrame and LazyFrame)
+     if isinstance(trades, pl.LazyFrame):
+         available_cols = trades.collect_schema().names()
+     else:
+         available_cols = trades.columns
+
+     volume_col = "quantity" if "quantity" in available_cols else "volume"
+
+     # Build column list - include is_buyer_maker for microstructure features (Issue #30)
+     columns = [
+         pl.col("timestamp"),
+         pl.col("price"),
+         pl.col(volume_col).alias("quantity"),
+     ]
+     if "is_buyer_maker" in available_cols:
+         columns.append(pl.col("is_buyer_maker"))
+
+     # Apply selection (predicates pushed down for LazyFrame)
+     trades_selected = trades.select(columns)
+
+     # Collect AFTER selection (for LazyFrame)
+     if isinstance(trades_selected, pl.LazyFrame):
+         trades_minimal = trades_selected.collect()
+     else:
+         trades_minimal = trades_selected
+
+     # Use provided processor or create new one
+     if processor is None:
+         processor = RangeBarProcessor(
+             threshold_decimal_bps,
+             symbol=symbol,
+             prevent_same_timestamp_close=prevent_same_timestamp_close,
+             inter_bar_lookback_count=inter_bar_lookback_count,
+         )
+
+     # MEM-002: Process in chunks to bound memory (2.5 GB → ~50 MB per chunk)
+     # Chunked .to_dicts() avoids materializing 1M+ trade dicts at once
+     chunk_size = 100_000
+     all_bars: list[dict] = []
+
+     n_rows = len(trades_minimal)
+     for start in range(0, n_rows, chunk_size):
+         chunk = trades_minimal.slice(start, chunk_size).to_dicts()
+         bars = processor.process_trades_streaming(chunk)
+         all_bars.extend(bars)
+
+     bars = all_bars
+
+     if not bars:
+         empty_df = pd.DataFrame(
+             columns=["Open", "High", "Low", "Close", "Volume"]
+         ).set_index(pd.DatetimeIndex([]))
+         return empty_df, processor
+
+     # Build DataFrame with all fields
+     result = pd.DataFrame(bars)
+     result["timestamp"] = pd.to_datetime(result["timestamp"], format="ISO8601")
+     result = result.set_index("timestamp")
+
+     # Rename OHLCV columns to backtesting.py format
+     result = result.rename(
+         columns={
+             "open": "Open",
+             "high": "High",
+             "low": "Low",
+             "close": "Close",
+             "volume": "Volume",
+         }
+     )
+
+     if include_microstructure:
+         # Return all columns including microstructure
+         return result, processor
+
+     # Return only OHLCV columns (backtesting.py compatible)
+     return result[["Open", "High", "Low", "Close", "Volume"]], processor
+
+
+ def _process_exness_ticks(
+     ticks: pl.DataFrame,
+     symbol: str,
+     threshold_decimal_bps: int,
+     validation: str,
+     include_incomplete: bool,
+     include_microstructure: bool,
+     *,
+     inter_bar_lookback_count: int | None = None,
+ ) -> pd.DataFrame:
+     """Process Exness ticks to range bars (internal).
+
+     Note: inter_bar_lookback_count is accepted but not yet implemented for Exness.
+     TODO(Issue #59): Add inter-bar feature support for the Exness source.
+     """
+     _ = inter_bar_lookback_count  # Unused for now; Exness uses a separate processing path
+     try:
+         # Map validation string to enum
+         from rangebar._core import ValidationStrictness
+         from rangebar.exness import process_exness_ticks_to_dataframe
+
+         validation_enum = {
+             "permissive": ValidationStrictness.Permissive,
+             "strict": ValidationStrictness.Strict,
+             "paranoid": ValidationStrictness.Paranoid,
+         }[validation]
+
+         # Get instrument enum
+         from rangebar._core import ExnessInstrument
+
+         instrument_map = {
+             "EURUSD": ExnessInstrument.EURUSD,
+             "GBPUSD": ExnessInstrument.GBPUSD,
+             "USDJPY": ExnessInstrument.USDJPY,
+             "AUDUSD": ExnessInstrument.AUDUSD,
+             "USDCAD": ExnessInstrument.USDCAD,
+             "NZDUSD": ExnessInstrument.NZDUSD,
+             "EURGBP": ExnessInstrument.EURGBP,
+             "EURJPY": ExnessInstrument.EURJPY,
+             "GBPJPY": ExnessInstrument.GBPJPY,
+             "XAUUSD": ExnessInstrument.XAUUSD,
+         }
+
+         if symbol.upper() not in instrument_map:
+             msg = (
+                 f"Unknown Exness instrument: {symbol}. "
+                 f"Valid instruments: {list(instrument_map.keys())}"
+             )
+             raise ValueError(msg)
+
+         instrument = instrument_map[symbol.upper()]
+
+         df = process_exness_ticks_to_dataframe(
+             ticks.to_pandas(),
+             instrument,
+             threshold_decimal_bps,
+             validation_enum,
+         )
+
+         if not include_microstructure:
+             return df[["Open", "High", "Low", "Close", "Volume"]]
+
+         return df
+
+     except ImportError as e:
+         msg = (
+             "Exness processing requires the 'exness' feature. "
+             "Rebuild with: maturin develop --features data-providers"
+         )
+         raise RuntimeError(msg) from e
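
The streaming generator above is the memory-bounded alternative to the deprecated `_fetch_binance()` batch loader: bars accumulate in `bar_buffer` and flush every `batch_size` rows, so peak memory stays near one 6-hour trade chunk. A minimal consumption sketch follows; it is illustrative only, since these helpers are private and the public wrappers in `rangebar/__init__.py` are not part of this diff. It assumes a wheel built with the data-providers feature.

```python
# Illustrative sketch: consume the internal streaming generator directly.
# The supported surface is the public API (e.g., the get_range_bars() /
# precompute_range_bars() entry points referenced in this file).
from rangebar.orchestration.helpers import _stream_range_bars_binance

total_bars = 0
for batch in _stream_range_bars_binance(
    symbol="BTCUSDT",
    start_date="2024-01-01",
    end_date="2024-01-07",
    threshold_decimal_bps=250,  # 250 decimal bps = 0.25% range bars
    market="um",
    batch_size=10_000,
):
    # Each yielded Polars DataFrame holds at most batch_size bars
    # (~500 KB), regardless of how long the date range is.
    total_bars += batch.height

print(f"built {total_bars} range bars")
```

Note that `_process_binance_trades()` returns the processor alongside the bars for a related reason: its state can seed a checkpoint, keeping bar construction continuous across file boundaries.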
rangebar/orchestration/models.py
@@ -0,0 +1,76 @@
+ # Issue #46: Modularization M4 - Extract data classes from __init__.py
+ """Data classes for orchestration results and progress tracking."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass
+ from typing import Literal
+
+
+ @dataclass
+ class PrecomputeProgress:
+     """Progress update for precomputation.
+
+     Attributes
+     ----------
+     phase : Literal["fetching", "processing", "caching"]
+         Current phase of precomputation
+     current_month : str
+         Current month being processed ("YYYY-MM" format)
+     months_completed : int
+         Number of months completed
+     months_total : int
+         Total number of months to process
+     bars_generated : int
+         Total bars generated so far
+     ticks_processed : int
+         Total ticks processed so far
+     elapsed_seconds : float
+         Elapsed time since precomputation started
+     """
+
+     phase: Literal["fetching", "processing", "caching"]
+     current_month: str
+     months_completed: int
+     months_total: int
+     bars_generated: int
+     ticks_processed: int
+     elapsed_seconds: float
+
+
+ @dataclass
+ class PrecomputeResult:
+     """Result of precomputation.
+
+     Attributes
+     ----------
+     symbol : str
+         Trading symbol (e.g., "BTCUSDT")
+     threshold_decimal_bps : int
+         Threshold used for bar construction
+     start_date : str
+         Start date of precomputation ("YYYY-MM-DD")
+     end_date : str
+         End date of precomputation ("YYYY-MM-DD")
+     total_bars : int
+         Total number of bars generated
+     total_ticks : int
+         Total number of ticks processed
+     elapsed_seconds : float
+         Total elapsed time for precomputation
+     continuity_valid : bool | None
+         True if all bars pass continuity validation, False if not,
+         None if validation was skipped
+     cache_key : str
+         Cache key for the generated bars
+     """
+
+     symbol: str
+     threshold_decimal_bps: int
+     start_date: str
+     end_date: str
+     total_bars: int
+     total_ticks: int
+     elapsed_seconds: float
+     continuity_valid: bool | None
+     cache_key: str
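
Both dataclasses are plain carriers: `PrecomputeProgress` is emitted while precomputation runs, and `PrecomputeResult` summarizes the finished run. A hedged sketch of a progress consumer follows; the field accesses match the dataclass above, but how `precompute_range_bars()` accepts such a callback is defined in `rangebar/orchestration/precompute.py`, which this diff does not show, so the hook itself is an assumption.

```python
# Hypothetical progress callback: field accesses match PrecomputeProgress,
# but the callback-registration mechanism is not shown in this diff.
from rangebar.orchestration.models import PrecomputeProgress


def report(progress: PrecomputeProgress) -> None:
    # Guard against months_total == 0 before computing a percentage.
    pct = 100.0 * progress.months_completed / max(progress.months_total, 1)
    print(
        f"[{progress.phase}] {progress.current_month}: {pct:.0f}% "
        f"({progress.bars_generated:,} bars from "
        f"{progress.ticks_processed:,} ticks, "
        f"{progress.elapsed_seconds:.1f}s elapsed)"
    )
```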