rangebar-11.6.1-cp313-cp313-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. rangebar/CLAUDE.md +327 -0
  2. rangebar/__init__.py +227 -0
  3. rangebar/__init__.pyi +1089 -0
  4. rangebar/_core.cpython-313-darwin.so +0 -0
  5. rangebar/checkpoint.py +472 -0
  6. rangebar/cli.py +298 -0
  7. rangebar/clickhouse/CLAUDE.md +139 -0
  8. rangebar/clickhouse/__init__.py +100 -0
  9. rangebar/clickhouse/bulk_operations.py +309 -0
  10. rangebar/clickhouse/cache.py +734 -0
  11. rangebar/clickhouse/client.py +121 -0
  12. rangebar/clickhouse/config.py +141 -0
  13. rangebar/clickhouse/mixin.py +120 -0
  14. rangebar/clickhouse/preflight.py +504 -0
  15. rangebar/clickhouse/query_operations.py +345 -0
  16. rangebar/clickhouse/schema.sql +187 -0
  17. rangebar/clickhouse/tunnel.py +222 -0
  18. rangebar/constants.py +288 -0
  19. rangebar/conversion.py +177 -0
  20. rangebar/exceptions.py +207 -0
  21. rangebar/exness.py +364 -0
  22. rangebar/hooks.py +311 -0
  23. rangebar/logging.py +171 -0
  24. rangebar/notify/__init__.py +15 -0
  25. rangebar/notify/pushover.py +155 -0
  26. rangebar/notify/telegram.py +271 -0
  27. rangebar/orchestration/__init__.py +20 -0
  28. rangebar/orchestration/count_bounded.py +797 -0
  29. rangebar/orchestration/helpers.py +412 -0
  30. rangebar/orchestration/models.py +76 -0
  31. rangebar/orchestration/precompute.py +498 -0
  32. rangebar/orchestration/range_bars.py +736 -0
  33. rangebar/orchestration/tick_fetcher.py +226 -0
  34. rangebar/ouroboros.py +454 -0
  35. rangebar/processors/__init__.py +22 -0
  36. rangebar/processors/api.py +383 -0
  37. rangebar/processors/core.py +522 -0
  38. rangebar/resource_guard.py +567 -0
  39. rangebar/storage/__init__.py +22 -0
  40. rangebar/storage/checksum_registry.py +218 -0
  41. rangebar/storage/parquet.py +728 -0
  42. rangebar/streaming.py +300 -0
  43. rangebar/validation/__init__.py +69 -0
  44. rangebar/validation/cache_staleness.py +277 -0
  45. rangebar/validation/continuity.py +664 -0
  46. rangebar/validation/gap_classification.py +294 -0
  47. rangebar/validation/post_storage.py +317 -0
  48. rangebar/validation/tier1.py +175 -0
  49. rangebar/validation/tier2.py +261 -0
  50. rangebar-11.6.1.dist-info/METADATA +308 -0
  51. rangebar-11.6.1.dist-info/RECORD +54 -0
  52. rangebar-11.6.1.dist-info/WHEEL +4 -0
  53. rangebar-11.6.1.dist-info/entry_points.txt +2 -0
  54. rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,522 @@
1
+ # polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
2
+ # Issue #46: Modularization M2 - Extract RangeBarProcessor from __init__.py
3
+ """RangeBarProcessor: Core processor for converting tick data to range bars.
4
+
5
+ This module contains the RangeBarProcessor class, which wraps the Rust-based
6
+ PyRangeBarProcessor to provide a Pythonic interface for range bar construction.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import TYPE_CHECKING
12
+
13
+ import pandas as pd
14
+
15
+ from rangebar._core import PositionVerification
16
+ from rangebar._core import PyRangeBarProcessor as _PyRangeBarProcessor
17
+
18
+ if TYPE_CHECKING:
19
+ from arro3.core import RecordBatch as PyRecordBatch
20
+
21
+
22
class RangeBarProcessor:
    """Process tick-level trade data into range bars.

    Range bars close when price moves ±threshold from the bar's opening price,
    providing market-adaptive time intervals that eliminate arbitrary time-based
    artifacts.

    Parameters
    ----------
    threshold_decimal_bps : int
        Threshold in decimal basis points.
        Examples: 250 = 25bps = 0.25%, 100 = 10bps = 0.1%
        Valid range: [1, 100_000] (0.001% to 100%)
    symbol : str, optional
        Trading symbol (e.g., "BTCUSDT"). Required for checkpoint creation.
    prevent_same_timestamp_close : bool, default=True
        Timestamp gating for flash crash prevention (Issue #36).
        If True (default): A bar cannot close on the same timestamp it opened.
        This prevents flash crash scenarios from creating thousands of bars
        at identical timestamps. If False: Legacy v8 behavior where bars can
        close immediately on breach regardless of timestamp.
    inter_bar_lookback_count : int, optional
        Number of trades to keep in lookback buffer for inter-bar feature
        computation (Issue #59). If set, enables 16 inter-bar features
        computed from trades BEFORE each bar opens. Recommended: 100-500.
        If None (default), inter-bar features are disabled.

    Raises
    ------
    ValueError
        If threshold_decimal_bps is out of valid range [1, 100_000]

    Examples
    --------
    Create processor and convert trades to DataFrame:

    >>> processor = RangeBarProcessor(threshold_decimal_bps=250)  # 0.25%
    >>> trades = [
    ...     {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.5},
    ...     {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.3},
    ... ]
    >>> bars = processor.process_trades(trades)
    >>> df = processor.to_dataframe(bars)
    >>> print(df.columns.tolist())
    ['Open', 'High', 'Low', 'Close', 'Volume']

    Cross-file continuity with checkpoints:

    >>> processor = RangeBarProcessor(250, symbol="BTCUSDT")
    >>> bars_file1 = processor.process_trades(file1_trades)
    >>> checkpoint = processor.create_checkpoint()
    >>> # Save checkpoint to JSON...
    >>> # Later, resume from checkpoint:
    >>> processor2 = RangeBarProcessor.from_checkpoint(checkpoint)
    >>> bars_file2 = processor2.process_trades(file2_trades)
    >>> # Incomplete bar from file1 continues correctly!

    Notes
    -----
    Non-lookahead bias guarantee:
    - Thresholds computed ONLY from bar open price (never recalculated)
    - Breaching trade INCLUDED in closing bar
    - Breaching trade also OPENS next bar

    Temporal integrity:
    - All trades processed in strict chronological order
    - Unsorted trades raise RuntimeError

    Cross-file continuity (v6.1.0+):
    - Incomplete bars are preserved across file boundaries via checkpoints
    - Thresholds are IMMUTABLE for bar's lifetime (computed from open)
    - Price hash verification detects gaps in data stream
    """

    def __init__(
        self,
        threshold_decimal_bps: int,
        symbol: str | None = None,
        *,
        prevent_same_timestamp_close: bool = True,
        inter_bar_lookback_count: int | None = None,
    ) -> None:
        """Initialize processor with given threshold.

        Parameters
        ----------
        threshold_decimal_bps : int
            Threshold in decimal basis points (250 = 25bps = 0.25%)
        symbol : str, optional
            Trading symbol for checkpoint creation
        prevent_same_timestamp_close : bool, default=True
            Timestamp gating for flash crash prevention (Issue #36)
        inter_bar_lookback_count : int, optional
            Lookback trade count for inter-bar features (Issue #59)
        """
        # Validation happens in Rust layer, which raises PyValueError
        self._processor = _PyRangeBarProcessor(
            threshold_decimal_bps,
            symbol,
            prevent_same_timestamp_close,
            inter_bar_lookback_count,
        )
        self.threshold_decimal_bps = threshold_decimal_bps
        self.symbol = symbol
        self.prevent_same_timestamp_close = prevent_same_timestamp_close
        self.inter_bar_lookback_count = inter_bar_lookback_count

    @classmethod
    def from_checkpoint(cls, checkpoint: dict) -> RangeBarProcessor:
        """Create processor from checkpoint for cross-file continuation.

        Restores processor state including any incomplete bar that was being
        built when the checkpoint was created. The incomplete bar will continue
        building from where it left off.

        Parameters
        ----------
        checkpoint : dict
            Checkpoint state from create_checkpoint()

        Returns
        -------
        RangeBarProcessor
            New processor with restored state

        Raises
        ------
        ValueError
            If checkpoint is invalid or corrupted

        Examples
        --------
        >>> import json
        >>> with open("checkpoint.json") as f:
        ...     checkpoint = json.load(f)
        >>> processor = RangeBarProcessor.from_checkpoint(checkpoint)
        >>> bars = processor.process_trades(next_file_trades)
        """
        instance = cls.__new__(cls)
        instance._processor = _PyRangeBarProcessor.from_checkpoint(checkpoint)
        instance.threshold_decimal_bps = checkpoint["threshold_decimal_bps"]
        instance.symbol = checkpoint.get("symbol")
        # Default to True for old checkpoints without this field
        instance.prevent_same_timestamp_close = checkpoint.get(
            "prevent_same_timestamp_close", True
        )
        # BUGFIX: __init__ always sets this attribute, so restore it here too;
        # otherwise checkpoint-restored instances raise AttributeError when it
        # is read. Old checkpoints without the field default to None (disabled).
        instance.inter_bar_lookback_count = checkpoint.get(
            "inter_bar_lookback_count"
        )
        return instance

    def process_trades(
        self, trades: list[dict[str, int | float]]
    ) -> list[dict[str, str | float | int]]:
        """Process trades into range bars.

        Parameters
        ----------
        trades : List[Dict]
            List of trade dictionaries with keys:
            - timestamp: int (milliseconds since epoch)
            - price: float
            - quantity: float (or 'volume')

            Optional keys:
            - agg_trade_id: int
            - first_trade_id: int
            - last_trade_id: int
            - is_buyer_maker: bool

        Returns
        -------
        List[Dict]
            List of range bar dictionaries with keys:
            - timestamp: str (RFC3339 format)
            - open: float
            - high: float
            - low: float
            - close: float
            - volume: float
            - vwap: float (volume-weighted average price)
            - buy_volume: float
            - sell_volume: float
            - individual_trade_count: int
            - agg_record_count: int

        Raises
        ------
        KeyError
            If required trade fields are missing
        RuntimeError
            If trades are not sorted chronologically

        Examples
        --------
        >>> processor = RangeBarProcessor(250)
        >>> trades = [
        ...     {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.0},
        ...     {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.0},
        ... ]
        >>> bars = processor.process_trades(trades)
        >>> len(bars)
        1
        >>> bars[0]["open"]
        42000.0
        """
        if not trades:
            return []

        return self._processor.process_trades(trades)

    def process_trades_streaming(
        self, trades: list[dict[str, int | float]]
    ) -> list[dict[str, str | float | int]]:
        """Process trades into range bars (streaming mode - preserves state).

        Unlike `process_trades()`, this method maintains processor state across
        calls, enabling continuous processing across multiple batches (e.g.,
        month-by-month or chunk-by-chunk processing).

        Use this method for:
        - Multi-month precomputation (Issue #16)
        - Chunked processing of large datasets
        - Any scenario requiring bar continuity across batches

        Parameters
        ----------
        trades : List[Dict]
            List of trade dictionaries with keys:
            - timestamp: int (milliseconds since epoch)
            - price: float
            - quantity: float (or 'volume')

            Optional keys:
            - agg_trade_id: int
            - first_trade_id: int
            - last_trade_id: int
            - is_buyer_maker: bool

        Returns
        -------
        List[Dict]
            List of range bar dictionaries (only completed bars).
            Same structure as process_trades().

        Notes
        -----
        State persistence: The processor remembers the incomplete bar from
        the previous call. When new trades arrive, they continue building
        that bar until threshold breach, ensuring continuity.

        Examples
        --------
        >>> processor = RangeBarProcessor(250)
        >>> # First batch (month 1)
        >>> bars1 = processor.process_trades_streaming(month1_trades)
        >>> # Second batch (month 2) - continues from month 1's state
        >>> bars2 = processor.process_trades_streaming(month2_trades)
        >>> # No discontinuity at month boundary
        """
        if not trades:
            return []

        return self._processor.process_trades_streaming(trades)

    def to_dataframe(
        self,
        bars: list[dict[str, str | float | int]],
        include_microstructure: bool = False,
    ) -> pd.DataFrame:
        """Convert range bars to pandas DataFrame (backtesting.py compatible).

        Parameters
        ----------
        bars : List[Dict]
            List of range bar dictionaries from process_trades()
        include_microstructure : bool, default=False
            If True, include all microstructure columns (vwap, buy_volume,
            sell_volume, ofi, kyle_lambda_proxy, etc.)

        Returns
        -------
        pd.DataFrame
            DataFrame with DatetimeIndex and OHLCV columns:
            - Index: timestamp (DatetimeIndex)
            - Columns: Open, High, Low, Close, Volume
            - (if include_microstructure) Additional microstructure columns

        Notes
        -----
        Output format is compatible with backtesting.py:
        - Column names are capitalized (Open, High, Low, Close, Volume)
        - Index is DatetimeIndex
        - No NaN values (all bars complete)
        - Sorted chronologically

        Examples
        --------
        >>> processor = RangeBarProcessor(250)
        >>> trades = [
        ...     {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.0},
        ...     {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.0},
        ... ]
        >>> bars = processor.process_trades(trades)
        >>> df = processor.to_dataframe(bars)
        >>> isinstance(df.index, pd.DatetimeIndex)
        True
        >>> list(df.columns)
        ['Open', 'High', 'Low', 'Close', 'Volume']
        """
        if not bars:
            return pd.DataFrame(
                columns=["Open", "High", "Low", "Close", "Volume"]
            ).set_index(pd.DatetimeIndex([]))

        result = pd.DataFrame(bars)

        # Convert timestamp from RFC3339 string to DatetimeIndex
        # Use format='ISO8601' to handle variable-precision fractional seconds
        result["timestamp"] = pd.to_datetime(result["timestamp"], format="ISO8601")
        result = result.set_index("timestamp")

        # Rename columns to backtesting.py format (capitalized)
        result = result.rename(
            columns={
                "open": "Open",
                "high": "High",
                "low": "Low",
                "close": "Close",
                "volume": "Volume",
            }
        )

        if include_microstructure:
            # Return all columns including microstructure
            return result

        # Return only OHLCV columns (drop microstructure fields for backtesting)
        return result[["Open", "High", "Low", "Close", "Volume"]]

    def create_checkpoint(self, symbol: str | None = None) -> dict:
        """Create checkpoint for cross-file continuation.

        Captures current processing state including incomplete bar (if any).
        The checkpoint can be serialized to JSON and used to resume processing
        across file boundaries while maintaining bar continuity.

        Parameters
        ----------
        symbol : str, optional
            Symbol being processed. If None, uses the symbol provided at
            construction time.

        Returns
        -------
        dict
            Checkpoint state (JSON-serializable) containing:
            - symbol: Trading symbol
            - threshold_decimal_bps: Threshold value
            - incomplete_bar: Incomplete bar state (if any)
            - thresholds: IMMUTABLE upper/lower thresholds for incomplete bar
            - last_timestamp_us: Last processed timestamp
            - last_trade_id: Last trade ID (for gap detection)
            - price_hash: Hash for position verification
            - anomaly_summary: Gap/overlap detection counters

        Raises
        ------
        ValueError
            If no symbol provided (neither at construction nor in this call)

        Examples
        --------
        >>> processor = RangeBarProcessor(250, symbol="BTCUSDT")
        >>> bars = processor.process_trades(trades)
        >>> checkpoint = processor.create_checkpoint()
        >>> # Save to JSON
        >>> import json
        >>> with open("checkpoint.json", "w") as f:
        ...     json.dump(checkpoint, f)
        """
        return self._processor.create_checkpoint(symbol)

    def verify_position(
        self, first_trade: dict[str, int | float]
    ) -> PositionVerification:
        """Verify position in data stream at file boundary.

        Checks if the first trade of the next file matches the expected
        position based on the processor's current state. Useful for
        detecting data gaps when resuming from checkpoint.

        Parameters
        ----------
        first_trade : dict
            First trade of the next file with keys: timestamp, price, quantity

        Returns
        -------
        PositionVerification
            Verification result:
            - is_exact: True if position matches exactly
            - has_gap: True if there's a gap (missing trades)
            - gap_details(): Returns (expected_id, actual_id, missing_count) if gap
            - timestamp_gap_ms(): Returns gap in ms for timestamp-only sources

        Examples
        --------
        >>> processor = RangeBarProcessor.from_checkpoint(checkpoint)
        >>> verification = processor.verify_position(next_file_trades[0])
        >>> if verification.has_gap:
        ...     expected, actual, missing = verification.gap_details()
        ...     print(f"Gap detected: {missing} trades missing")
        """
        return self._processor.verify_position(first_trade)

    def get_incomplete_bar(self) -> dict | None:
        """Get incomplete bar if any.

        Returns the bar currently being built (not yet breached threshold).
        Returns None if the last trade completed a bar cleanly.

        Returns
        -------
        dict or None
            Incomplete bar with OHLCV fields, or None
        """
        return self._processor.get_incomplete_bar()

    @property
    def has_incomplete_bar(self) -> bool:
        """Check if there's an incomplete bar."""
        return self._processor.has_incomplete_bar

    def process_trades_streaming_arrow(
        self, trades: list[dict[str, int | float]]
    ) -> PyRecordBatch:
        """Process trades into range bars, returning Arrow RecordBatch.

        This is the most memory-efficient streaming API. Returns Arrow
        RecordBatch for zero-copy transfer to Polars or other Arrow-compatible
        systems.

        Parameters
        ----------
        trades : List[Dict]
            List of trade dictionaries with keys:
            - timestamp: int (milliseconds since epoch)
            - price: float
            - quantity: float (or 'volume')

        Returns
        -------
        PyRecordBatch
            Arrow RecordBatch with 30 columns (OHLCV + microstructure).
            Use `polars.from_arrow()` for zero-copy conversion.

        Examples
        --------
        >>> import polars as pl
        >>> processor = RangeBarProcessor(250)
        >>> for trade_batch in stream_binance_trades(  # doctest: +SKIP
        ...     "BTCUSDT", "2024-01-01", "2024-01-01"
        ... ):
        ...     arrow_batch = processor.process_trades_streaming_arrow(trade_batch)
        ...     df = pl.from_arrow(arrow_batch)  # Zero-copy!
        ...     process_batch(df)

        Notes
        -----
        Requires the `arrow-export` feature to be enabled (default in v8.0+).
        """
        if not trades:
            # Return empty batch with correct schema
            from rangebar._core import bars_to_arrow

            return bars_to_arrow([])

        return self._processor.process_trades_streaming_arrow(trades)

    def reset_at_ouroboros(self) -> dict | None:
        """Reset processor state at an ouroboros boundary.

        Clears the incomplete bar and position tracking while preserving
        the threshold configuration. Use this when starting fresh at a
        known boundary (year/month/week) for reproducibility.

        Returns
        -------
        dict or None
            The orphaned incomplete bar (if any), or None.
            Mark returned bars with `is_orphan=True` for ML filtering.

        Examples
        --------
        >>> # At year boundary (Jan 1 00:00:00 UTC)
        >>> orphaned = processor.reset_at_ouroboros()
        >>> if orphaned:
        ...     orphaned["is_orphan"] = True
        ...     orphaned["ouroboros_boundary"] = "2024-01-01T00:00:00Z"
        ...     orphaned["reason"] = "year_boundary"
        >>> # Continue processing with clean state
        """
        return self._processor.reset_at_ouroboros()