rangebar 11.6.1__cp313-cp313-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rangebar/CLAUDE.md +327 -0
- rangebar/__init__.py +227 -0
- rangebar/__init__.pyi +1089 -0
- rangebar/_core.cpython-313-darwin.so +0 -0
- rangebar/checkpoint.py +472 -0
- rangebar/cli.py +298 -0
- rangebar/clickhouse/CLAUDE.md +139 -0
- rangebar/clickhouse/__init__.py +100 -0
- rangebar/clickhouse/bulk_operations.py +309 -0
- rangebar/clickhouse/cache.py +734 -0
- rangebar/clickhouse/client.py +121 -0
- rangebar/clickhouse/config.py +141 -0
- rangebar/clickhouse/mixin.py +120 -0
- rangebar/clickhouse/preflight.py +504 -0
- rangebar/clickhouse/query_operations.py +345 -0
- rangebar/clickhouse/schema.sql +187 -0
- rangebar/clickhouse/tunnel.py +222 -0
- rangebar/constants.py +288 -0
- rangebar/conversion.py +177 -0
- rangebar/exceptions.py +207 -0
- rangebar/exness.py +364 -0
- rangebar/hooks.py +311 -0
- rangebar/logging.py +171 -0
- rangebar/notify/__init__.py +15 -0
- rangebar/notify/pushover.py +155 -0
- rangebar/notify/telegram.py +271 -0
- rangebar/orchestration/__init__.py +20 -0
- rangebar/orchestration/count_bounded.py +797 -0
- rangebar/orchestration/helpers.py +412 -0
- rangebar/orchestration/models.py +76 -0
- rangebar/orchestration/precompute.py +498 -0
- rangebar/orchestration/range_bars.py +736 -0
- rangebar/orchestration/tick_fetcher.py +226 -0
- rangebar/ouroboros.py +454 -0
- rangebar/processors/__init__.py +22 -0
- rangebar/processors/api.py +383 -0
- rangebar/processors/core.py +522 -0
- rangebar/resource_guard.py +567 -0
- rangebar/storage/__init__.py +22 -0
- rangebar/storage/checksum_registry.py +218 -0
- rangebar/storage/parquet.py +728 -0
- rangebar/streaming.py +300 -0
- rangebar/validation/__init__.py +69 -0
- rangebar/validation/cache_staleness.py +277 -0
- rangebar/validation/continuity.py +664 -0
- rangebar/validation/gap_classification.py +294 -0
- rangebar/validation/post_storage.py +317 -0
- rangebar/validation/tier1.py +175 -0
- rangebar/validation/tier2.py +261 -0
- rangebar-11.6.1.dist-info/METADATA +308 -0
- rangebar-11.6.1.dist-info/RECORD +54 -0
- rangebar-11.6.1.dist-info/WHEEL +4 -0
- rangebar-11.6.1.dist-info/entry_points.txt +2 -0
- rangebar-11.6.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,522 @@
|
|
|
1
|
+
# polars-exception: backtesting.py requires Pandas DataFrames with DatetimeIndex
|
|
2
|
+
# Issue #46: Modularization M2 - Extract RangeBarProcessor from __init__.py
|
|
3
|
+
"""RangeBarProcessor: Core processor for converting tick data to range bars.
|
|
4
|
+
|
|
5
|
+
This module contains the RangeBarProcessor class, which wraps the Rust-based
|
|
6
|
+
PyRangeBarProcessor to provide a Pythonic interface for range bar construction.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from rangebar._core import PositionVerification
|
|
16
|
+
from rangebar._core import PyRangeBarProcessor as _PyRangeBarProcessor
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from arro3.core import RecordBatch as PyRecordBatch
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RangeBarProcessor:
    """Process tick-level trade data into range bars.

    Range bars close when price moves ±threshold from the bar's opening price,
    providing market-adaptive time intervals that eliminate arbitrary time-based
    artifacts.

    Parameters
    ----------
    threshold_decimal_bps : int
        Threshold in decimal basis points.
        Examples: 250 = 25bps = 0.25%, 100 = 10bps = 0.1%
        Valid range: [1, 100_000] (0.001% to 100%)
    symbol : str, optional
        Trading symbol (e.g., "BTCUSDT"). Required for checkpoint creation.
    prevent_same_timestamp_close : bool, default=True
        Timestamp gating for flash crash prevention (Issue #36).
        If True (default): A bar cannot close on the same timestamp it opened.
        This prevents flash crash scenarios from creating thousands of bars
        at identical timestamps. If False: Legacy v8 behavior where bars can
        close immediately on breach regardless of timestamp.
    inter_bar_lookback_count : int, optional
        Number of trades to keep in lookback buffer for inter-bar feature
        computation (Issue #59). If set, enables 16 inter-bar features
        computed from trades BEFORE each bar opens. Recommended: 100-500.
        If None (default), inter-bar features are disabled.

    Raises
    ------
    ValueError
        If threshold_decimal_bps is out of valid range [1, 100_000]

    Examples
    --------
    Create processor and convert trades to DataFrame:

    >>> processor = RangeBarProcessor(threshold_decimal_bps=250)  # 0.25%
    >>> trades = [
    ...     {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.5},
    ...     {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.3},
    ... ]
    >>> bars = processor.process_trades(trades)
    >>> df = processor.to_dataframe(bars)
    >>> print(df.columns.tolist())
    ['Open', 'High', 'Low', 'Close', 'Volume']

    Cross-file continuity with checkpoints:

    >>> processor = RangeBarProcessor(250, symbol="BTCUSDT")
    >>> bars_file1 = processor.process_trades(file1_trades)
    >>> checkpoint = processor.create_checkpoint()
    >>> # Save checkpoint to JSON...
    >>> # Later, resume from checkpoint:
    >>> processor2 = RangeBarProcessor.from_checkpoint(checkpoint)
    >>> bars_file2 = processor2.process_trades(file2_trades)
    >>> # Incomplete bar from file1 continues correctly!

    Notes
    -----
    Non-lookahead bias guarantee:
    - Thresholds computed ONLY from bar open price (never recalculated)
    - Breaching trade INCLUDED in closing bar
    - Breaching trade also OPENS next bar

    Temporal integrity:
    - All trades processed in strict chronological order
    - Unsorted trades raise RuntimeError

    Cross-file continuity (v6.1.0+):
    - Incomplete bars are preserved across file boundaries via checkpoints
    - Thresholds are IMMUTABLE for bar's lifetime (computed from open)
    - Price hash verification detects gaps in data stream
    """

    def __init__(
        self,
        threshold_decimal_bps: int,
        symbol: str | None = None,
        *,
        prevent_same_timestamp_close: bool = True,
        inter_bar_lookback_count: int | None = None,
    ) -> None:
        """Initialize processor with given threshold.

        Parameters
        ----------
        threshold_decimal_bps : int
            Threshold in decimal basis points (250 = 25bps = 0.25%)
        symbol : str, optional
            Trading symbol for checkpoint creation
        prevent_same_timestamp_close : bool, default=True
            Timestamp gating for flash crash prevention (Issue #36)
        inter_bar_lookback_count : int, optional
            Lookback trade count for inter-bar features (Issue #59)
        """
        # Validation happens in Rust layer, which raises PyValueError
        self._processor = _PyRangeBarProcessor(
            threshold_decimal_bps,
            symbol,
            prevent_same_timestamp_close,
            inter_bar_lookback_count,
        )
        # Keep Python-side copies of the configuration so callers can
        # introspect the wrapper without reaching into the Rust object.
        self.threshold_decimal_bps = threshold_decimal_bps
        self.symbol = symbol
        self.prevent_same_timestamp_close = prevent_same_timestamp_close
        self.inter_bar_lookback_count = inter_bar_lookback_count
|
|
128
|
+
|
|
129
|
+
@classmethod
|
|
130
|
+
def from_checkpoint(cls, checkpoint: dict) -> RangeBarProcessor:
|
|
131
|
+
"""Create processor from checkpoint for cross-file continuation.
|
|
132
|
+
|
|
133
|
+
Restores processor state including any incomplete bar that was being
|
|
134
|
+
built when the checkpoint was created. The incomplete bar will continue
|
|
135
|
+
building from where it left off.
|
|
136
|
+
|
|
137
|
+
Parameters
|
|
138
|
+
----------
|
|
139
|
+
checkpoint : dict
|
|
140
|
+
Checkpoint state from create_checkpoint()
|
|
141
|
+
|
|
142
|
+
Returns
|
|
143
|
+
-------
|
|
144
|
+
RangeBarProcessor
|
|
145
|
+
New processor with restored state
|
|
146
|
+
|
|
147
|
+
Raises
|
|
148
|
+
------
|
|
149
|
+
ValueError
|
|
150
|
+
If checkpoint is invalid or corrupted
|
|
151
|
+
|
|
152
|
+
Examples
|
|
153
|
+
--------
|
|
154
|
+
>>> import json
|
|
155
|
+
>>> with open("checkpoint.json") as f:
|
|
156
|
+
... checkpoint = json.load(f)
|
|
157
|
+
>>> processor = RangeBarProcessor.from_checkpoint(checkpoint)
|
|
158
|
+
>>> bars = processor.process_trades(next_file_trades)
|
|
159
|
+
"""
|
|
160
|
+
instance = cls.__new__(cls)
|
|
161
|
+
instance._processor = _PyRangeBarProcessor.from_checkpoint(checkpoint)
|
|
162
|
+
instance.threshold_decimal_bps = checkpoint["threshold_decimal_bps"]
|
|
163
|
+
instance.symbol = checkpoint.get("symbol")
|
|
164
|
+
# Default to True for old checkpoints without this field
|
|
165
|
+
instance.prevent_same_timestamp_close = checkpoint.get(
|
|
166
|
+
"prevent_same_timestamp_close", True
|
|
167
|
+
)
|
|
168
|
+
return instance
|
|
169
|
+
|
|
170
|
+
def process_trades(
|
|
171
|
+
self, trades: list[dict[str, int | float]]
|
|
172
|
+
) -> list[dict[str, str | float | int]]:
|
|
173
|
+
"""Process trades into range bars.
|
|
174
|
+
|
|
175
|
+
Parameters
|
|
176
|
+
----------
|
|
177
|
+
trades : List[Dict]
|
|
178
|
+
List of trade dictionaries with keys:
|
|
179
|
+
- timestamp: int (milliseconds since epoch)
|
|
180
|
+
- price: float
|
|
181
|
+
- quantity: float (or 'volume')
|
|
182
|
+
|
|
183
|
+
Optional keys:
|
|
184
|
+
- agg_trade_id: int
|
|
185
|
+
- first_trade_id: int
|
|
186
|
+
- last_trade_id: int
|
|
187
|
+
- is_buyer_maker: bool
|
|
188
|
+
|
|
189
|
+
Returns
|
|
190
|
+
-------
|
|
191
|
+
List[Dict]
|
|
192
|
+
List of range bar dictionaries with keys:
|
|
193
|
+
- timestamp: str (RFC3339 format)
|
|
194
|
+
- open: float
|
|
195
|
+
- high: float
|
|
196
|
+
- low: float
|
|
197
|
+
- close: float
|
|
198
|
+
- volume: float
|
|
199
|
+
- vwap: float (volume-weighted average price)
|
|
200
|
+
- buy_volume: float
|
|
201
|
+
- sell_volume: float
|
|
202
|
+
- individual_trade_count: int
|
|
203
|
+
- agg_record_count: int
|
|
204
|
+
|
|
205
|
+
Raises
|
|
206
|
+
------
|
|
207
|
+
KeyError
|
|
208
|
+
If required trade fields are missing
|
|
209
|
+
RuntimeError
|
|
210
|
+
If trades are not sorted chronologically
|
|
211
|
+
|
|
212
|
+
Examples
|
|
213
|
+
--------
|
|
214
|
+
>>> processor = RangeBarProcessor(250)
|
|
215
|
+
>>> trades = [
|
|
216
|
+
... {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.0},
|
|
217
|
+
... {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.0},
|
|
218
|
+
... ]
|
|
219
|
+
>>> bars = processor.process_trades(trades)
|
|
220
|
+
>>> len(bars)
|
|
221
|
+
1
|
|
222
|
+
>>> bars[0]["open"]
|
|
223
|
+
42000.0
|
|
224
|
+
"""
|
|
225
|
+
if not trades:
|
|
226
|
+
return []
|
|
227
|
+
|
|
228
|
+
return self._processor.process_trades(trades)
|
|
229
|
+
|
|
230
|
+
def process_trades_streaming(
|
|
231
|
+
self, trades: list[dict[str, int | float]]
|
|
232
|
+
) -> list[dict[str, str | float | int]]:
|
|
233
|
+
"""Process trades into range bars (streaming mode - preserves state).
|
|
234
|
+
|
|
235
|
+
Unlike `process_trades()`, this method maintains processor state across
|
|
236
|
+
calls, enabling continuous processing across multiple batches (e.g.,
|
|
237
|
+
month-by-month or chunk-by-chunk processing).
|
|
238
|
+
|
|
239
|
+
Use this method for:
|
|
240
|
+
- Multi-month precomputation (Issue #16)
|
|
241
|
+
- Chunked processing of large datasets
|
|
242
|
+
- Any scenario requiring bar continuity across batches
|
|
243
|
+
|
|
244
|
+
Parameters
|
|
245
|
+
----------
|
|
246
|
+
trades : List[Dict]
|
|
247
|
+
List of trade dictionaries with keys:
|
|
248
|
+
- timestamp: int (milliseconds since epoch)
|
|
249
|
+
- price: float
|
|
250
|
+
- quantity: float (or 'volume')
|
|
251
|
+
|
|
252
|
+
Optional keys:
|
|
253
|
+
- agg_trade_id: int
|
|
254
|
+
- first_trade_id: int
|
|
255
|
+
- last_trade_id: int
|
|
256
|
+
- is_buyer_maker: bool
|
|
257
|
+
|
|
258
|
+
Returns
|
|
259
|
+
-------
|
|
260
|
+
List[Dict]
|
|
261
|
+
List of range bar dictionaries (only completed bars).
|
|
262
|
+
Same structure as process_trades().
|
|
263
|
+
|
|
264
|
+
Notes
|
|
265
|
+
-----
|
|
266
|
+
State persistence: The processor remembers the incomplete bar from
|
|
267
|
+
the previous call. When new trades arrive, they continue building
|
|
268
|
+
that bar until threshold breach, ensuring continuity.
|
|
269
|
+
|
|
270
|
+
Examples
|
|
271
|
+
--------
|
|
272
|
+
>>> processor = RangeBarProcessor(250)
|
|
273
|
+
>>> # First batch (month 1)
|
|
274
|
+
>>> bars1 = processor.process_trades_streaming(month1_trades)
|
|
275
|
+
>>> # Second batch (month 2) - continues from month 1's state
|
|
276
|
+
>>> bars2 = processor.process_trades_streaming(month2_trades)
|
|
277
|
+
>>> # No discontinuity at month boundary
|
|
278
|
+
"""
|
|
279
|
+
if not trades:
|
|
280
|
+
return []
|
|
281
|
+
|
|
282
|
+
return self._processor.process_trades_streaming(trades)
|
|
283
|
+
|
|
284
|
+
def to_dataframe(
|
|
285
|
+
self,
|
|
286
|
+
bars: list[dict[str, str | float | int]],
|
|
287
|
+
include_microstructure: bool = False,
|
|
288
|
+
) -> pd.DataFrame:
|
|
289
|
+
"""Convert range bars to pandas DataFrame (backtesting.py compatible).
|
|
290
|
+
|
|
291
|
+
Parameters
|
|
292
|
+
----------
|
|
293
|
+
bars : List[Dict]
|
|
294
|
+
List of range bar dictionaries from process_trades()
|
|
295
|
+
include_microstructure : bool, default=False
|
|
296
|
+
If True, include all microstructure columns (vwap, buy_volume,
|
|
297
|
+
sell_volume, ofi, kyle_lambda_proxy, etc.)
|
|
298
|
+
|
|
299
|
+
Returns
|
|
300
|
+
-------
|
|
301
|
+
pd.DataFrame
|
|
302
|
+
DataFrame with DatetimeIndex and OHLCV columns:
|
|
303
|
+
- Index: timestamp (DatetimeIndex)
|
|
304
|
+
- Columns: Open, High, Low, Close, Volume
|
|
305
|
+
- (if include_microstructure) Additional microstructure columns
|
|
306
|
+
|
|
307
|
+
Notes
|
|
308
|
+
-----
|
|
309
|
+
Output format is compatible with backtesting.py:
|
|
310
|
+
- Column names are capitalized (Open, High, Low, Close, Volume)
|
|
311
|
+
- Index is DatetimeIndex
|
|
312
|
+
- No NaN values (all bars complete)
|
|
313
|
+
- Sorted chronologically
|
|
314
|
+
|
|
315
|
+
Examples
|
|
316
|
+
--------
|
|
317
|
+
>>> processor = RangeBarProcessor(250)
|
|
318
|
+
>>> trades = [
|
|
319
|
+
... {"timestamp": 1704067200000, "price": 42000.0, "quantity": 1.0},
|
|
320
|
+
... {"timestamp": 1704067210000, "price": 42105.0, "quantity": 2.0},
|
|
321
|
+
... ]
|
|
322
|
+
>>> bars = processor.process_trades(trades)
|
|
323
|
+
>>> df = processor.to_dataframe(bars)
|
|
324
|
+
>>> isinstance(df.index, pd.DatetimeIndex)
|
|
325
|
+
True
|
|
326
|
+
>>> list(df.columns)
|
|
327
|
+
['Open', 'High', 'Low', 'Close', 'Volume']
|
|
328
|
+
"""
|
|
329
|
+
if not bars:
|
|
330
|
+
return pd.DataFrame(
|
|
331
|
+
columns=["Open", "High", "Low", "Close", "Volume"]
|
|
332
|
+
).set_index(pd.DatetimeIndex([]))
|
|
333
|
+
|
|
334
|
+
result = pd.DataFrame(bars)
|
|
335
|
+
|
|
336
|
+
# Convert timestamp from RFC3339 string to DatetimeIndex
|
|
337
|
+
# Use format='ISO8601' to handle variable-precision fractional seconds
|
|
338
|
+
result["timestamp"] = pd.to_datetime(result["timestamp"], format="ISO8601")
|
|
339
|
+
result = result.set_index("timestamp")
|
|
340
|
+
|
|
341
|
+
# Rename columns to backtesting.py format (capitalized)
|
|
342
|
+
result = result.rename(
|
|
343
|
+
columns={
|
|
344
|
+
"open": "Open",
|
|
345
|
+
"high": "High",
|
|
346
|
+
"low": "Low",
|
|
347
|
+
"close": "Close",
|
|
348
|
+
"volume": "Volume",
|
|
349
|
+
}
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
if include_microstructure:
|
|
353
|
+
# Return all columns including microstructure
|
|
354
|
+
return result
|
|
355
|
+
|
|
356
|
+
# Return only OHLCV columns (drop microstructure fields for backtesting)
|
|
357
|
+
return result[["Open", "High", "Low", "Close", "Volume"]]
|
|
358
|
+
|
|
359
|
+
def create_checkpoint(self, symbol: str | None = None) -> dict:
|
|
360
|
+
"""Create checkpoint for cross-file continuation.
|
|
361
|
+
|
|
362
|
+
Captures current processing state including incomplete bar (if any).
|
|
363
|
+
The checkpoint can be serialized to JSON and used to resume processing
|
|
364
|
+
across file boundaries while maintaining bar continuity.
|
|
365
|
+
|
|
366
|
+
Parameters
|
|
367
|
+
----------
|
|
368
|
+
symbol : str, optional
|
|
369
|
+
Symbol being processed. If None, uses the symbol provided at
|
|
370
|
+
construction time.
|
|
371
|
+
|
|
372
|
+
Returns
|
|
373
|
+
-------
|
|
374
|
+
dict
|
|
375
|
+
Checkpoint state (JSON-serializable) containing:
|
|
376
|
+
- symbol: Trading symbol
|
|
377
|
+
- threshold_decimal_bps: Threshold value
|
|
378
|
+
- incomplete_bar: Incomplete bar state (if any)
|
|
379
|
+
- thresholds: IMMUTABLE upper/lower thresholds for incomplete bar
|
|
380
|
+
- last_timestamp_us: Last processed timestamp
|
|
381
|
+
- last_trade_id: Last trade ID (for gap detection)
|
|
382
|
+
- price_hash: Hash for position verification
|
|
383
|
+
- anomaly_summary: Gap/overlap detection counters
|
|
384
|
+
|
|
385
|
+
Raises
|
|
386
|
+
------
|
|
387
|
+
ValueError
|
|
388
|
+
If no symbol provided (neither at construction nor in this call)
|
|
389
|
+
|
|
390
|
+
Examples
|
|
391
|
+
--------
|
|
392
|
+
>>> processor = RangeBarProcessor(250, symbol="BTCUSDT")
|
|
393
|
+
>>> bars = processor.process_trades(trades)
|
|
394
|
+
>>> checkpoint = processor.create_checkpoint()
|
|
395
|
+
>>> # Save to JSON
|
|
396
|
+
>>> import json
|
|
397
|
+
>>> with open("checkpoint.json", "w") as f:
|
|
398
|
+
... json.dump(checkpoint, f)
|
|
399
|
+
"""
|
|
400
|
+
return self._processor.create_checkpoint(symbol)
|
|
401
|
+
|
|
402
|
+
def verify_position(
|
|
403
|
+
self, first_trade: dict[str, int | float]
|
|
404
|
+
) -> PositionVerification:
|
|
405
|
+
"""Verify position in data stream at file boundary.
|
|
406
|
+
|
|
407
|
+
Checks if the first trade of the next file matches the expected
|
|
408
|
+
position based on the processor's current state. Useful for
|
|
409
|
+
detecting data gaps when resuming from checkpoint.
|
|
410
|
+
|
|
411
|
+
Parameters
|
|
412
|
+
----------
|
|
413
|
+
first_trade : dict
|
|
414
|
+
First trade of the next file with keys: timestamp, price, quantity
|
|
415
|
+
|
|
416
|
+
Returns
|
|
417
|
+
-------
|
|
418
|
+
PositionVerification
|
|
419
|
+
Verification result:
|
|
420
|
+
- is_exact: True if position matches exactly
|
|
421
|
+
- has_gap: True if there's a gap (missing trades)
|
|
422
|
+
- gap_details(): Returns (expected_id, actual_id, missing_count) if gap
|
|
423
|
+
- timestamp_gap_ms(): Returns gap in ms for timestamp-only sources
|
|
424
|
+
|
|
425
|
+
Examples
|
|
426
|
+
--------
|
|
427
|
+
>>> processor = RangeBarProcessor.from_checkpoint(checkpoint)
|
|
428
|
+
>>> verification = processor.verify_position(next_file_trades[0])
|
|
429
|
+
>>> if verification.has_gap:
|
|
430
|
+
... expected, actual, missing = verification.gap_details()
|
|
431
|
+
... print(f"Gap detected: {missing} trades missing")
|
|
432
|
+
"""
|
|
433
|
+
return self._processor.verify_position(first_trade)
|
|
434
|
+
|
|
435
|
+
def get_incomplete_bar(self) -> dict | None:
|
|
436
|
+
"""Get incomplete bar if any.
|
|
437
|
+
|
|
438
|
+
Returns the bar currently being built (not yet breached threshold).
|
|
439
|
+
Returns None if the last trade completed a bar cleanly.
|
|
440
|
+
|
|
441
|
+
Returns
|
|
442
|
+
-------
|
|
443
|
+
dict or None
|
|
444
|
+
Incomplete bar with OHLCV fields, or None
|
|
445
|
+
"""
|
|
446
|
+
return self._processor.get_incomplete_bar()
|
|
447
|
+
|
|
448
|
+
    @property
    def has_incomplete_bar(self) -> bool:
        """True when a bar is currently being built (not yet breached).

        Mirrors the underlying Rust processor's flag; use
        get_incomplete_bar() to inspect the pending bar itself.
        """
        return self._processor.has_incomplete_bar
|
|
452
|
+
|
|
453
|
+
def process_trades_streaming_arrow(
|
|
454
|
+
self, trades: list[dict[str, int | float]]
|
|
455
|
+
) -> PyRecordBatch:
|
|
456
|
+
"""Process trades into range bars, returning Arrow RecordBatch.
|
|
457
|
+
|
|
458
|
+
This is the most memory-efficient streaming API. Returns Arrow
|
|
459
|
+
RecordBatch for zero-copy transfer to Polars or other Arrow-compatible
|
|
460
|
+
systems.
|
|
461
|
+
|
|
462
|
+
Parameters
|
|
463
|
+
----------
|
|
464
|
+
trades : List[Dict]
|
|
465
|
+
List of trade dictionaries with keys:
|
|
466
|
+
- timestamp: int (milliseconds since epoch)
|
|
467
|
+
- price: float
|
|
468
|
+
- quantity: float (or 'volume')
|
|
469
|
+
|
|
470
|
+
Returns
|
|
471
|
+
-------
|
|
472
|
+
PyRecordBatch
|
|
473
|
+
Arrow RecordBatch with 30 columns (OHLCV + microstructure).
|
|
474
|
+
Use `polars.from_arrow()` for zero-copy conversion.
|
|
475
|
+
|
|
476
|
+
Examples
|
|
477
|
+
--------
|
|
478
|
+
>>> import polars as pl
|
|
479
|
+
>>> processor = RangeBarProcessor(250)
|
|
480
|
+
>>> for trade_batch in stream_binance_trades( # doctest: +SKIP
|
|
481
|
+
... "BTCUSDT", "2024-01-01", "2024-01-01"
|
|
482
|
+
... ):
|
|
483
|
+
... arrow_batch = processor.process_trades_streaming_arrow(trade_batch)
|
|
484
|
+
... df = pl.from_arrow(arrow_batch) # Zero-copy!
|
|
485
|
+
... process_batch(df)
|
|
486
|
+
|
|
487
|
+
Notes
|
|
488
|
+
-----
|
|
489
|
+
Requires the `arrow-export` feature to be enabled (default in v8.0+).
|
|
490
|
+
"""
|
|
491
|
+
if not trades:
|
|
492
|
+
# Return empty batch with correct schema
|
|
493
|
+
from rangebar._core import bars_to_arrow
|
|
494
|
+
|
|
495
|
+
return bars_to_arrow([])
|
|
496
|
+
|
|
497
|
+
return self._processor.process_trades_streaming_arrow(trades)
|
|
498
|
+
|
|
499
|
+
def reset_at_ouroboros(self) -> dict | None:
|
|
500
|
+
"""Reset processor state at an ouroboros boundary.
|
|
501
|
+
|
|
502
|
+
Clears the incomplete bar and position tracking while preserving
|
|
503
|
+
the threshold configuration. Use this when starting fresh at a
|
|
504
|
+
known boundary (year/month/week) for reproducibility.
|
|
505
|
+
|
|
506
|
+
Returns
|
|
507
|
+
-------
|
|
508
|
+
dict or None
|
|
509
|
+
The orphaned incomplete bar (if any), or None.
|
|
510
|
+
Mark returned bars with `is_orphan=True` for ML filtering.
|
|
511
|
+
|
|
512
|
+
Examples
|
|
513
|
+
--------
|
|
514
|
+
>>> # At year boundary (Jan 1 00:00:00 UTC)
|
|
515
|
+
>>> orphaned = processor.reset_at_ouroboros()
|
|
516
|
+
>>> if orphaned:
|
|
517
|
+
... orphaned["is_orphan"] = True
|
|
518
|
+
... orphaned["ouroboros_boundary"] = "2024-01-01T00:00:00Z"
|
|
519
|
+
... orphaned["reason"] = "year_boundary"
|
|
520
|
+
>>> # Continue processing with clean state
|
|
521
|
+
"""
|
|
522
|
+
return self._processor.reset_at_ouroboros()
|