lumibot 4.0.21__py3-none-any.whl → 4.0.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lumibot has been flagged as possibly problematic.
- lumibot/backtesting/__init__.py +3 -3
- lumibot/data_sources/__init__.py +2 -1
- lumibot/data_sources/databento_data.py +5 -5
- lumibot/data_sources/databento_data_polars_backtesting.py +490 -0
- lumibot/data_sources/databento_data_polars_live.py +793 -0
- {lumibot-4.0.21.dist-info → lumibot-4.0.23.dist-info}/METADATA +1 -1
- {lumibot-4.0.21.dist-info → lumibot-4.0.23.dist-info}/RECORD +12 -10
- tests/backtest/test_databento.py +56 -2
- tests/test_databento_live.py +10 -10
- {lumibot-4.0.21.dist-info → lumibot-4.0.23.dist-info}/LICENSE +0 -0
- {lumibot-4.0.21.dist-info → lumibot-4.0.23.dist-info}/WHEEL +0 -0
- {lumibot-4.0.21.dist-info → lumibot-4.0.23.dist-info}/top_level.txt +0 -0
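The headline change is a pair of new Polars-based DataBento data sources: databento_data_polars_backtesting.py (+490) and databento_data_polars_live.py (+793), the latter shown in full below. For orientation, here is a minimal usage sketch of the live source; the key and contract are hypothetical placeholders, and only the constructor and the get_last_price / get_historical_prices signatures are taken from the diff itself.

# Usage sketch (illustrative, not part of the package):
from lumibot.data_sources.databento_data_polars_live import DataBentoDataPolarsLive
from lumibot.entities import Asset

source = DataBentoDataPolarsLive(
    api_key="YOUR_DATABENTO_KEY",  # hypothetical placeholder
    enable_live_stream=True,       # stream trades via the Live API
)

# Continuous future; the source resolves it to a specific contract (e.g. MESZ5)
asset = Asset("MES", asset_type=Asset.AssetType.CONT_FUTURE)

last = source.get_last_price(asset)                    # fresh live trade if recent, else historical fallback
bars = source.get_historical_prices(asset, length=30)  # 30 minute bars with the live tail merged in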
lumibot/data_sources/databento_data_polars_live.py
@@ -0,0 +1,793 @@
"""DataBento data source using Polars with proper Live API integration - FIXED VERSION.

This implementation uses:
- Live API for real-time data streaming
- Historical API for data >24 hours old
- Proper handling of DataBento message types
- Correct price conversion from fixed-point format
"""

from datetime import datetime, timedelta, timezone
from decimal import Decimal
from typing import Dict, Optional, Union
import time
import threading
import queue
from collections import defaultdict

import polars as pl
import databento as db

from lumibot.data_sources import DataSource
from lumibot.data_sources.polars_mixin import PolarsMixin
from lumibot.entities import Asset, Bars, Quote
from lumibot.tools import databento_helper_polars
from lumibot.tools.databento_helper_polars import _ensure_polars_datetime_timezone as _ensure_polars_tz
from lumibot.tools.lumibot_logger import get_logger

logger = get_logger(__name__)


class DataBentoDataPolarsLive(PolarsMixin, DataSource):
    """
    DataBento data source optimized with Polars and proper Live API usage.

    Uses Live API for real-time trade streaming to achieve <1 minute lag.
    Falls back to Historical API for older data.
    """

    SOURCE = "DATABENTO"
    MIN_TIMESTEP = "minute"
    TIMESTEP_MAPPING = {
        "minute": "1m",
        "day": "1d",
        "hour": "1h"
    }

    def __init__(
        self,
        api_key: str,
        has_paid_subscription: bool = False,
        enable_cache: bool = True,
        cache_duration_minutes: int = 60,
        enable_live_stream: bool = True,
        timeout: int = None,  # For backwards compatibility
        max_retries: int = None  # For backwards compatibility
    ):
        """Initialize DataBento data source with Live API support"""
        super().__init__(api_key=api_key, has_paid_subscription=has_paid_subscription)

        # Core configuration
        self._api_key = api_key
        self.has_paid_subscription = has_paid_subscription
        self.enable_cache = enable_cache
        self.cache_duration = timedelta(minutes=cache_duration_minutes)
        self.enable_live_stream = enable_live_stream

        # Caches
        self._last_price_cache = {}
        self._eager_cache = {}
        self._filtered_data_cache = {}
        self._cache_metadata = {}
        self._cache_timestamps = {}

        # Live streaming state
        self._live_client = None
        self._producer_threads = {}  # Map symbol to producer thread
        self._consumer_thread = None
        self._finalizer_thread = None
        self._stop_streaming = False
        self._minute_bars = defaultdict(dict)
        self._bars_lock = threading.Lock()
        self._finalized_minutes = defaultdict(set)
        self._subscribed_symbols = set()
        self._last_trade_time = {}
        self._last_ts_event = {}  # Track last timestamp per symbol for reconnection
        self._symbol_mapping = {}  # Maps instrument_id to symbol
        self._record_queue = queue.Queue(maxsize=10000)
        self._reconnect_backoff = 1.0

        # Live tick cache
        self._live_cache_lock = threading.RLock()
        self._latest_trades: Dict[str, dict] = {}
        self._latest_quotes: Dict[str, dict] = {}
        self._max_live_age = timedelta(seconds=2)
        self._stale_warning_issued: Dict[str, bool] = {}

        # Configuration
        self._finalize_grace_seconds = 3  # Wait 3 seconds after minute ends to finalize
        self._prune_older_minutes = 720  # Remove bars older than 12 hours
        self._resub_overlap_seconds = 5  # Overlap on reconnection

        if self.enable_live_stream:
            self._init_live_streaming()

    def _should_use_live_api(self, start_dt: datetime, end_dt: datetime) -> bool:
        """Return True when the requested window should use the live API."""
        if not self.enable_live_stream:
            return False
        if start_dt is None or end_dt is None:
            return False
        if start_dt.tzinfo is None:
            start_dt = start_dt.replace(tzinfo=timezone.utc)
        if end_dt.tzinfo is None:
            end_dt = end_dt.replace(tzinfo=timezone.utc)
        if end_dt < start_dt:
            start_dt, end_dt = end_dt, start_dt
        now = datetime.now(timezone.utc)
        live_window = timedelta(hours=24)
        return end_dt >= now - live_window

    def _init_live_streaming(self):
        """Initialize DataBento Live API client for real-time data"""
        try:
            self._stop_streaming = False

            # Start consumer thread to process records from queue
            self._consumer_thread = threading.Thread(target=self._consumer_loop, daemon=True)
            self._consumer_thread.start()

            # Start finalizer thread to mark old bars as complete
            self._finalizer_thread = threading.Thread(target=self._finalizer_loop, daemon=True)
            self._finalizer_thread.start()

            logger.debug("[DATABENTO][LIVE] Live streaming threads initialized")

        except Exception as e:
            logger.error(f"[DATABENTO][LIVE] Failed to initialize Live streaming: {e}", exc_info=True)
            self.enable_live_stream = False

    def _live_stream_worker(self, symbol: str, start_time: datetime):
        """Producer thread that subscribes and iterates in the same context"""
        logger.debug(f"[DATABENTO][PRODUCER] Starting for {symbol}")
        reconnect_attempts = 0
        max_reconnect_attempts = 5
        backoff_seconds = 1

        while not self._stop_streaming and reconnect_attempts < max_reconnect_attempts:
            try:
                # Create a new client for this producer
                client = db.Live(key=self._api_key)

                logger.debug(f"[DATABENTO][PRODUCER] Subscribing to {symbol} from {start_time.isoformat()}")

                # Subscribe - must happen in same context as iteration
                client.subscribe(
                    dataset="GLBX.MDP3",
                    schema="trades",
                    stype_in="raw_symbol",
                    symbols=[symbol],
                    start=start_time.isoformat()
                )

                # Attempt to subscribe to top-of-book quotes for richer data
                try:
                    client.subscribe(
                        dataset="GLBX.MDP3",
                        schema="quotes",
                        stype_in="raw_symbol",
                        symbols=[symbol],
                        start=start_time.isoformat()
                    )
                except Exception as quote_sub_err:
                    logger.debug(f"[DATABENTO][PRODUCER] Quote subscription not available for {symbol}: {quote_sub_err}")

                # Immediately iterate in the SAME context
                record_count = 0
                error_count = 0

                for record in client:
                    if self._stop_streaming:
                        break

                    record_count += 1

                    # Handle ErrorMsg records
                    if hasattr(record, '__class__') and record.__class__.__name__ == 'ErrorMsg':
                        error_count += 1
                        err_msg = getattr(record, 'err', 'Unknown error')
                        logger.error(f"[DATABENTO][PRODUCER] Error from server: {err_msg}")
                        if error_count > 3:
                            logger.error("[DATABENTO][PRODUCER] Too many errors, will reconnect")
                            break
                        continue

                    # Reset error count on successful records
                    error_count = 0

                    # Put record in queue for consumer
                    try:
                        self._record_queue.put((symbol, record), timeout=0.1)

                        # Track last event timestamp for reconnection
                        if hasattr(record, 'ts_event'):
                            self._last_ts_event[symbol] = getattr(record, 'ts_event')

                        # Log progress (only first few)
                        if record_count <= 3:
                            logger.debug(f"[DATABENTO][PRODUCER] {symbol} record #{record_count}: {record.__class__.__name__}")

                    except queue.Full:
                        logger.warning("[DATABENTO][PRODUCER] Queue full, dropping record")

                # Clean exit
                logger.info(f"[DATABENTO][PRODUCER] {symbol} stopped after {record_count} records")
                break  # Successful completion

            except Exception as e:
                logger.error(f"[DATABENTO][PRODUCER] {symbol} error: {e}")
                reconnect_attempts += 1

                if reconnect_attempts < max_reconnect_attempts:
                    sleep_time = backoff_seconds * (2 ** reconnect_attempts)
                    logger.info(f"[DATABENTO][PRODUCER] Reconnecting {symbol} in {sleep_time}s (attempt {reconnect_attempts})")
                    time.sleep(sleep_time)

                    # Update start time for reconnection to avoid duplicate data
                    if symbol in self._last_ts_event:
                        # Start from last received timestamp (DataBento timestamps are in nanoseconds)
                        ts_ns = self._last_ts_event[symbol]
                        if ts_ns > 0:
                            start_time = datetime.fromtimestamp(ts_ns / 1e9, tz=timezone.utc)
                            logger.info(f"[DATABENTO][PRODUCER] Resuming from last event: {start_time.isoformat()}")
                else:
                    logger.error(f"[DATABENTO][PRODUCER] {symbol} max reconnection attempts reached")

    def _subscribe_to_symbol(self, symbol: str, start_time: datetime = None, min_bars: int = 10):
        """Start a producer thread for a symbol"""
        if symbol in self._subscribed_symbols:
            logger.debug(f"[DATABENTO][LIVE] {symbol} already subscribed")
            return

        try:
            # Calculate start time for replay
            if start_time is None:
                # Request enough history to build minute bars
                start_time = datetime.now(timezone.utc) - timedelta(minutes=max(30, min_bars * 2))

            logger.debug(f"[DATABENTO][LIVE] Starting producer for {symbol}")
            logger.debug(f"[DATABENTO][LIVE] Replay from: {start_time.isoformat()}")

            # Start producer thread for this symbol
            producer_thread = threading.Thread(
                target=self._live_stream_worker,
                args=(symbol, start_time),
                daemon=True,
                name=f"databento-producer-{symbol}"
            )
            producer_thread.start()

            self._subscribed_symbols.add(symbol)
            self._producer_threads[symbol] = producer_thread
            logger.debug(f"[DATABENTO][LIVE] Producer started for {symbol}")

        except Exception as e:
            logger.error(f"[DATABENTO][LIVE] Failed to start producer for {symbol}: {e}", exc_info=True)

    def _consumer_loop(self):
        """Consumer thread that processes records from the queue"""
        logger.debug("[DATABENTO][CONSUMER] Started")
        trade_count = 0

        while not self._stop_streaming:
            try:
                # Get record from queue with timeout
                symbol, record = self._record_queue.get(timeout=1.0)

                # Handle symbol mappings
                if hasattr(record, '__class__') and record.__class__.__name__ == 'SymbolMappingMsg':
                    instrument_id = getattr(record, 'instrument_id', None)
                    if instrument_id:
                        for attr in ['raw_symbol', 'stype_out_symbol', 'symbol']:
                            mapped_symbol = getattr(record, attr, None)
                            if mapped_symbol:
                                self._symbol_mapping[instrument_id] = mapped_symbol
                                logger.debug(f"[DATABENTO][CONSUMER] Symbol mapping: {instrument_id} -> {mapped_symbol}")
                                break

                # Process trade messages
                elif hasattr(record, '__class__') and record.__class__.__name__ == 'TradeMsg':
                    instrument_id = getattr(record, 'instrument_id', None)

                    # Try to get symbol from mapping or use provided symbol
                    actual_symbol = self._symbol_mapping.get(instrument_id, symbol)

                    # Process the trade
                    if actual_symbol:
                        self._last_trade_time[actual_symbol] = datetime.now(timezone.utc)
                        trade_count += 1

                        # Log only first few trades for verification
                        raw_price = getattr(record, 'price', 0)
                        price = raw_price / 1e9 if raw_price > 1e10 else raw_price
                        size = getattr(record, 'size', 0)
                        ts_event = getattr(record, 'ts_event', 0)
                        trade_dt = datetime.fromtimestamp(ts_event / 1e9, tz=timezone.utc)

                        if trade_count <= 3:
                            logger.debug(f"[DATABENTO][CONSUMER] Trade #{trade_count} {actual_symbol} @ {price:.2f} size={size}")

                        # Update live trade cache
                        self._record_live_trade(actual_symbol, price, size, trade_dt)

                        # Aggregate the trade into minute bars
                        self._aggregate_trade(actual_symbol, price, size, trade_dt)

                elif hasattr(record, '__class__') and record.__class__.__name__ in {"Mbp1Msg", "BboMsg", "QuoteMsg"}:
                    actual_symbol = getattr(record, 'symbol', symbol)
                    bid_px = getattr(record, 'bid_px', None)
                    ask_px = getattr(record, 'ask_px', None)
                    bid_sz = getattr(record, 'bid_sz', None)
                    ask_sz = getattr(record, 'ask_sz', None)
                    ts_event = getattr(record, 'ts_event', None)

                    if bid_px is not None or ask_px is not None:
                        # Normalize units (DataBento quotes may be scaled by 1e9)
                        def _normalize(val):
                            if val is None:
                                return None
                            return float(val) / 1e9 if val > 1e10 else float(val)

                        bid_price = _normalize(bid_px)
                        ask_price = _normalize(ask_px)
                        bid_size = float(bid_sz) if bid_sz is not None else None
                        ask_size = float(ask_sz) if ask_sz is not None else None
                        ts_dt = datetime.fromtimestamp(ts_event / 1e9, tz=timezone.utc) if ts_event else datetime.now(timezone.utc)

                        self._record_live_quote(actual_symbol, bid_price, ask_price, bid_size, ask_size, ts_dt)

            except queue.Empty:
                continue
            except Exception as e:
                logger.error(f"[DATABENTO][CONSUMER] Error processing record: {e}")

        logger.info(f"[DATABENTO][CONSUMER] Stopped after {trade_count} trades")

    def _finalizer_loop(self):
        """Finalizer thread that marks old bars as complete"""
        logger.debug("[DATABENTO][FINALIZER] Started")

        while not self._stop_streaming:
            try:
                time.sleep(5)  # Check every 5 seconds

                current_time = datetime.now(timezone.utc)
                cutoff_time = current_time - timedelta(seconds=self._finalize_grace_seconds)
                cutoff_minute = cutoff_time.replace(second=0, microsecond=0)

                with self._bars_lock:
                    for symbol in list(self._minute_bars.keys()):
                        # Finalize minutes that are complete
                        for minute_dt in list(self._minute_bars[symbol].keys()):
                            if minute_dt < cutoff_minute and minute_dt not in self._finalized_minutes[symbol]:
                                self._finalized_minutes[symbol].add(minute_dt)
                                bar = self._minute_bars[symbol][minute_dt]
                                logger.debug(f"[DATABENTO][FINALIZER] Finalized {symbol} bar at {minute_dt}: OHLC={bar['open']:.2f}/{bar['high']:.2f}/{bar['low']:.2f}/{bar['close']:.2f} vol={bar['volume']}")

                        # Prune old bars to prevent unlimited memory growth
                        prune_before = current_time - timedelta(minutes=self._prune_older_minutes)
                        old_minutes = [dt for dt in self._minute_bars[symbol].keys() if dt < prune_before]
                        for old_dt in old_minutes:
                            del self._minute_bars[symbol][old_dt]
                            self._finalized_minutes[symbol].discard(old_dt)

                        if old_minutes:
                            logger.debug(f"[DATABENTO][FINALIZER] Pruned {len(old_minutes)} old bars for {symbol}")

            except Exception as e:
                logger.error(f"[DATABENTO][FINALIZER] Error: {e}")

        logger.info("[DATABENTO][FINALIZER] Stopped")

    def _aggregate_trade(self, symbol: str, price: float, size: float, trade_time: datetime):
        """Aggregate a trade into minute bars"""
        minute = trade_time.replace(second=0, microsecond=0)

        # Skip if already finalized
        if minute in self._finalized_minutes[symbol]:
            return

        # Get current time to check if bar should be finalized
        current_time = datetime.now(timezone.utc)
        current_minute = current_time.replace(second=0, microsecond=0)

        # Initialize symbol's bar dict if needed
        if symbol not in self._minute_bars:
            self._minute_bars[symbol] = {}

        # Create or update the minute bar
        if minute not in self._minute_bars[symbol]:
            # New minute bar
            self._minute_bars[symbol][minute] = {
                'datetime': minute,
                'open': price,
                'high': price,
                'low': price,
                'close': price,
                'volume': size
            }
            logger.debug(f"[DATABENTO][LIVE] New minute bar: {symbol} {minute} @ {price:.2f}")
        else:
            # Update existing bar
            bar = self._minute_bars[symbol][minute]
            bar['high'] = max(bar['high'], price)
            bar['low'] = min(bar['low'], price)
            bar['close'] = price
            bar['volume'] += size

        # Finalize old bars (anything older than current minute)
        for bar_minute in list(self._minute_bars[symbol].keys()):
            if bar_minute < current_minute and bar_minute not in self._finalized_minutes[symbol]:
                self._finalized_minutes[symbol].add(bar_minute)
                logger.debug(f"[DATABENTO][LIVE] Finalized bar: {symbol} {bar_minute}")

    def _get_live_tail(self, symbol: str, after_dt: datetime) -> Optional[pl.DataFrame]:
        """Get finalized live bars newer than after_dt"""
        if symbol not in self._minute_bars or not self._minute_bars[symbol]:
            return None

        current_minute = datetime.now(timezone.utc).replace(second=0, microsecond=0)

        # Get finalized bars newer than after_dt
        tail_bars = []
        for minute, bar_data in sorted(self._minute_bars[symbol].items()):
            if minute > after_dt and minute < current_minute:
                # Only include core OHLCV data to match historical schema
                simple_bar = {
                    'datetime': bar_data['datetime'],
                    'open': bar_data['open'],
                    'high': bar_data['high'],
                    'low': bar_data['low'],
                    'close': bar_data['close'],
                    'volume': bar_data['volume']
                }
                tail_bars.append(simple_bar)

        if not tail_bars:
            return None

        df = pl.DataFrame(tail_bars).sort('datetime')
        df = _ensure_polars_tz(df)
        logger.debug(f"[DATABENTO][LIVE] Collected {len(df)} tail bars after {after_dt}")
        return df

    def _record_live_trade(self, symbol: str, price: float, size: float, trade_time: datetime):
        """Cache the latest trade for fast quote/price lookups."""
        with self._live_cache_lock:
            self._latest_trades[symbol] = {
                "price": price,
                "size": size,
                "event_time": trade_time,
                "received_at": datetime.now(timezone.utc)
            }
            self._stale_warning_issued.pop(symbol, None)

    def _record_live_quote(
        self,
        symbol: str,
        bid: Optional[float],
        ask: Optional[float],
        bid_size: Optional[float],
        ask_size: Optional[float],
        quote_time: datetime,
    ):
        with self._live_cache_lock:
            self._latest_quotes[symbol] = {
                "bid": bid,
                "ask": ask,
                "bid_size": bid_size,
                "ask_size": ask_size,
                "event_time": quote_time,
                "received_at": datetime.now(timezone.utc)
            }
            self._stale_warning_issued.pop(symbol, None)

    def _get_live_trade(self, symbol: str) -> Optional[dict]:
        with self._live_cache_lock:
            return self._latest_trades.get(symbol)

    def _get_live_quote(self, symbol: str) -> Optional[dict]:
        with self._live_cache_lock:
            return self._latest_quotes.get(symbol)

    def _is_live_entry_fresh(self, entry: Optional[dict]) -> bool:
        if not entry:
            return False
        received_at = entry.get("received_at")
        if not received_at:
            return False
        return datetime.now(timezone.utc) - received_at <= self._max_live_age

    def _warn_stale(self, symbol: str, context: str):
        if not self._stale_warning_issued.get(symbol):
            logger.warning(f"[DATABENTO][LIVE] Falling back to historical data for {symbol} ({context})")
            self._stale_warning_issued[symbol] = True

    def _resolve_futures_symbol(self, asset: Asset, reference_date: datetime = None) -> str:
        """Resolve asset to specific futures contract symbol"""
        if asset.asset_type in [Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE]:
            # For continuous futures, resolve to specific contract
            if asset.asset_type == Asset.AssetType.CONT_FUTURE:
                if hasattr(asset, 'resolve_continuous_futures_contract'):
                    return asset.resolve_continuous_futures_contract(
                        reference_date=reference_date,
                        year_digits=1,
                    )

            # Manual resolution for common futures
            symbol = asset.symbol.upper()
            month = reference_date.month if reference_date else datetime.now().month
            year = reference_date.year if reference_date else datetime.now().year

            # Quarterly contracts
            if month <= 3:
                month_code = 'H'
            elif month <= 6:
                month_code = 'M'
            elif month <= 9:
                month_code = 'U'
            else:
                month_code = 'Z'

            year_digit = year % 10

            if symbol in ["ES", "NQ", "RTY", "YM", "MES", "MNQ", "MYM", "M2K", "CL", "GC", "SI"]:
                return f"{symbol}{month_code}{year_digit}"

            return asset.symbol

        return asset.symbol

    def get_historical_prices(
        self,
        asset: Asset,
        length: int,
        timestep: str = "minute",
        timeshift: Optional[timedelta] = None,
        quote: Optional[Asset] = None,
        exchange: Optional[str] = None,
        include_after_hours: bool = True,
        return_polars: bool = False
    ) -> Optional[Bars]:
        """Get historical prices with live tail merge"""

        # Validate asset type
        if asset.asset_type not in [Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE]:
            logger.error(f"DataBento only supports futures. Got: {asset.asset_type}")
            return None

        # Calculate time range
        current_time = datetime.now(timezone.utc)
        if timeshift:
            current_time = current_time - timeshift

        # Determine time range
        if timestep == "minute":
            time_needed = timedelta(minutes=max(length * 3, 30))
        elif timestep == "hour":
            time_needed = timedelta(hours=max(length * 2, 12))
        else:
            time_needed = timedelta(days=max(length * 2, 10))

        start_time = current_time - time_needed

        # Resolve to specific contract
        symbol = self._resolve_futures_symbol(asset, current_time)
        logger.debug(f"Resolved {asset.symbol} to {symbol}")

        # Subscribe to live stream if enabled (only for recent data gap)
        if self.enable_live_stream and symbol not in self._subscribed_symbols:
            # Live API can only replay recent data (last ~30 minutes)
            live_start_time = current_time - timedelta(minutes=30)
            self._subscribe_to_symbol(symbol, live_start_time, min_bars=30)
            # Give it a moment to start receiving data
            time.sleep(0.5)

        # Get historical data
        logger.debug(f"[DATABENTO][HIST] Fetching {symbol} from {start_time} to {current_time}")

        df = databento_helper_polars.get_price_data_from_databento_polars(
            api_key=self._api_key,
            asset=asset,
            start=start_time,
            end=current_time,
            timestep=timestep,
            venue=exchange,
            force_cache_update=False
        )

        if df is not None and not df.is_empty():
            # Try to append live tail if available
            if self.enable_live_stream and 'datetime' in df.columns:
                try:
                    hist_last = df['datetime'].max()
                    # Ensure hist_last is timezone-aware
                    if not hasattr(hist_last, 'tzinfo') or hist_last.tzinfo is None:
                        from datetime import timezone as tz
                        hist_last = hist_last.replace(tzinfo=tz.utc)
                    tail_df = self._get_live_tail(symbol, hist_last)

                    # Debug: check live bar status
                    if symbol in self._minute_bars:
                        live_bar_count = len(self._minute_bars[symbol])
                        finalized_count = len(self._finalized_minutes.get(symbol, []))
                        logger.debug(f"[DATABENTO][DEBUG] {symbol} has {live_bar_count} total bars, {finalized_count} finalized")
                    else:
                        logger.debug(f"[DATABENTO][DEBUG] No live bars for {symbol}")

                    if tail_df is not None and not tail_df.is_empty():
                        # Make sure both dataframes have the same columns and types
                        try:
                            # Ensure timezone compatibility
                            hist_tz_info = df['datetime'].dtype
                            tail_tz_info = tail_df['datetime'].dtype

                            logger.debug(f"[DATABENTO][MERGE] Historical datetime: {hist_tz_info}, Live datetime: {tail_tz_info}")

                            df = _ensure_polars_tz(df)
                            tail_df = _ensure_polars_tz(tail_df)

                            # Only keep columns that exist in both dataframes
                            common_columns = [col for col in df.columns if col in tail_df.columns]
                            df_subset = df.select(common_columns)
                            tail_subset = tail_df.select(common_columns)

                            # Ensure numeric columns have compatible types
                            for col in common_columns:
                                if col != 'datetime':  # Don't modify datetime
                                    df_dtype = df_subset[col].dtype
                                    tail_dtype = tail_subset[col].dtype

                                    # Convert both to Float64 for compatibility
                                    if df_dtype != tail_dtype:
                                        logger.debug(f"[DATABENTO][MERGE] Converting {col}: {df_dtype} vs {tail_dtype} -> Float64")
                                        df_subset = df_subset.with_columns(pl.col(col).cast(pl.Float64))
                                        tail_subset = tail_subset.with_columns(pl.col(col).cast(pl.Float64))

                            # Merge the data and drop duplicate minutes (keep latest)
                            merged_df = pl.concat([df_subset, tail_subset]).sort('datetime')
                            merged_df = merged_df.unique(subset=['datetime'], keep='last').sort('datetime')

                            # If original df had more columns, merge them back
                            if len(df.columns) > len(common_columns):
                                extra_cols = [col for col in df.columns if col not in common_columns]
                                df_extra = df.select(['datetime'] + extra_cols)
                                merged_df = merged_df.join(df_extra, on='datetime', how='left')

                            df = merged_df
                            logger.debug(f"[DATABENTO][MERGE] Successfully appended {len(tail_df)} live bars")

                        except Exception as merge_e:
                            logger.error(f"[DATABENTO][MERGE] All merge attempts failed: {merge_e}")
                            # Last resort - just log what we have
                            hist_latest = df['datetime'].max() if 'datetime' in df.columns else None
                            tail_latest = tail_df['datetime'].max() if 'datetime' in tail_df.columns else None
                            logger.error(f"[DATABENTO][MERGE] Historical latest: {hist_latest}, Live latest: {tail_latest}")
                            # Continue with historical data only
                    else:
                        lag = (current_time - hist_last).total_seconds()
                        logger.debug(f"[DATABENTO][MERGE] No live tail bars (lag={lag:.0f}s)")

                except Exception as e:
                    logger.warning(f"[DATABENTO][MERGE] Failed to merge live tail: {e}")

            # Trim to requested length
            df = df.tail(length)
            df = _ensure_polars_tz(df)
            return Bars(
                df=df,
                source=self.SOURCE,
                asset=asset,
                quote=quote,
                return_polars=return_polars,
                tzinfo=self.tzinfo,
            )

        return None

    def get_last_price(self, asset: Asset, quote: Optional[Asset] = None, exchange: Optional[str] = None) -> Optional[float]:
        """Get the last price for an asset"""
        symbol = self._resolve_futures_symbol(asset)

        # Try live tick cache
        if self.enable_live_stream:
            if symbol not in self._subscribed_symbols:
                self._subscribe_to_symbol(symbol)

            trade_entry = self._get_live_trade(symbol)
            if self._is_live_entry_fresh(trade_entry):
                return float(trade_entry["price"])
            else:
                self._warn_stale(symbol, "stale trade cache")

        # Fallback to historical
        bars = self.get_historical_prices(asset, 1, "minute", exchange=exchange)
        if bars and len(bars) > 0:
            return float(bars.df['close'].tail(1).item())

        return None

    def get_quote(self, asset: Asset, quote: Optional[Asset] = None, exchange: Optional[str] = None) -> Quote:
        symbol = self._resolve_futures_symbol(asset)
        bid = ask = price = bid_size = ask_size = None
        event_time = datetime.now(timezone.utc)
        age_ms = None

        if self.enable_live_stream:
            if symbol not in self._subscribed_symbols:
                self._subscribe_to_symbol(symbol)

            quote_entry = self._get_live_quote(symbol)
            trade_entry = self._get_live_trade(symbol)

            if self._is_live_entry_fresh(quote_entry):
                bid = quote_entry.get("bid")
                ask = quote_entry.get("ask")
                bid_size = quote_entry.get("bid_size")
                ask_size = quote_entry.get("ask_size")
                event_time = quote_entry.get("event_time", event_time)
                age_ms = int((datetime.now(timezone.utc) - quote_entry["received_at"]).total_seconds() * 1000)

                if trade_entry and self._is_live_entry_fresh(trade_entry):
                    price = trade_entry.get("price")
                elif bid is not None and ask is not None:
                    price = (bid + ask) / 2
            elif self._is_live_entry_fresh(trade_entry):
                price = trade_entry.get("price")
                event_time = trade_entry.get("event_time", event_time)
                age_ms = int((datetime.now(timezone.utc) - trade_entry["received_at"]).total_seconds() * 1000)

                tick = 0.25
                if price is not None:
                    bid = price - tick / 2
                    ask = price + tick / 2
            else:
                self._warn_stale(symbol, "stale quote cache")

        if price is None:
            last_price = self.get_last_price(asset, quote=quote, exchange=exchange)
            price = last_price
            if last_price is not None and bid is None and ask is None:
                tick = 0.25
                bid = last_price - tick / 2
                ask = last_price + tick / 2

        return Quote(
            asset=asset,
            price=price,
            bid=bid,
            ask=ask,
            bid_size=bid_size,
            ask_size=ask_size,
            timestamp=event_time,
            quote_time=event_time,
            raw_data={
                "source": "databento_live" if self.enable_live_stream else "databento_rest",
                "age_ms": age_ms,
            }
        )

    def get_chains(self, asset: Asset, quote: Asset = None, exchange: str = None) -> dict:
        """Get option chains - not supported for futures"""
        logger.warning("DataBento does not support option chains")
        return {"Chains": {}, "Multiplier": 1, "Exchange": exchange or ""}

    def __del__(self):
        """Cleanup on deletion"""
        if hasattr(self, '_stop_streaming'):
            self._stop_streaming = True

        # Stop all producer threads
        if hasattr(self, '_producer_threads'):
            for symbol, thread in self._producer_threads.items():
                if thread and thread.is_alive():
                    thread.join(timeout=1)

        # Stop consumer thread
        if hasattr(self, '_consumer_thread') and self._consumer_thread:
            self._consumer_thread.join(timeout=1)

        # Stop finalizer thread
        if hasattr(self, '_finalizer_thread') and self._finalizer_thread:
            self._finalizer_thread.join(timeout=1)
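One detail worth calling out from the consumer loop above: DataBento delivers prices as fixed-point integers (int64 in units of 1e-9), and the code converts them with the heuristic `raw_price / 1e9 if raw_price > 1e10 else raw_price`. A minimal sketch of that conversion follows; the constant and function names are illustrative, not from the package.

# Sketch of the fixed-point conversion used above (names are illustrative).
# A price of 5432.25 arrives from DataBento as the integer 5432250000000.
FIXED_PRICE_SCALE = 1_000_000_000  # DataBento prices carry 1e-9 units

def to_float_price(raw):
    # Values above 1e10 are assumed to still be fixed-point integers;
    # smaller values are treated as already-converted floats.
    return raw / FIXED_PRICE_SCALE if raw > 1e10 else float(raw)

assert to_float_price(5_432_250_000_000) == 5432.25
assert to_float_price(5432.25) == 5432.25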