lumibot-4.0.23-py3-none-any.whl → lumibot-4.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (160)
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  37. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  58. lumibot/data_sources/data_source_backtesting.py +3 -5
  59. lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
  60. lumibot/data_sources/pandas_data.py +6 -3
  61. lumibot/data_sources/polars_mixin.py +126 -21
  62. lumibot/data_sources/tradeovate_data.py +80 -0
  63. lumibot/data_sources/tradier_data.py +2 -1
  64. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  65. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  66. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  67. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  75. lumibot/entities/asset.py +8 -0
  76. lumibot/entities/order.py +1 -1
  77. lumibot/entities/quote.py +14 -0
  78. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  79. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  80. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  81. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  82. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  83. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  84. lumibot/strategies/_strategy.py +95 -27
  85. lumibot/strategies/strategy.py +5 -6
  86. lumibot/strategies/strategy_executor.py +2 -2
  87. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  88. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  89. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  90. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  109. lumibot/tools/databento_helper.py +384 -133
  110. lumibot/tools/databento_helper_polars.py +218 -156
  111. lumibot/tools/databento_roll.py +216 -0
  112. lumibot/tools/lumibot_logger.py +32 -17
  113. lumibot/tools/polygon_helper.py +65 -0
  114. lumibot/tools/thetadata_helper.py +588 -70
  115. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  116. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  117. lumibot/traders/trader.py +1 -1
  118. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  120. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  121. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  122. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/RECORD +160 -44
  123. tests/backtest/check_timing_offset.py +198 -0
  124. tests/backtest/check_volume_spike.py +112 -0
  125. tests/backtest/comprehensive_comparison.py +166 -0
  126. tests/backtest/debug_comparison.py +91 -0
  127. tests/backtest/diagnose_price_difference.py +97 -0
  128. tests/backtest/direct_api_comparison.py +203 -0
  129. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  130. tests/backtest/root_cause_analysis.py +109 -0
  131. tests/backtest/test_accuracy_verification.py +244 -0
  132. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  133. tests/backtest/test_databento.py +4 -0
  134. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  135. tests/backtest/test_debug_avg_fill_price.py +112 -0
  136. tests/backtest/test_dividends.py +8 -3
  137. tests/backtest/test_example_strategies.py +54 -47
  138. tests/backtest/test_futures_edge_cases.py +451 -0
  139. tests/backtest/test_futures_single_trade.py +270 -0
  140. tests/backtest/test_futures_ultra_simple.py +191 -0
  141. tests/backtest/test_index_data_verification.py +348 -0
  142. tests/backtest/test_polygon.py +45 -24
  143. tests/backtest/test_thetadata.py +246 -60
  144. tests/backtest/test_thetadata_comprehensive.py +729 -0
  145. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  146. tests/backtest/test_yahoo.py +1 -2
  147. tests/conftest.py +20 -0
  148. tests/test_backtesting_data_source_env.py +249 -0
  149. tests/test_backtesting_quiet_logs_complete.py +10 -11
  150. tests/test_databento_helper.py +73 -86
  151. tests/test_databento_timezone_fixes.py +21 -4
  152. tests/test_get_historical_prices.py +6 -6
  153. tests/test_options_helper.py +162 -40
  154. tests/test_polygon_helper.py +21 -13
  155. tests/test_quiet_logs_requirements.py +5 -5
  156. tests/test_thetadata_helper.py +487 -171
  157. tests/test_yahoo_data.py +125 -0
  158. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  159. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  160. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
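File: lumibot/tools/databento_helper.py (file name inferred from the hunk contents; the original file header is missing)
@@ -3,12 +3,13 @@ import os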
3
3
  import re
4
4
  from datetime import date, datetime, timedelta, timezone
5
5
  from pathlib import Path
6
- from typing import Optional, List, Dict, Union
6
+ from typing import Optional, List, Dict, Tuple, Union
7
7
  from decimal import Decimal
8
8
 
9
9
  import pandas as pd
10
10
  from lumibot import LUMIBOT_CACHE_FOLDER
11
11
  from lumibot.entities import Asset
12
+ from lumibot.tools import databento_roll
12
13
 
13
14
  # Set up module-specific logger
14
15
  from lumibot.tools.lumibot_logger import get_logger
@@ -169,6 +170,88 @@ class DataBentoClient:
169
170
  # This should never be reached, but just in case
170
171
  raise Exception(f"DataBento request failed after {self.max_retries} retries")
171
172
 
173
+ def get_instrument_definition(
174
+ self,
175
+ dataset: str,
176
+ symbol: str,
177
+ reference_date: Union[str, datetime, date] = None
178
+ ) -> Optional[Dict]:
179
+ """
180
+ Get instrument definition (including multiplier) for a futures contract from DataBento.
181
+
182
+ Parameters
183
+ ----------
184
+ dataset : str
185
+ DataBento dataset identifier (e.g., 'GLBX.MDP3')
186
+ symbol : str
187
+ Symbol to retrieve definition for (e.g., 'MESH4', 'MES')
188
+ reference_date : str, datetime, or date, optional
189
+ Date to fetch definition for. If None, uses yesterday (to ensure data availability)
190
+
191
+ Returns
192
+ -------
193
+ dict or None
194
+ Instrument definition with fields like 'unit_of_measure_qty' (multiplier),
195
+ 'min_price_increment', 'expiration', etc. Returns None if not available.
196
+ """
197
+ try:
198
+ # Use yesterday if no reference date provided (ensures data is available)
199
+ if reference_date is None:
200
+ reference_date = datetime.now() - timedelta(days=1)
201
+
202
+ # Convert to date string
203
+ if isinstance(reference_date, datetime):
204
+ date_str = reference_date.strftime("%Y-%m-%d")
205
+ elif isinstance(reference_date, date):
206
+ date_str = reference_date.strftime("%Y-%m-%d")
207
+ else:
208
+ date_str = reference_date
209
+
210
+ logger.info(f"Fetching instrument definition for {symbol} from DataBento on {date_str}")
211
+
212
+ # Fetch instrument definition using 'definition' schema
213
+ # DataBento requires end > start, so add 1 day to end
214
+ from datetime import timedelta
215
+ if isinstance(reference_date, datetime):
216
+ end_date = (reference_date + timedelta(days=1)).strftime("%Y-%m-%d")
217
+ elif isinstance(reference_date, date):
218
+ end_date = (reference_date + timedelta(days=1)).strftime("%Y-%m-%d")
219
+ else:
220
+ # reference_date is a string
221
+ ref_dt = datetime.strptime(date_str, "%Y-%m-%d")
222
+ end_date = (ref_dt + timedelta(days=1)).strftime("%Y-%m-%d")
223
+
224
+ data = self.client.timeseries.get_range(
225
+ dataset=dataset,
226
+ symbols=[symbol],
227
+ schema="definition",
228
+ start=date_str,
229
+ end=end_date,
230
+ )
231
+
232
+ # Convert to DataFrame
233
+ if hasattr(data, 'to_df'):
234
+ df = data.to_df()
235
+ else:
236
+ df = pd.DataFrame(data)
237
+
238
+ if df.empty:
239
+ logger.warning(f"No instrument definition found for {symbol} on {date_str}")
240
+ return None
241
+
242
+ # Extract the first row as a dictionary
243
+ definition = df.iloc[0].to_dict()
244
+
245
+ # Log key fields
246
+ if 'unit_of_measure_qty' in definition:
247
+ logger.info(f"Found multiplier for {symbol}: {definition['unit_of_measure_qty']}")
248
+
249
+ return definition
250
+
251
+ except Exception as e:
252
+ logger.warning(f"Could not fetch instrument definition for {symbol}: {str(e)}")
253
+ return None
254
+
172
255
 
173
256
  def _convert_to_databento_format(symbol: str, asset_symbol: str = None) -> str:
174
257
  """
@@ -248,20 +331,31 @@ def _format_futures_symbol_for_databento(asset: Asset, reference_date: datetime
248
331
  ValueError
249
332
  If symbol resolution fails with actionable error message
250
333
  """
251
- symbol = asset.symbol
252
-
334
+ import re
335
+
336
+ symbol = asset.symbol.upper()
337
+
338
+ # Check if symbol already has contract month/year embedded (e.g., MESZ5, ESH24)
339
+ # Pattern: root + month code (F,G,H,J,K,M,N,Q,U,V,X,Z) + 1-2 digit year
340
+ has_contract_suffix = bool(re.match(r'^[A-Z]{1,4}[FGHJKMNQUVXZ]\d{1,2}$', symbol))
341
+
342
+ # If symbol already has contract month, return as-is
343
+ if has_contract_suffix:
344
+ logger.info(f"Symbol {symbol} already contains contract month/year, using as-is")
345
+ return symbol
346
+
253
347
  # For continuous contracts, resolve to active contract for the reference date
254
348
  if asset.asset_type == Asset.AssetType.CONT_FUTURE:
255
349
  logger.info(f"Resolving continuous futures symbol: {symbol}")
256
-
350
+
257
351
  # Use Asset class method for contract resolution
258
352
  resolved_symbol = asset.resolve_continuous_futures_contract(
259
353
  reference_date=reference_date,
260
354
  year_digits=1,
261
355
  )
262
-
356
+
263
357
  logger.info(f"Resolved continuous future {symbol} -> {resolved_symbol}")
264
-
358
+
265
359
  # Return format based on whether reference_date was provided
266
360
  if reference_date is not None:
267
361
  # When reference_date is provided, return full format (for DataBento helper tests)
@@ -270,7 +364,7 @@ def _format_futures_symbol_for_databento(asset: Asset, reference_date: datetime
270
364
  # When no reference_date, return DataBento format (for continuous futures resolution tests)
271
365
  databento_symbols = _generate_databento_symbol_alternatives(symbol, resolved_symbol)
272
366
  return databento_symbols[0] if databento_symbols else resolved_symbol
273
-
367
+
274
368
  # For specific futures contracts, format with expiration if provided
275
369
  if asset.asset_type == Asset.AssetType.FUTURE and asset.expiration:
276
370
  # DataBento uses month codes for specific contracts
@@ -278,20 +372,41 @@ def _format_futures_symbol_for_databento(asset: Asset, reference_date: datetime
278
372
  1: 'F', 2: 'G', 3: 'H', 4: 'J', 5: 'K', 6: 'M',
279
373
  7: 'N', 8: 'Q', 9: 'U', 10: 'V', 11: 'X', 12: 'Z'
280
374
  }
281
-
375
+
282
376
  year = asset.expiration.year % 100 # Last 2 digits of year for specific contracts
283
377
  month_code = month_codes.get(asset.expiration.month, 'H')
284
-
378
+
285
379
  # Format as SYMBOL{MONTH_CODE}{YY} (e.g., MESZ25 for December 2025)
286
380
  formatted_symbol = f"{symbol}{month_code}{year:02d}"
287
-
381
+
288
382
  logger.info(f"Formatted specific futures symbol: {asset.symbol} {asset.expiration} -> {formatted_symbol}")
289
-
383
+
290
384
  # For specific contracts, return full year format (not DataBento short format)
291
385
  return formatted_symbol
292
-
293
- # For regular futures without expiration, return raw symbol (no resolution)
294
- logger.info(f"Using raw futures symbol: {symbol}")
386
+
387
+ # IDIOT-PROOFING: If asset_type is FUTURE but no expiration, treat as continuous
388
+ if asset.asset_type == Asset.AssetType.FUTURE and not asset.expiration:
389
+ logger.warning(
390
+ f"Asset '{symbol}' has asset_type=FUTURE but no expiration specified. "
391
+ f"Auto-treating as continuous future and resolving to front month contract. "
392
+ f"To avoid this warning, use Asset.AssetType.CONT_FUTURE instead."
393
+ )
394
+ # Create temporary continuous futures asset and resolve
395
+ temp_asset = Asset(symbol=symbol, asset_type=Asset.AssetType.CONT_FUTURE)
396
+ resolved_symbol = temp_asset.resolve_continuous_futures_contract(
397
+ reference_date=reference_date,
398
+ year_digits=1,
399
+ )
400
+ logger.info(f"Auto-resolved future {symbol} -> {resolved_symbol}")
401
+
402
+ if reference_date is not None:
403
+ return resolved_symbol
404
+ else:
405
+ databento_symbols = _generate_databento_symbol_alternatives(symbol, resolved_symbol)
406
+ return databento_symbols[0] if databento_symbols else resolved_symbol
407
+
408
+ # For other asset types, return raw symbol
409
+ logger.info(f"Using raw symbol: {symbol}")
295
410
  return symbol
296
411
 
297
412
 
@@ -387,16 +502,29 @@ def _determine_databento_schema(timestep: str) -> str:
387
502
  return schema_mapping.get(timestep.lower(), 'ohlcv-1m')
388
503
 
389
504
 
390
- def _build_cache_filename(asset: Asset, start: datetime, end: datetime, timestep: str) -> Path:
391
- """Build a cache filename for the given parameters"""
392
- symbol = asset.symbol
393
- if asset.expiration:
505
+ def _build_cache_filename(
506
+ asset: Asset,
507
+ start: datetime,
508
+ end: datetime,
509
+ timestep: str,
510
+ symbol_override: Optional[str] = None,
511
+ ) -> Path:
512
+ """Build a cache filename for the given parameters."""
513
+ symbol = symbol_override or asset.symbol
514
+ if symbol_override is None and asset.expiration:
394
515
  symbol += f"_{asset.expiration.strftime('%Y%m%d')}"
395
-
396
- start_str = start.strftime('%Y%m%d')
397
- end_str = end.strftime('%Y%m%d')
398
- filename = f"{symbol}_{timestep}_{start_str}_{end_str}.parquet"
399
516
 
517
+ start_dt = start if isinstance(start, datetime) else datetime.combine(start, datetime.min.time())
518
+ end_dt = end if isinstance(end, datetime) else datetime.combine(end, datetime.min.time())
519
+
520
+ if (timestep or "").lower() in ("minute", "1m", "hour", "1h"):
521
+ start_str = start_dt.strftime("%Y%m%d%H%M")
522
+ end_str = end_dt.strftime("%Y%m%d%H%M")
523
+ else:
524
+ start_str = start_dt.strftime("%Y%m%d")
525
+ end_str = end_dt.strftime("%Y%m%d")
526
+
527
+ filename = f"{symbol}_{timestep}_{start_str}_{end_str}.parquet"
400
528
  return Path(LUMIBOT_DATABENTO_CACHE_FOLDER) / filename
401
529
 
402
530
 
@@ -457,6 +585,27 @@ def _save_cache(df: pd.DataFrame, cache_file: Path) -> None:
457
585
  logger.warning(f"Error saving cache file {cache_file}: {e}")
458
586
 
459
587
 
588
+ def _filter_front_month_rows_pandas(
589
+ df: pd.DataFrame,
590
+ schedule: List[Tuple[str, datetime, datetime]],
591
+ ) -> pd.DataFrame:
592
+ """Filter combined contract data so each timestamp uses the scheduled symbol."""
593
+ if df.empty or "symbol" not in df.columns or schedule is None:
594
+ return df
595
+
596
+ mask = pd.Series(False, index=df.index)
597
+ for symbol, start_dt, end_dt in schedule:
598
+ cond = df["symbol"] == symbol
599
+ if start_dt is not None:
600
+ cond &= df.index >= start_dt
601
+ if end_dt is not None:
602
+ cond &= df.index < end_dt
603
+ mask |= cond
604
+
605
+ filtered = df.loc[mask]
606
+ return filtered if not filtered.empty else df
607
+
608
+
460
609
  def _normalize_databento_dataframe(df: pd.DataFrame) -> pd.DataFrame:
461
610
  """
462
611
  Normalize DataBento DataFrame to Lumibot standard format
@@ -534,6 +683,84 @@ def _normalize_databento_dataframe(df: pd.DataFrame) -> pd.DataFrame:
534
683
  return df_norm
535
684
 
536
685
 
686
+ # Instrument definition cache: stores multipliers and contract specs (shared with polars)
687
+ _INSTRUMENT_DEFINITION_CACHE = {} # {(symbol, dataset): definition_dict}
688
+
689
+
690
+ def _fetch_and_update_futures_multiplier(
691
+ client: DataBentoClient,
692
+ asset: Asset,
693
+ resolved_symbol: str,
694
+ dataset: str = "GLBX.MDP3",
695
+ reference_date: Optional[datetime] = None
696
+ ) -> None:
697
+ """
698
+ Fetch futures contract multiplier from DataBento and update the asset in-place.
699
+ Uses caching to avoid repeated API calls.
700
+
701
+ Parameters
702
+ ----------
703
+ client : DataBentoClient
704
+ DataBento client instance
705
+ asset : Asset
706
+ Futures asset to fetch multiplier for (will be updated in-place)
707
+ resolved_symbol : str
708
+ The resolved contract symbol (e.g., "MESH4" for MES continuous)
709
+ dataset : str
710
+ DataBento dataset (default: GLBX.MDP3 for CME futures)
711
+ reference_date : datetime, optional
712
+ Reference date for fetching definition. If None, uses yesterday.
713
+ """
714
+ # Only fetch for futures contracts
715
+ if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
716
+ logger.info(f"[MULTIPLIER] Skipping {asset.symbol} - not a futures contract (type={asset.asset_type})")
717
+ return
718
+
719
+ logger.info(f"[MULTIPLIER] Starting fetch for {asset.symbol}, current multiplier={asset.multiplier}")
720
+
721
+ # Skip if multiplier already set (and not default value of 1)
722
+ if asset.multiplier != 1:
723
+ logger.info(f"[MULTIPLIER] Asset {asset.symbol} already has multiplier={asset.multiplier}, skipping fetch")
724
+ return
725
+
726
+ # Use the resolved symbol for cache key
727
+ cache_key = (resolved_symbol, dataset)
728
+ logger.info(f"[MULTIPLIER] Cache key: {cache_key}, cache has {len(_INSTRUMENT_DEFINITION_CACHE)} entries")
729
+ if cache_key in _INSTRUMENT_DEFINITION_CACHE:
730
+ cached_def = _INSTRUMENT_DEFINITION_CACHE[cache_key]
731
+ if 'unit_of_measure_qty' in cached_def:
732
+ asset.multiplier = int(cached_def['unit_of_measure_qty'])
733
+ logger.info(f"[MULTIPLIER] ✓ Using cached multiplier for {resolved_symbol}: {asset.multiplier}")
734
+ return
735
+ else:
736
+ logger.warning(f"[MULTIPLIER] Cache entry exists but missing unit_of_measure_qty field")
737
+
738
+ # Fetch from DataBento using the RESOLVED symbol
739
+ logger.info(f"[MULTIPLIER] Fetching from DataBento for {resolved_symbol}, dataset={dataset}, ref_date={reference_date}")
740
+ definition = client.get_instrument_definition(
741
+ dataset=dataset,
742
+ symbol=resolved_symbol,
743
+ reference_date=reference_date
744
+ )
745
+
746
+ if definition:
747
+ logger.info(f"[MULTIPLIER] Got definition with {len(definition)} fields: {list(definition.keys())}")
748
+ # Cache it
749
+ _INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
750
+
751
+ # Update asset
752
+ if 'unit_of_measure_qty' in definition:
753
+ multiplier = int(definition['unit_of_measure_qty'])
754
+ logger.info(f"[MULTIPLIER] BEFORE update: asset.multiplier = {asset.multiplier}")
755
+ asset.multiplier = multiplier
756
+ logger.info(f"[MULTIPLIER] ✓✓✓ SUCCESS! Set multiplier for {asset.symbol} (resolved to {resolved_symbol}): {multiplier}")
757
+ logger.info(f"[MULTIPLIER] AFTER update: asset.multiplier = {asset.multiplier}")
758
+ else:
759
+ logger.error(f"[MULTIPLIER] ✗ Definition missing unit_of_measure_qty field! Fields: {list(definition.keys())}")
760
+ else:
761
+ logger.error(f"[MULTIPLIER] ✗ Failed to get definition from DataBento for {resolved_symbol}")
762
+
763
+
537
764
  def get_price_data_from_databento(
538
765
  api_key: str,
539
766
  asset: Asset,
@@ -542,132 +769,156 @@ def get_price_data_from_databento(
542
769
  timestep: str = "minute",
543
770
  venue: Optional[str] = None,
544
771
  force_cache_update: bool = False,
772
+ reference_date: Optional[datetime] = None,
545
773
  **kwargs
546
774
  ) -> Optional[pd.DataFrame]:
547
- """
548
- Get historical price data from DataBento for the given asset
549
-
550
- Parameters
551
- ----------
552
- api_key : str
553
- DataBento API key
554
- asset : Asset
555
- Lumibot Asset object
556
- start : datetime
557
- Start datetime for data retrieval
558
- end : datetime
559
- End datetime for data retrieval
560
- timestep : str, optional
561
- Data timestep ('minute', 'hour', 'day'), default 'minute'
562
- venue : str, optional
563
- Specific exchange/venue filter
564
- force_cache_update : bool, optional
565
- Force refresh of cached data, default False
566
- **kwargs
567
- Additional parameters for DataBento API
568
-
569
- Returns
570
- -------
571
- pd.DataFrame or None
572
- Historical price data in standard OHLCV format, None if no data
573
- """
775
+ """Get historical price data from DataBento for the given asset."""
574
776
  if not DATABENTO_AVAILABLE:
575
777
  logger.error("DataBento package not available. Please install with: pip install databento")
576
778
  return None
577
-
578
- # Build cache filename
579
- cache_file = _build_cache_filename(asset, start, end, timestep)
580
-
581
- # Try to load from cache first
582
- if not force_cache_update:
583
- cached_data = _load_cache(cache_file)
584
- if cached_data is not None and not cached_data.empty:
585
- logger.debug(f"Loaded DataBento data from cache: {cache_file}")
586
- return _ensure_datetime_index_utc(cached_data)
587
-
588
- # Initialize DataBento client
779
+
780
+ dataset = _determine_databento_dataset(asset, venue)
781
+ schema = _determine_databento_schema(timestep)
782
+
783
+ start_naive = start.replace(tzinfo=None) if start.tzinfo is not None else start
784
+ end_naive = end.replace(tzinfo=None) if end.tzinfo is not None else end
785
+
786
+ if asset.asset_type == Asset.AssetType.CONT_FUTURE:
787
+ schedule_start = start
788
+ symbols = databento_roll.resolve_symbols_for_range(asset, schedule_start, end)
789
+ front_symbol = databento_roll.resolve_symbol_for_datetime(asset, reference_date or start)
790
+ if front_symbol not in symbols:
791
+ symbols.insert(0, front_symbol)
792
+ else:
793
+ schedule_start = start
794
+ front_symbol = _format_futures_symbol_for_databento(asset)
795
+ symbols = [front_symbol]
796
+
797
+ # Ensure multiplier is populated using the first contract.
589
798
  try:
590
- client = DataBentoClient(api_key=api_key)
591
-
592
- # Determine dataset and schema
593
- dataset = _determine_databento_dataset(asset, venue)
594
- schema = _determine_databento_schema(timestep)
595
-
596
- # For continuous futures, resolve to a specific contract FIRST
597
- # DataBento does not support continuous futures directly - we must resolve to actual contracts
598
- if asset.asset_type == Asset.AssetType.CONT_FUTURE:
599
- # Use the start date as reference for backtesting (determines which contract was active)
600
- resolved_symbol = _format_futures_symbol_for_databento(asset, reference_date=start)
601
-
602
- # Generate the correct DataBento symbol format (working format only)
603
- symbols_to_try = _generate_databento_symbol_alternatives(asset.symbol, resolved_symbol)
604
- logger.info(f"Resolved continuous future {asset.symbol} for {start.strftime('%Y-%m-%d')} -> {resolved_symbol}")
605
- logger.info(f"DataBento symbol (working format): {symbols_to_try[0]}")
606
- else:
607
- # For specific contracts, just use the formatted symbol
608
- symbol = _format_futures_symbol_for_databento(asset)
609
- symbols_to_try = [symbol]
610
-
611
- # Use the working DataBento symbol format
612
- df = None
613
-
614
- # Ensure start and end are timezone-naive for DataBento API
615
- start_naive = start.replace(tzinfo=None) if start.tzinfo is not None else start
616
- end_naive = end.replace(tzinfo=None) if end.tzinfo is not None else end
617
-
618
- for symbol_to_use in symbols_to_try:
799
+ client_for_multiplier = DataBentoClient(api_key=api_key)
800
+ _fetch_and_update_futures_multiplier(
801
+ client=client_for_multiplier,
802
+ asset=asset,
803
+ resolved_symbol=symbols[0],
804
+ dataset=dataset,
805
+ reference_date=reference_date or start,
806
+ )
807
+ except Exception as exc:
808
+ logger.warning(f"Unable to update futures multiplier for {asset.symbol}: {exc}")
809
+
810
+ frames: List[pd.DataFrame] = []
811
+ symbols_missing: List[str] = []
812
+
813
+ if not force_cache_update:
814
+ for symbol in symbols:
815
+ cache_path = _build_cache_filename(asset, start, end, timestep, symbol_override=symbol)
816
+ cached_df = _load_cache(cache_path)
817
+ if cached_df is None or cached_df.empty:
818
+ symbols_missing.append(symbol)
819
+ continue
820
+ cached_df = cached_df.copy()
821
+ cached_df["symbol"] = symbol
822
+ frames.append(cached_df)
823
+ else:
824
+ symbols_missing = list(symbols)
825
+
826
+ data_client: Optional[DataBentoClient] = None
827
+ if symbols_missing:
828
+ try:
829
+ data_client = DataBentoClient(api_key=api_key)
830
+ except Exception as exc:
831
+ logger.error(f"DataBento data fetch error: {exc}")
832
+ return None
833
+
834
+ min_step = timedelta(minutes=1)
835
+ if schema == "ohlcv-1h":
836
+ min_step = timedelta(hours=1)
837
+ elif schema == "ohlcv-1d":
838
+ min_step = timedelta(days=1)
839
+ if end_naive <= start_naive:
840
+ end_naive = start_naive + min_step
841
+
842
+ for symbol in symbols_missing:
619
843
  try:
620
- logger.info(f"Using DataBento symbol: {symbol_to_use}")
621
- logger.info(f"DataBento request details: dataset={dataset}, symbol={symbol_to_use}, schema={schema}, start={start_naive}, end={end_naive}")
622
-
623
- df = client.get_historical_data(
844
+ logger.debug(
845
+ "Requesting DataBento data for %s (%s) between %s and %s",
846
+ symbol,
847
+ schema,
848
+ start_naive,
849
+ end_naive,
850
+ )
851
+ df_raw = data_client.get_historical_data(
624
852
  dataset=dataset,
625
- symbols=symbol_to_use,
853
+ symbols=symbol,
626
854
  schema=schema,
627
855
  start=start_naive,
628
856
  end=end_naive,
629
- **kwargs
857
+ **kwargs,
630
858
  )
631
-
632
- if df is not None and not df.empty:
633
- logger.info(f"✓ SUCCESS: Retrieved {len(df)} rows for symbol: {symbol_to_use}")
634
-
635
- # Normalize the data
636
- df_normalized = _normalize_databento_dataframe(df)
637
-
638
- # Cache the data
639
- _save_cache(df_normalized, cache_file)
640
-
641
- logger.debug(f"Successfully retrieved and cached {len(df_normalized)} rows")
642
- return df_normalized
643
- else:
644
- logger.warning(f"✗ No data returned for symbol: {symbol_to_use}")
645
-
646
- except Exception as e:
647
- error_str = str(e).lower()
648
- if "symbology_invalid_request" in error_str or "none of the symbols could be resolved" in error_str:
649
- logger.warning(f"Symbol {symbol_to_use} not resolved in DataBento")
650
- else:
651
- logger.warning(f"✗ Error with symbol {symbol_to_use}: {str(e)}")
859
+ except Exception as exc:
860
+ logger.warning(f"Error fetching {symbol} from DataBento: {exc}")
652
861
  continue
653
-
654
- # If we get here, none of the symbols worked
655
- logger.error(f" DataBento symbol resolution FAILED for {asset.symbol}")
656
- logger.error(f"Symbols tried: {symbols_to_try}")
657
- logger.error("This indicates:")
658
- logger.error("1. Contract may not be available in DataBento GLBX.MDP3 dataset")
659
- logger.error("2. Data may not be available for the requested time range")
660
- logger.error("3. Markets may be closed (weekend/holiday)")
661
- logger.error("Check DataBento documentation: https://databento.com/docs/api-reference-historical/basics/symbology")
662
-
663
- return None
664
-
665
- except Exception as e:
666
- logger.error("DATABENTO_DATA_FETCH_ERROR: DataBento data fetch error: %s | Asset: %s, Start: %s, End: %s",
667
- str(e), asset.symbol, start, end)
668
-
862
+
863
+ if df_raw is None or df_raw.empty:
864
+ logger.warning(f"No data returned from DataBento for symbol {symbol}")
865
+ continue
866
+
867
+ df_normalized = _normalize_databento_dataframe(df_raw)
868
+ df_normalized["symbol"] = symbol
869
+ cache_path = _build_cache_filename(asset, start, end, timestep, symbol_override=symbol)
870
+ _save_cache(df_normalized, cache_path)
871
+ frames.append(df_normalized)
872
+
873
+ if not frames:
874
+ logger.warning(f"No DataBento data available for {asset.symbol} between {start} and {end}")
669
875
  return None
670
876
 
877
+ combined = pd.concat(frames, axis=0)
878
+ combined.sort_index(inplace=True)
879
+
880
+ definition_client: Optional[DataBentoClient] = None
881
+
882
+ def get_definition(symbol_code: str) -> Optional[Dict]:
883
+ nonlocal definition_client
884
+ cache_key = (symbol_code, dataset)
885
+ if cache_key in _INSTRUMENT_DEFINITION_CACHE:
886
+ return _INSTRUMENT_DEFINITION_CACHE[cache_key]
887
+ if definition_client is None:
888
+ try:
889
+ definition_client = DataBentoClient(api_key=api_key)
890
+ except Exception as exc:
891
+ logger.warning(f"Unable to create DataBento definition client: {exc}")
892
+ return None
893
+ try:
894
+ definition = definition_client.get_instrument_definition(
895
+ dataset=dataset,
896
+ symbol=symbol_code,
897
+ reference_date=reference_date or start,
898
+ )
899
+ except Exception as exc:
900
+ logger.warning(f"Failed to fetch definition for {symbol_code}: {exc}")
901
+ return None
902
+ if definition:
903
+ _INSTRUMENT_DEFINITION_CACHE[cache_key] = definition
904
+ return definition
905
+
906
+ schedule = databento_roll.build_roll_schedule(
907
+ asset,
908
+ schedule_start,
909
+ end,
910
+ definition_provider=get_definition,
911
+ roll_days=databento_roll.ROLL_DAYS_BEFORE_EXPIRATION,
912
+ )
913
+
914
+ if schedule:
915
+ combined = _filter_front_month_rows_pandas(combined, schedule)
916
+
917
+ if "symbol" in combined.columns:
918
+ combined = combined.drop(columns=["symbol"])
919
+
920
+ return combined
921
+
671
922
 
672
923
  def get_last_price_from_databento(
673
924
  api_key: str,