lumibot-4.1.3-py3-none-any.whl → lumibot-4.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (163)
  1. lumibot/backtesting/__init__.py +19 -5
  2. lumibot/backtesting/backtesting_broker.py +98 -18
  3. lumibot/backtesting/databento_backtesting.py +5 -686
  4. lumibot/backtesting/databento_backtesting_pandas.py +738 -0
  5. lumibot/backtesting/databento_backtesting_polars.py +860 -546
  6. lumibot/backtesting/fix_debug.py +37 -0
  7. lumibot/backtesting/thetadata_backtesting.py +9 -355
  8. lumibot/backtesting/thetadata_backtesting_pandas.py +1167 -0
  9. lumibot/brokers/alpaca.py +8 -1
  10. lumibot/brokers/schwab.py +12 -2
  11. lumibot/credentials.py +13 -0
  12. lumibot/data_sources/__init__.py +5 -8
  13. lumibot/data_sources/data_source.py +6 -2
  14. lumibot/data_sources/data_source_backtesting.py +30 -0
  15. lumibot/data_sources/databento_data.py +5 -390
  16. lumibot/data_sources/databento_data_pandas.py +440 -0
  17. lumibot/data_sources/databento_data_polars.py +15 -9
  18. lumibot/data_sources/pandas_data.py +30 -17
  19. lumibot/data_sources/polars_data.py +986 -0
  20. lumibot/data_sources/polars_mixin.py +472 -96
  21. lumibot/data_sources/polygon_data_polars.py +5 -0
  22. lumibot/data_sources/yahoo_data.py +9 -2
  23. lumibot/data_sources/yahoo_data_polars.py +5 -0
  24. lumibot/entities/__init__.py +15 -0
  25. lumibot/entities/asset.py +5 -28
  26. lumibot/entities/bars.py +89 -20
  27. lumibot/entities/data.py +29 -6
  28. lumibot/entities/data_polars.py +668 -0
  29. lumibot/entities/position.py +38 -4
  30. lumibot/strategies/_strategy.py +2 -1
  31. lumibot/strategies/strategy.py +61 -49
  32. lumibot/tools/backtest_cache.py +284 -0
  33. lumibot/tools/databento_helper.py +35 -35
  34. lumibot/tools/databento_helper_polars.py +738 -775
  35. lumibot/tools/futures_roll.py +251 -0
  36. lumibot/tools/indicators.py +135 -104
  37. lumibot/tools/polars_utils.py +142 -0
  38. lumibot/tools/thetadata_helper.py +1068 -134
  39. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/METADATA +9 -1
  40. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/RECORD +71 -147
  41. tests/backtest/test_databento.py +37 -6
  42. tests/backtest/test_databento_comprehensive_trading.py +8 -4
  43. tests/backtest/test_databento_parity.py +4 -2
  44. tests/backtest/test_debug_avg_fill_price.py +1 -1
  45. tests/backtest/test_example_strategies.py +11 -1
  46. tests/backtest/test_futures_edge_cases.py +3 -3
  47. tests/backtest/test_futures_single_trade.py +2 -2
  48. tests/backtest/test_futures_ultra_simple.py +2 -2
  49. tests/backtest/test_polars_lru_eviction.py +470 -0
  50. tests/backtest/test_yahoo.py +42 -0
  51. tests/test_asset.py +4 -4
  52. tests/test_backtest_cache_manager.py +149 -0
  53. tests/test_backtesting_data_source_env.py +6 -0
  54. tests/test_continuous_futures_resolution.py +60 -48
  55. tests/test_data_polars_parity.py +160 -0
  56. tests/test_databento_asset_validation.py +23 -5
  57. tests/test_databento_backtesting.py +1 -1
  58. tests/test_databento_backtesting_polars.py +312 -192
  59. tests/test_databento_data.py +220 -463
  60. tests/test_databento_live.py +10 -10
  61. tests/test_futures_roll.py +38 -0
  62. tests/test_indicator_subplots.py +101 -0
  63. tests/test_market_infinite_loop_bug.py +77 -3
  64. tests/test_polars_resample.py +67 -0
  65. tests/test_polygon_helper.py +46 -0
  66. tests/test_thetadata_backwards_compat.py +97 -0
  67. tests/test_thetadata_helper.py +222 -23
  68. tests/test_thetadata_pandas_verification.py +186 -0
  69. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  70. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  71. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  72. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  73. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  74. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  75. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  76. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  77. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  78. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  79. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  80. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  81. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  82. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  83. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  85. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  86. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  87. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  88. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  89. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  90. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  91. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  92. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  93. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  94. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  95. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  96. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  97. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  98. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  99. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  100. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  101. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  102. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  103. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  104. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  105. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  106. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  107. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  108. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  109. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  110. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  111. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  112. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  113. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  114. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  115. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  116. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  117. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  118. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  119. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  120. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  121. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  122. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  123. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  124. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  125. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  126. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  127. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  128. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  129. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  130. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  131. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  132. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  133. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  134. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  135. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  136. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  137. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  138. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  139. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  140. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  141. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  142. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  143. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  144. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  145. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  146. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  147. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  148. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  149. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  150. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  151. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  152. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  153. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  154. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  155. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  156. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  157. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  158. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  159. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  160. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  161. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/WHEEL +0 -0
  162. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/licenses/LICENSE +0 -0
  163. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,738 @@
1
+ import traceback
2
+ from datetime import datetime, timedelta
3
+
4
+ import pandas as pd
5
+
6
+ from lumibot import LUMIBOT_DEFAULT_PYTZ
7
+ from lumibot.data_sources import PandasData
8
+ from lumibot.entities import Asset, Data
9
+ from lumibot.tools import databento_helper
10
+ from lumibot.tools.databento_helper import DataBentoAuthenticationError
11
+ from lumibot.tools.helpers import to_datetime_aware
12
+ from termcolor import colored
13
+
14
+ from lumibot.tools.lumibot_logger import get_logger
15
+ logger = get_logger(__name__)
16
+
17
# Extra lead time requested ahead of a start datetime: subtracted from
# datetime_start when prefetching, and passed as start_buffer when the
# fetch window for an individual request is resolved.
START_BUFFER = timedelta(days=5)
18
+
19
+
20
+ class DataBentoDataBacktestingPandas(PandasData):
21
+ """
22
+ Backtesting implementation of DataBento data source
23
+
24
+ This class extends PandasData to provide DataBento-specific backtesting functionality,
25
+ including data retrieval, caching, and time-based filtering for historical simulations.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ datetime_start,
31
+ datetime_end,
32
+ pandas_data=None,
33
+ api_key=None,
34
+ timeout=30,
35
+ max_retries=3,
36
+ **kwargs,
37
+ ):
38
+ """
39
+ Initialize DataBento backtesting data source
40
+
41
+ Parameters
42
+ ----------
43
+ datetime_start : datetime
44
+ Start datetime for backtesting period
45
+ datetime_end : datetime
46
+ End datetime for backtesting period
47
+ pandas_data : dict, optional
48
+ Pre-loaded pandas data
49
+ api_key : str
50
+ DataBento API key
51
+ timeout : int, optional
52
+ API request timeout in seconds, default 30
53
+ max_retries : int, optional
54
+ Maximum number of API retry attempts, default 3
55
+ **kwargs
56
+ Additional parameters passed to parent class
57
+ """
58
+ super().__init__(
59
+ datetime_start=datetime_start,
60
+ datetime_end=datetime_end,
61
+ pandas_data=pandas_data,
62
+ api_key=api_key,
63
+ **kwargs
64
+ )
65
+
66
+ # Store DataBento-specific configuration
67
+ self._api_key = api_key
68
+ self._timeout = timeout
69
+ self._max_retries = max_retries
70
+
71
+ # Track which assets we've already fetched to avoid redundant requests
72
+ self._prefetched_assets = set()
73
+ # Track data requests to avoid repeated log messages
74
+ self._logged_requests = set()
75
+
76
+ # OPTIMIZATION: Iteration-level caching to avoid redundant filtering
77
+ # Cache filtered DataFrames per iteration (datetime)
78
+ self._filtered_bars_cache = {} # {(asset_key, length, timestep, timeshift, dt): DataFrame}
79
+ self._last_price_cache = {} # {(asset_key, dt): price}
80
+ self._cache_datetime = None # Track when to invalidate cache
81
+
82
+ # Track which futures assets we've fetched multipliers for (to avoid redundant API calls)
83
+ self._multiplier_fetched_assets = set()
84
+
85
+ # Verify DataBento availability
86
+ if not databento_helper.DATABENTO_AVAILABLE:
87
+ logger.error("DataBento package not available. Please install with: pip install databento")
88
+ raise ImportError("DataBento package not available")
89
+
90
+ logger.debug(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
91
+
92
+ def _check_and_clear_cache(self):
93
+ """
94
+ OPTIMIZATION: Clear iteration caches when datetime changes.
95
+ This ensures fresh filtering for each new iteration while reusing
96
+ results within the same iteration.
97
+ """
98
+ current_dt = self.get_datetime()
99
+ if self._cache_datetime != current_dt:
100
+ self._filtered_bars_cache.clear()
101
+ self._last_price_cache.clear()
102
+ self._cache_datetime = current_dt
103
+
104
+ def _ensure_futures_multiplier(self, asset):
105
+ """
106
+ Ensure futures asset has correct multiplier set.
107
+
108
+ This method is idempotent and cached - safe to call multiple times.
109
+ Only fetches multiplier once per unique asset.
110
+
111
+ Design rationale:
112
+ - Futures multipliers must be fetched from data provider (e.g., DataBento)
113
+ - Asset class defaults to multiplier=1
114
+ - Data source is responsible for updating multiplier on first use
115
+ - Lazy fetching is more efficient than prefetching all possible assets
116
+
117
+ Parameters
118
+ ----------
119
+ asset : Asset
120
+ The asset to ensure has correct multiplier
121
+ """
122
+ # Skip if not a futures asset
123
+ if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
124
+ return
125
+
126
+ # Skip if multiplier already set to non-default value
127
+ if asset.multiplier != 1:
128
+ return
129
+
130
+ # Create cache key to track which assets we've already processed
131
+ # Use symbol + asset_type + expiration to handle different contracts
132
+ cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
133
+
134
+ # Check if we already tried to fetch for this asset
135
+ if cache_key in self._multiplier_fetched_assets:
136
+ return # Already attempted (even if failed, don't retry every time)
137
+
138
+ # Mark as attempted to avoid redundant API calls
139
+ self._multiplier_fetched_assets.add(cache_key)
140
+
141
+ # Fetch and set multiplier from DataBento
142
+ try:
143
+ client = databento_helper.DataBentoClient(self._api_key)
144
+
145
+ # Resolve symbol based on asset type
146
+ if asset.asset_type == Asset.AssetType.CONT_FUTURE:
147
+ resolved_symbol = databento_helper._format_futures_symbol_for_databento(
148
+ asset, reference_date=self.datetime_start
149
+ )
150
+ else:
151
+ resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
152
+
153
+ # Fetch multiplier from DataBento instrument definition
154
+ databento_helper._fetch_and_update_futures_multiplier(
155
+ client=client,
156
+ asset=asset,
157
+ resolved_symbol=resolved_symbol,
158
+ dataset="GLBX.MDP3",
159
+ reference_date=self.datetime_start
160
+ )
161
+
162
+ logger.debug(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
163
+
164
+ except DataBentoAuthenticationError as e:
165
+ logger.error(colored(f"DataBento authentication failed while fetching multiplier for {asset.symbol}: {e}", "red"))
166
+ raise
167
+ except Exception as e:
168
+ logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
169
+
170
+ def prefetch_data(self, assets, timestep="minute"):
171
+ """
172
+ Prefetch all required data for the specified assets for the entire backtest period.
173
+ This reduces redundant API calls and log spam during backtesting.
174
+
175
+ Parameters
176
+ ----------
177
+ assets : list of Asset
178
+ List of assets to prefetch data for
179
+ timestep : str, optional
180
+ Timestep to fetch (default: "minute")
181
+ """
182
+ if not assets:
183
+ return
184
+
185
+ logger.debug(f"Prefetching DataBento data for {len(assets)} assets...")
186
+
187
+ for asset in assets:
188
+ # Create search key for the asset
189
+ quote_asset = Asset("USD", "forex")
190
+ search_asset = (asset, quote_asset)
191
+
192
+ # Skip if already prefetched
193
+ if search_asset in self._prefetched_assets:
194
+ continue
195
+
196
+ try:
197
+ # Calculate start with buffer for better data coverage
198
+ start_datetime = self.datetime_start - START_BUFFER
199
+ end_datetime = self.datetime_end + timedelta(days=1)
200
+
201
+ logger.debug(f"Fetching {asset.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
202
+
203
+ # Get data from DataBento for entire period
204
+ df = databento_helper.get_price_data_from_databento(
205
+ api_key=self._api_key,
206
+ asset=asset,
207
+ start=start_datetime,
208
+ end=end_datetime,
209
+ timestep=timestep,
210
+ venue=None,
211
+ force_cache_update=False
212
+ )
213
+
214
+ if df is None or df.empty:
215
+ # For empty data, create an empty Data object with proper timezone handling
216
+ empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
217
+ # Create an empty DatetimeIndex with proper timezone
218
+ empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
219
+
220
+ data_obj = Data(
221
+ asset,
222
+ df=empty_df,
223
+ timestep=timestep,
224
+ quote=quote_asset,
225
+ # Explicitly set dates to avoid timezone issues
226
+ date_start=None,
227
+ date_end=None
228
+ )
229
+ self.pandas_data[search_asset] = data_obj
230
+ else:
231
+ # Create Data object and store
232
+ data_obj = Data(
233
+ asset,
234
+ df=df,
235
+ timestep=timestep,
236
+ quote=quote_asset,
237
+ )
238
+ self.pandas_data[search_asset] = data_obj
239
+ logger.debug(f"Cached {len(df)} rows for {asset.symbol}")
240
+
241
+ # Mark as prefetched
242
+ self._prefetched_assets.add(search_asset)
243
+
244
+ except DataBentoAuthenticationError as e:
245
+ logger.error(colored(f"DataBento authentication failed while prefetching {asset.symbol}: {e}", "red"))
246
+ raise
247
+ except Exception as e:
248
+ logger.error(f"Error prefetching data for {asset.symbol}: {str(e)}")
249
+ logger.error(traceback.format_exc())
250
+
251
+ def _update_pandas_data(self, asset, quote, length, timestep, start_dt=None):
252
+ """
253
+ Get asset data and update the self.pandas_data dictionary.
254
+
255
+ This method retrieves historical data from DataBento and caches it for backtesting use.
256
+ If data has already been prefetched, it skips redundant API calls.
257
+
258
+ Parameters
259
+ ----------
260
+ asset : Asset
261
+ The asset to get data for.
262
+ quote : Asset
263
+ The quote asset to use. For DataBento, this is typically not used.
264
+ length : int
265
+ The number of data points to get.
266
+ timestep : str
267
+ The timestep to use. For example, "minute", "hour", or "day".
268
+ start_dt : datetime, optional
269
+ The start datetime to use. If None, the current self.datetime_start will be used.
270
+ """
271
+ search_asset = asset
272
+ asset_separated = asset
273
+ quote_asset = quote if quote is not None else Asset("USD", "forex")
274
+
275
+ # Handle tuple assets (asset, quote pairs)
276
+ if isinstance(search_asset, tuple):
277
+ asset_separated, quote_asset = search_asset
278
+ else:
279
+ search_asset = (search_asset, quote_asset)
280
+
281
+ # Ensure futures have correct multiplier set
282
+ self._ensure_futures_multiplier(asset_separated)
283
+
284
+ # If this asset was already prefetched, we don't need to do anything
285
+ if search_asset in self._prefetched_assets:
286
+ return
287
+
288
+ # Check if we already have adequate data for this asset
289
+ if search_asset in self.pandas_data:
290
+ asset_data = self.pandas_data[search_asset]
291
+ asset_data_df = asset_data.df
292
+
293
+ # Only check if we have actual data (not empty DataFrame)
294
+ if not asset_data_df.empty and len(asset_data_df.index) > 0:
295
+ data_start_datetime = asset_data_df.index[0]
296
+ data_end_datetime = asset_data_df.index[-1]
297
+
298
+ # Get the timestep of the existing data
299
+ data_timestep = asset_data.timestep
300
+
301
+ # If the timestep matches, check if we have sufficient coverage
302
+ if data_timestep == timestep:
303
+ # Ensure both datetimes are timezone-aware for comparison
304
+ data_start_tz = to_datetime_aware(data_start_datetime)
305
+ data_end_tz = to_datetime_aware(data_end_datetime)
306
+
307
+ # Get the start datetime with buffer
308
+ start_datetime, _ = self.get_start_datetime_and_ts_unit(
309
+ length, timestep, start_dt, start_buffer=START_BUFFER
310
+ )
311
+ start_tz = to_datetime_aware(start_datetime)
312
+
313
+ # Check if existing data covers the needed time range with buffer
314
+ needed_start = start_tz - START_BUFFER
315
+ needed_end = self.datetime_end
316
+
317
+ if data_start_tz <= needed_start and data_end_tz >= needed_end:
318
+ # Data is already sufficient - return silently
319
+ return
320
+
321
+ # We need to fetch new data from DataBento
322
+ # Create a unique key for logging to avoid spam
323
+ log_key = f"{asset_separated.symbol}_{timestep}"
324
+
325
+ try:
326
+ # Only log fetch message once per asset/timestep combination
327
+ if log_key not in self._logged_requests:
328
+ logger.debug(f"Fetching {timestep} data for {asset_separated.symbol}")
329
+ self._logged_requests.add(log_key)
330
+
331
+ # Get the start datetime and timestep unit
332
+ start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
333
+ length, timestep, start_dt, start_buffer=START_BUFFER
334
+ )
335
+
336
+ # Calculate end datetime (use current backtest end or a bit beyond)
337
+ end_datetime = self.datetime_end + timedelta(days=1)
338
+
339
+ # Get data from DataBento
340
+ df = databento_helper.get_price_data_from_databento(
341
+ api_key=self._api_key,
342
+ asset=asset_separated,
343
+ start=start_datetime,
344
+ end=end_datetime,
345
+ timestep=ts_unit,
346
+ venue=None, # Could add venue support later
347
+ force_cache_update=False
348
+ )
349
+
350
+ if df is None or df.empty:
351
+ # For empty data, create an empty Data object with proper timezone handling
352
+ # to maintain backward compatibility with tests
353
+ empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
354
+ # Create an empty DatetimeIndex with proper timezone
355
+ empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
356
+
357
+ data_obj = Data(
358
+ asset_separated,
359
+ df=empty_df,
360
+ timestep=ts_unit,
361
+ quote=quote_asset,
362
+ # Use timezone-aware dates to avoid timezone issues
363
+ date_start=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1)),
364
+ date_end=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1))
365
+ )
366
+ self.pandas_data[search_asset] = data_obj
367
+ return
368
+
369
+ # Ensure the DataFrame has a datetime index
370
+ if not isinstance(df.index, pd.DatetimeIndex):
371
+ logger.error(f"DataBento data for {asset_separated.symbol} doesn't have datetime index")
372
+ return
373
+
374
+ # Create Data object and store in pandas_data
375
+ data_obj = Data(
376
+ asset_separated,
377
+ df=df,
378
+ timestep=ts_unit,
379
+ quote=quote_asset,
380
+ )
381
+
382
+ self.pandas_data[search_asset] = data_obj
383
+
384
+ except DataBentoAuthenticationError as e:
385
+ logger.error(colored(f"DataBento authentication failed for {asset_separated.symbol}: {e}", "red"))
386
+ raise
387
+ except Exception as e:
388
+ logger.error(f"Error updating pandas data for {asset_separated.symbol}: {str(e)}")
389
+ logger.error(traceback.format_exc())
390
+
391
+ def get_last_price(self, asset, quote=None, exchange=None):
392
+ """
393
+ Get the last price for an asset at the current backtest time
394
+
395
+ Parameters
396
+ ----------
397
+ asset : Asset
398
+ Asset to get the price for
399
+ quote : Asset, optional
400
+ Quote asset (not typically used with DataBento)
401
+ exchange : str, optional
402
+ Exchange filter
403
+
404
+ Returns
405
+ -------
406
+ float, Decimal, or None
407
+ Last price at current backtest time
408
+ """
409
+ try:
410
+ # OPTIMIZATION: Check cache first
411
+ self._check_and_clear_cache()
412
+ current_dt = self.get_datetime()
413
+
414
+ # Try to get data from our cached pandas_data first
415
+ search_asset = asset
416
+ quote_asset = quote if quote is not None else Asset("USD", "forex")
417
+
418
+ if isinstance(search_asset, tuple):
419
+ asset_separated, quote_asset = search_asset
420
+ else:
421
+ search_asset = (search_asset, quote_asset)
422
+ asset_separated = asset
423
+
424
+ # Ensure futures have correct multiplier set
425
+ self._ensure_futures_multiplier(asset_separated)
426
+
427
+ # OPTIMIZATION: Check iteration cache
428
+ cache_key = (search_asset, current_dt)
429
+ if cache_key in self._last_price_cache:
430
+ return self._last_price_cache[cache_key]
431
+
432
+ if search_asset in self.pandas_data:
433
+ asset_data = self.pandas_data[search_asset]
434
+ df = asset_data.df
435
+
436
+ if not df.empty and 'close' in df.columns:
437
+ # Ensure current_dt is timezone-aware for comparison
438
+ current_dt_aware = to_datetime_aware(current_dt)
439
+
440
+ # Step back one bar so only fully closed bars are visible
441
+ bar_delta = timedelta(minutes=1)
442
+ if asset_data.timestep == "hour":
443
+ bar_delta = timedelta(hours=1)
444
+ elif asset_data.timestep == "day":
445
+ bar_delta = timedelta(days=1)
446
+
447
+ cutoff_dt = current_dt_aware - bar_delta
448
+
449
+ # Filter to data up to current backtest time (exclude current bar unless broker overrides)
450
+ filtered_df = df[df.index <= cutoff_dt]
451
+
452
+ # If we have no prior bar (e.g., first iteration), allow the current timestamp
453
+ if filtered_df.empty:
454
+ filtered_df = df[df.index <= current_dt_aware]
455
+
456
+ if not filtered_df.empty:
457
+ last_price = filtered_df['close'].iloc[-1]
458
+ if not pd.isna(last_price):
459
+ price = float(last_price)
460
+ # OPTIMIZATION: Cache the result
461
+ self._last_price_cache[cache_key] = price
462
+ return price
463
+
464
+ # If no cached data, try to get recent data
465
+ logger.warning(f"No cached data for {asset.symbol}, attempting direct fetch")
466
+ return databento_helper.get_last_price_from_databento(
467
+ api_key=self._api_key,
468
+ asset=asset_separated,
469
+ venue=exchange
470
+ )
471
+
472
+ except DataBentoAuthenticationError as e:
473
+ logger.error(colored(f"DataBento authentication failed while getting last price for {asset.symbol}: {e}", "red"))
474
+ raise
475
+ except Exception as e:
476
+ logger.error(f"Error getting last price for {asset.symbol}: {e}")
477
+ return None
478
+
479
+ def get_chains(self, asset, quote=None):
480
+ """
481
+ Get option chains for an asset
482
+
483
+ DataBento doesn't provide options chain data, so this returns an empty dict.
484
+
485
+ Parameters
486
+ ----------
487
+ asset : Asset
488
+ Asset to get chains for
489
+ quote : Asset, optional
490
+ Quote asset
491
+
492
+ Returns
493
+ -------
494
+ dict
495
+ Empty dictionary
496
+ """
497
+ logger.warning("DataBento does not provide options chain data")
498
+ return {}
499
+
500
+ def _get_bars_dict(self, assets, length, timestep, timeshift=None):
501
+ """
502
+ Override parent method to handle DataBento-specific data retrieval
503
+
504
+ Parameters
505
+ ----------
506
+ assets : list
507
+ List of assets to get data for
508
+ length : int
509
+ Number of bars to retrieve
510
+ timestep : str
511
+ Timestep for the data
512
+ timeshift : timedelta, optional
513
+ Time shift to apply
514
+
515
+ Returns
516
+ -------
517
+ dict
518
+ Dictionary mapping assets to their bar data
519
+ """
520
+ result = {}
521
+
522
+ for asset in assets:
523
+ try:
524
+ # Update pandas data if needed
525
+ self._update_pandas_data(asset, None, length, timestep)
526
+
527
+ # Get data from pandas_data
528
+ search_asset = asset
529
+ if not isinstance(search_asset, tuple):
530
+ search_asset = (search_asset, Asset("USD", "forex"))
531
+
532
+ if search_asset in self.pandas_data:
533
+ asset_data = self.pandas_data[search_asset]
534
+ df = asset_data.df
535
+
536
+ if not df.empty:
537
+ # Apply timeshift if specified
538
+ current_dt = self.get_datetime()
539
+ shift_seconds = 0
540
+ if timeshift:
541
+ if isinstance(timeshift, int):
542
+ shift_seconds = timeshift * 60
543
+ current_dt = current_dt - timedelta(minutes=timeshift)
544
+ else:
545
+ shift_seconds = timeshift.total_seconds()
546
+ current_dt = current_dt - timeshift
547
+
548
+ # Ensure current_dt is timezone-aware for comparison
549
+ current_dt_aware = to_datetime_aware(current_dt)
550
+
551
+ # Filter data up to current backtest time (exclude current bar unless broker overrides)
552
+ include_current = getattr(self, "_include_current_bar_for_orders", False)
553
+ allow_current = include_current or shift_seconds > 0
554
+ mask = df.index <= current_dt_aware if allow_current else df.index < current_dt_aware
555
+ filtered_df = df[mask]
556
+
557
+ # Take the last 'length' bars
558
+ result_df = filtered_df.tail(length)
559
+
560
+ if not result_df.empty:
561
+ result[asset] = result_df
562
+ else:
563
+ logger.warning(f"No data available for {asset.symbol} at {current_dt}")
564
+ result[asset] = None
565
+ else:
566
+ logger.warning(f"Empty data for {asset.symbol}")
567
+ result[asset] = None
568
+ else:
569
+ logger.warning(f"No data found for {asset.symbol}")
570
+ result[asset] = None
571
+
572
+ except DataBentoAuthenticationError as e:
573
+ logger.error(colored(f"DataBento authentication failed while getting bars for {asset}: {e}", "red"))
574
+ raise
575
+ except Exception as e:
576
+ logger.error(f"Error getting bars for {asset}: {e}")
577
+ result[asset] = None
578
+
579
+ return result
580
+
581
def _pull_source_symbol_bars(
    self,
    asset,
    length,
    timestep="",
    timeshift=0,
    quote=None,
    exchange=None,
    include_after_hours=True,
):
    """
    Override parent method to fetch data from DataBento instead of pre-loaded data store

    This method is called by get_historical_prices. It asks
    ``_update_pandas_data`` to make sure the asset is loaded, then slices the
    frame stored in ``self.pandas_data`` so that only bars visible at the
    (optionally shifted) backtest time are returned.

    Parameters
    ----------
    asset : Asset or tuple
        Asset to look up, or an ``(asset, quote)`` tuple which is used
        directly as the ``pandas_data`` key.
    length : int
        Maximum number of trailing bars to return.
    timestep : str, optional
        Bar size; an empty value falls back to ``"minute"``.
    timeshift : int or datetime.timedelta, optional
        Offset subtracted from the current backtest time. An ``int`` is
        interpreted as minutes. Negative values look ahead (see the note in
        the body).
    quote : Asset, optional
        Quote asset; defaults to ``Asset("USD", "forex")`` when omitted.
    exchange, include_after_hours
        Accepted for signature compatibility; not used by this method.

    Returns
    -------
    The last ``length`` rows of the filtered frame, or ``None`` when the
    asset is not loaded, its frame is empty, or no bar passes the filter.
    """
    timestep = timestep if timestep else "minute"

    # OPTIMIZATION: Check iteration cache first
    self._check_and_clear_cache()
    broker_dt = self.get_datetime()

    # Resolve the key used by the pandas_data store
    quote_asset = quote if quote is not None else Asset("USD", "forex")
    if isinstance(asset, tuple):
        search_asset = asset
        asset_separated, _ = asset
    else:
        search_asset = (asset, quote_asset)
        asset_separated = asset

    # Normalise the timeshift once: `shift` is what gets subtracted from the
    # clock, `shift_seconds` drives the filter branch and the log line.
    shift = None
    shift_seconds = 0
    if timeshift:
        if isinstance(timeshift, int):
            shift = timedelta(minutes=timeshift)
            shift_seconds = timeshift * 60
        else:
            shift = timeshift
            shift_seconds = timeshift.total_seconds()

    # OPTIMIZATION: Build cache key and check cache.
    # The key is expressed in (possibly fractional) minutes and is NOT
    # truncated: a sub-minute shift such as timedelta(seconds=30) must not
    # share an entry with "no shift", because the two take different filter
    # branches below. Whole-minute shifts still hash equal to their integer
    # form (2 == 2.0), so int and timedelta callers share entries.
    timeshift_key = shift_seconds / 60
    cache_key = (search_asset, length, timestep, timeshift_key, broker_dt)
    if cache_key in self._filtered_bars_cache:
        return self._filtered_bars_cache[cache_key]

    # Check if we need to fetch data by calling _update_pandas_data first
    # This will only fetch if data is not already cached or prefetched
    self._update_pandas_data(asset, quote, length, timestep)

    # Nothing loaded for this asset: report "no data" (not cached, so a later
    # call can retry the fetch).
    if search_asset not in self.pandas_data:
        return None

    asset_data = self.pandas_data[search_asset]
    df = asset_data.df
    if df.empty:
        return None

    # ========================================================================
    # CRITICAL: NEGATIVE TIMESHIFT ARITHMETIC FOR LOOKAHEAD
    # ========================================================================
    # Negative timeshift allows broker to "peek ahead" for realistic fills.
    #
    # Example with timeshift=-2 at broker_dt=09:30:
    #   - Arithmetic: current_dt - timeshift
    #                 = 09:30 - timedelta(minutes=-2)
    #                 = 09:30 - (-2 minutes)
    #                 = 09:30 + 2 minutes
    #                 = 09:32
    #   - Data source returns bars up to 09:32: [..., 09:29, 09:30, 09:31, 09:32]
    #   - Broker filters to future bars (>= 09:30): [09:30, 09:31, 09:32]
    #   - Broker uses FIRST future bar (09:31) and its OPEN price for fills
    #
    # Why this is necessary:
    #   - Real world: Order placed at 09:30:30 fills at 09:31:00 open
    #   - Backtesting: Broker at 09:30 needs to see 09:31 bar for realistic fills
    #
    # DO NOT change this arithmetic! "current_dt - timeshift" with negative
    # timeshift is CORRECT and INTENTIONAL.
    # ========================================================================
    shifted_dt = broker_dt if shift is None else broker_dt - shift

    # Ensure the shifted time is timezone-aware for comparison
    current_dt_aware = to_datetime_aware(shifted_dt)

    # Step back one bar to avoid exposing the in-progress bar
    if asset_data.timestep == "hour":
        bar_delta = timedelta(hours=1)
    elif asset_data.timestep == "day":
        bar_delta = timedelta(days=1)
    else:
        bar_delta = timedelta(minutes=1)
    cutoff_dt = current_dt_aware - bar_delta

    # Filter data up to current backtest time (exclude current bar unless broker overrides)
    if shift_seconds > 0:
        filter_branch = "shift_seconds > 0"
        filtered_df = df[df.index <= cutoff_dt]
    else:
        filter_branch = "shift_seconds <= 0"
        filtered_df = df[df.index < current_dt_aware]

    # INSTRUMENTATION: Log timeshift application and which bar is returned
    if not filtered_df.empty:
        returned_bar_dt = filtered_df.index[-1]
        logger.debug(f"[TIMESHIFT_PANDAS] asset={asset_separated.symbol} broker_dt={broker_dt} "
                     f"timeshift={timeshift} shift_seconds={shift_seconds} "
                     f"shifted_dt={current_dt_aware} cutoff_dt={cutoff_dt} "
                     f"filter={filter_branch} returned_bar={returned_bar_dt}")

    # Take the last 'length' bars
    result_df = filtered_df.tail(length)

    # OPTIMIZATION: Cache the result (or the absence of one) before returning
    result = result_df if not result_df.empty else None
    self._filtered_bars_cache[cache_key] = result
    return result
708
+
709
def initialize_data_for_backtest(self, strategy_assets, timestep="minute"):
    """
    Convenience method to prefetch all required data for a backtest strategy.
    This should be called during strategy initialization to load all data up front.

    Plain string symbols are converted to ``Asset`` objects. A string is
    treated as a futures contract only when the whole symbol has the shape
    ``<root><month code><1-2 digit year>`` (for example ``"ESZ4"`` or
    ``"MESH25"``); every other string defaults to a stock. Bare roots such
    as ``"ES"`` are ambiguous and are NOT guessed to be futures -- pass an
    ``Asset`` object for those.

    Parameters
    ----------
    strategy_assets : list of Asset or list of str
        List of assets or asset symbols that the strategy will use
    timestep : str, optional
        Primary timestep for the data (default: "minute")
    """
    # Local import keeps this method self-contained without touching the
    # module's import block.
    import re

    # Root (1-4 alphanumerics) + futures month code + 1-2 digit year.
    # Matching the full contract shape avoids classifying ordinary tickers
    # that merely contain a month-code letter (e.g. "MSFT", "GOOG", "QQQ")
    # as futures.
    futures_contract = re.compile(r"[A-Z0-9]{1,4}[FGHJKMNQUVXZ]\d{1,2}")

    # Convert string symbols to Asset objects if needed
    assets = []
    for asset in strategy_assets:
        if not isinstance(asset, str):
            # Already an asset-like object: pass it through untouched
            assets.append(asset)
        elif futures_contract.fullmatch(asset):
            # Looks like a futures contract symbol
            assets.append(Asset(asset, "future"))
        else:
            # Default to stock
            assets.append(Asset(asset, "stock"))

    # Prefetch data for all assets
    self.prefetch_data(assets, timestep)

    logger.debug(f"Initialized DataBento backtesting with prefetched data for {len(assets)} assets")