lumibot 4.1.3__py3-none-any.whl → 4.2.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (163)
  1. lumibot/backtesting/__init__.py +19 -5
  2. lumibot/backtesting/backtesting_broker.py +98 -18
  3. lumibot/backtesting/databento_backtesting.py +5 -686
  4. lumibot/backtesting/databento_backtesting_pandas.py +738 -0
  5. lumibot/backtesting/databento_backtesting_polars.py +860 -546
  6. lumibot/backtesting/fix_debug.py +37 -0
  7. lumibot/backtesting/thetadata_backtesting.py +9 -355
  8. lumibot/backtesting/thetadata_backtesting_pandas.py +1167 -0
  9. lumibot/brokers/alpaca.py +8 -1
  10. lumibot/brokers/schwab.py +12 -2
  11. lumibot/credentials.py +13 -0
  12. lumibot/data_sources/__init__.py +5 -8
  13. lumibot/data_sources/data_source.py +6 -2
  14. lumibot/data_sources/data_source_backtesting.py +30 -0
  15. lumibot/data_sources/databento_data.py +5 -390
  16. lumibot/data_sources/databento_data_pandas.py +440 -0
  17. lumibot/data_sources/databento_data_polars.py +15 -9
  18. lumibot/data_sources/pandas_data.py +30 -17
  19. lumibot/data_sources/polars_data.py +986 -0
  20. lumibot/data_sources/polars_mixin.py +472 -96
  21. lumibot/data_sources/polygon_data_polars.py +5 -0
  22. lumibot/data_sources/yahoo_data.py +9 -2
  23. lumibot/data_sources/yahoo_data_polars.py +5 -0
  24. lumibot/entities/__init__.py +15 -0
  25. lumibot/entities/asset.py +5 -28
  26. lumibot/entities/bars.py +89 -20
  27. lumibot/entities/data.py +29 -6
  28. lumibot/entities/data_polars.py +668 -0
  29. lumibot/entities/position.py +38 -4
  30. lumibot/strategies/_strategy.py +2 -1
  31. lumibot/strategies/strategy.py +61 -49
  32. lumibot/tools/backtest_cache.py +284 -0
  33. lumibot/tools/databento_helper.py +35 -35
  34. lumibot/tools/databento_helper_polars.py +738 -775
  35. lumibot/tools/futures_roll.py +251 -0
  36. lumibot/tools/indicators.py +135 -104
  37. lumibot/tools/polars_utils.py +142 -0
  38. lumibot/tools/thetadata_helper.py +1068 -134
  39. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/METADATA +9 -1
  40. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/RECORD +71 -147
  41. tests/backtest/test_databento.py +37 -6
  42. tests/backtest/test_databento_comprehensive_trading.py +8 -4
  43. tests/backtest/test_databento_parity.py +4 -2
  44. tests/backtest/test_debug_avg_fill_price.py +1 -1
  45. tests/backtest/test_example_strategies.py +11 -1
  46. tests/backtest/test_futures_edge_cases.py +3 -3
  47. tests/backtest/test_futures_single_trade.py +2 -2
  48. tests/backtest/test_futures_ultra_simple.py +2 -2
  49. tests/backtest/test_polars_lru_eviction.py +470 -0
  50. tests/backtest/test_yahoo.py +42 -0
  51. tests/test_asset.py +4 -4
  52. tests/test_backtest_cache_manager.py +149 -0
  53. tests/test_backtesting_data_source_env.py +6 -0
  54. tests/test_continuous_futures_resolution.py +60 -48
  55. tests/test_data_polars_parity.py +160 -0
  56. tests/test_databento_asset_validation.py +23 -5
  57. tests/test_databento_backtesting.py +1 -1
  58. tests/test_databento_backtesting_polars.py +312 -192
  59. tests/test_databento_data.py +220 -463
  60. tests/test_databento_live.py +10 -10
  61. tests/test_futures_roll.py +38 -0
  62. tests/test_indicator_subplots.py +101 -0
  63. tests/test_market_infinite_loop_bug.py +77 -3
  64. tests/test_polars_resample.py +67 -0
  65. tests/test_polygon_helper.py +46 -0
  66. tests/test_thetadata_backwards_compat.py +97 -0
  67. tests/test_thetadata_helper.py +222 -23
  68. tests/test_thetadata_pandas_verification.py +186 -0
  69. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  70. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  71. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  72. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  73. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  74. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  75. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  76. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  77. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  78. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  79. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  80. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  81. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  82. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  83. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  85. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  86. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  87. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  88. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  89. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  90. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  91. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  92. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  93. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  94. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  95. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  96. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  97. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  98. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  99. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  100. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  101. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  102. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  103. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  104. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  105. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  106. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  107. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  108. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  109. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  110. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  111. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  112. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  113. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  114. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  115. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  116. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  117. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  118. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  119. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  120. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  121. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  122. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  123. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  124. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  125. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  126. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  127. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  128. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  129. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  130. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  131. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  132. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  133. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  134. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  135. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  136. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  137. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  138. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  139. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  140. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  141. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  142. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  143. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  144. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  145. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  146. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  147. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  148. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  149. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  150. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  151. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  152. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  153. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  154. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  155. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  156. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  157. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  158. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  159. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  160. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  161. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/WHEEL +0 -0
  162. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/licenses/LICENSE +0 -0
  163. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/top_level.txt +0 -0
@@ -1,688 +1,7 @@
1
- import traceback
2
- from datetime import datetime, timedelta
1
+ """Canonical DataBento backtesting aliasing the Polars implementation."""
3
2
 
4
- import pandas as pd
3
+ from .databento_backtesting_polars import DataBentoDataBacktestingPolars as DataBentoDataBacktesting
4
+ from .databento_backtesting_pandas import DataBentoDataBacktestingPandas
5
+ from .databento_backtesting_polars import DataBentoDataBacktestingPolars
5
6
 
6
- from lumibot import LUMIBOT_DEFAULT_PYTZ
7
- from lumibot.data_sources import PandasData
8
- from lumibot.entities import Asset, Data
9
- from lumibot.tools import databento_helper
10
- from lumibot.tools.helpers import to_datetime_aware
11
-
12
- from lumibot.tools.lumibot_logger import get_logger
13
- logger = get_logger(__name__)
14
-
15
- START_BUFFER = timedelta(days=5)
16
-
17
-
18
- class DataBentoDataBacktesting(PandasData):
19
- """
20
- Backtesting implementation of DataBento data source
21
-
22
- This class extends PandasData to provide DataBento-specific backtesting functionality,
23
- including data retrieval, caching, and time-based filtering for historical simulations.
24
- """
25
-
26
- def __init__(
27
- self,
28
- datetime_start,
29
- datetime_end,
30
- pandas_data=None,
31
- api_key=None,
32
- timeout=30,
33
- max_retries=3,
34
- **kwargs,
35
- ):
36
- """
37
- Initialize DataBento backtesting data source
38
-
39
- Parameters
40
- ----------
41
- datetime_start : datetime
42
- Start datetime for backtesting period
43
- datetime_end : datetime
44
- End datetime for backtesting period
45
- pandas_data : dict, optional
46
- Pre-loaded pandas data
47
- api_key : str
48
- DataBento API key
49
- timeout : int, optional
50
- API request timeout in seconds, default 30
51
- max_retries : int, optional
52
- Maximum number of API retry attempts, default 3
53
- **kwargs
54
- Additional parameters passed to parent class
55
- """
56
- super().__init__(
57
- datetime_start=datetime_start,
58
- datetime_end=datetime_end,
59
- pandas_data=pandas_data,
60
- api_key=api_key,
61
- **kwargs
62
- )
63
-
64
- # Store DataBento-specific configuration
65
- self._api_key = api_key
66
- self._timeout = timeout
67
- self._max_retries = max_retries
68
-
69
- # Track which assets we've already fetched to avoid redundant requests
70
- self._prefetched_assets = set()
71
- # Track data requests to avoid repeated log messages
72
- self._logged_requests = set()
73
-
74
- # OPTIMIZATION: Iteration-level caching to avoid redundant filtering
75
- # Cache filtered DataFrames per iteration (datetime)
76
- self._filtered_bars_cache = {} # {(asset_key, length, timestep, timeshift, dt): DataFrame}
77
- self._last_price_cache = {} # {(asset_key, dt): price}
78
- self._cache_datetime = None # Track when to invalidate cache
79
-
80
- # Track which futures assets we've fetched multipliers for (to avoid redundant API calls)
81
- self._multiplier_fetched_assets = set()
82
-
83
- # Verify DataBento availability
84
- if not databento_helper.DATABENTO_AVAILABLE:
85
- logger.error("DataBento package not available. Please install with: pip install databento")
86
- raise ImportError("DataBento package not available")
87
-
88
- logger.info(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
89
-
90
- def _check_and_clear_cache(self):
91
- """
92
- OPTIMIZATION: Clear iteration caches when datetime changes.
93
- This ensures fresh filtering for each new iteration while reusing
94
- results within the same iteration.
95
- """
96
- current_dt = self.get_datetime()
97
- if self._cache_datetime != current_dt:
98
- self._filtered_bars_cache.clear()
99
- self._last_price_cache.clear()
100
- self._cache_datetime = current_dt
101
-
102
- def _ensure_futures_multiplier(self, asset):
103
- """
104
- Ensure futures asset has correct multiplier set.
105
-
106
- This method is idempotent and cached - safe to call multiple times.
107
- Only fetches multiplier once per unique asset.
108
-
109
- Design rationale:
110
- - Futures multipliers must be fetched from data provider (e.g., DataBento)
111
- - Asset class defaults to multiplier=1
112
- - Data source is responsible for updating multiplier on first use
113
- - Lazy fetching is more efficient than prefetching all possible assets
114
-
115
- Parameters
116
- ----------
117
- asset : Asset
118
- The asset to ensure has correct multiplier
119
- """
120
- # Skip if not a futures asset
121
- if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
122
- return
123
-
124
- # Skip if multiplier already set to non-default value
125
- if asset.multiplier != 1:
126
- return
127
-
128
- # Create cache key to track which assets we've already processed
129
- # Use symbol + asset_type + expiration to handle different contracts
130
- cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
131
-
132
- # Check if we already tried to fetch for this asset
133
- if cache_key in self._multiplier_fetched_assets:
134
- return # Already attempted (even if failed, don't retry every time)
135
-
136
- # Mark as attempted to avoid redundant API calls
137
- self._multiplier_fetched_assets.add(cache_key)
138
-
139
- # Fetch and set multiplier from DataBento
140
- try:
141
- client = databento_helper.DataBentoClient(self._api_key)
142
-
143
- # Resolve symbol based on asset type
144
- if asset.asset_type == Asset.AssetType.CONT_FUTURE:
145
- resolved_symbol = databento_helper._format_futures_symbol_for_databento(
146
- asset, reference_date=self.datetime_start
147
- )
148
- else:
149
- resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
150
-
151
- # Fetch multiplier from DataBento instrument definition
152
- databento_helper._fetch_and_update_futures_multiplier(
153
- client=client,
154
- asset=asset,
155
- resolved_symbol=resolved_symbol,
156
- dataset="GLBX.MDP3",
157
- reference_date=self.datetime_start
158
- )
159
-
160
- logger.info(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
161
-
162
- except Exception as e:
163
- logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
164
-
165
- def prefetch_data(self, assets, timestep="minute"):
166
- """
167
- Prefetch all required data for the specified assets for the entire backtest period.
168
- This reduces redundant API calls and log spam during backtesting.
169
-
170
- Parameters
171
- ----------
172
- assets : list of Asset
173
- List of assets to prefetch data for
174
- timestep : str, optional
175
- Timestep to fetch (default: "minute")
176
- """
177
- if not assets:
178
- return
179
-
180
- logger.info(f"Prefetching DataBento data for {len(assets)} assets...")
181
-
182
- for asset in assets:
183
- # Create search key for the asset
184
- quote_asset = Asset("USD", "forex")
185
- search_asset = (asset, quote_asset)
186
-
187
- # Skip if already prefetched
188
- if search_asset in self._prefetched_assets:
189
- continue
190
-
191
- try:
192
- # Calculate start with buffer for better data coverage
193
- start_datetime = self.datetime_start - START_BUFFER
194
- end_datetime = self.datetime_end + timedelta(days=1)
195
-
196
- logger.info(f"Fetching {asset.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
197
-
198
- # Get data from DataBento for entire period
199
- df = databento_helper.get_price_data_from_databento(
200
- api_key=self._api_key,
201
- asset=asset,
202
- start=start_datetime,
203
- end=end_datetime,
204
- timestep=timestep,
205
- venue=None,
206
- force_cache_update=False
207
- )
208
-
209
- if df is None or df.empty:
210
- # For empty data, create an empty Data object with proper timezone handling
211
- empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
212
- # Create an empty DatetimeIndex with proper timezone
213
- empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
214
-
215
- data_obj = Data(
216
- asset,
217
- df=empty_df,
218
- timestep=timestep,
219
- quote=quote_asset,
220
- # Explicitly set dates to avoid timezone issues
221
- date_start=None,
222
- date_end=None
223
- )
224
- self.pandas_data[search_asset] = data_obj
225
- else:
226
- # Create Data object and store
227
- data_obj = Data(
228
- asset,
229
- df=df,
230
- timestep=timestep,
231
- quote=quote_asset,
232
- )
233
- self.pandas_data[search_asset] = data_obj
234
- logger.info(f"Cached {len(df)} rows for {asset.symbol}")
235
-
236
- # Mark as prefetched
237
- self._prefetched_assets.add(search_asset)
238
-
239
- except Exception as e:
240
- logger.error(f"Error prefetching data for {asset.symbol}: {str(e)}")
241
- logger.error(traceback.format_exc())
242
-
243
- def _update_pandas_data(self, asset, quote, length, timestep, start_dt=None):
244
- """
245
- Get asset data and update the self.pandas_data dictionary.
246
-
247
- This method retrieves historical data from DataBento and caches it for backtesting use.
248
- If data has already been prefetched, it skips redundant API calls.
249
-
250
- Parameters
251
- ----------
252
- asset : Asset
253
- The asset to get data for.
254
- quote : Asset
255
- The quote asset to use. For DataBento, this is typically not used.
256
- length : int
257
- The number of data points to get.
258
- timestep : str
259
- The timestep to use. For example, "minute", "hour", or "day".
260
- start_dt : datetime, optional
261
- The start datetime to use. If None, the current self.datetime_start will be used.
262
- """
263
- search_asset = asset
264
- asset_separated = asset
265
- quote_asset = quote if quote is not None else Asset("USD", "forex")
266
-
267
- # Handle tuple assets (asset, quote pairs)
268
- if isinstance(search_asset, tuple):
269
- asset_separated, quote_asset = search_asset
270
- else:
271
- search_asset = (search_asset, quote_asset)
272
-
273
- # Ensure futures have correct multiplier set
274
- self._ensure_futures_multiplier(asset_separated)
275
-
276
- # If this asset was already prefetched, we don't need to do anything
277
- if search_asset in self._prefetched_assets:
278
- return
279
-
280
- # Check if we already have adequate data for this asset
281
- if search_asset in self.pandas_data:
282
- asset_data = self.pandas_data[search_asset]
283
- asset_data_df = asset_data.df
284
-
285
- # Only check if we have actual data (not empty DataFrame)
286
- if not asset_data_df.empty and len(asset_data_df.index) > 0:
287
- data_start_datetime = asset_data_df.index[0]
288
- data_end_datetime = asset_data_df.index[-1]
289
-
290
- # Get the timestep of the existing data
291
- data_timestep = asset_data.timestep
292
-
293
- # If the timestep matches, check if we have sufficient coverage
294
- if data_timestep == timestep:
295
- # Ensure both datetimes are timezone-aware for comparison
296
- data_start_tz = to_datetime_aware(data_start_datetime)
297
- data_end_tz = to_datetime_aware(data_end_datetime)
298
-
299
- # Get the start datetime with buffer
300
- start_datetime, _ = self.get_start_datetime_and_ts_unit(
301
- length, timestep, start_dt, start_buffer=START_BUFFER
302
- )
303
- start_tz = to_datetime_aware(start_datetime)
304
-
305
- # Check if existing data covers the needed time range with buffer
306
- needed_start = start_tz - START_BUFFER
307
- needed_end = self.datetime_end
308
-
309
- if data_start_tz <= needed_start and data_end_tz >= needed_end:
310
- # Data is already sufficient - return silently
311
- return
312
-
313
- # We need to fetch new data from DataBento
314
- # Create a unique key for logging to avoid spam
315
- log_key = f"{asset_separated.symbol}_{timestep}"
316
-
317
- try:
318
- # Only log fetch message once per asset/timestep combination
319
- if log_key not in self._logged_requests:
320
- logger.info(f"Fetching {timestep} data for {asset_separated.symbol}")
321
- self._logged_requests.add(log_key)
322
-
323
- # Get the start datetime and timestep unit
324
- start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
325
- length, timestep, start_dt, start_buffer=START_BUFFER
326
- )
327
-
328
- # Calculate end datetime (use current backtest end or a bit beyond)
329
- end_datetime = self.datetime_end + timedelta(days=1)
330
-
331
- # Get data from DataBento
332
- df = databento_helper.get_price_data_from_databento(
333
- api_key=self._api_key,
334
- asset=asset_separated,
335
- start=start_datetime,
336
- end=end_datetime,
337
- timestep=ts_unit,
338
- venue=None, # Could add venue support later
339
- force_cache_update=False
340
- )
341
-
342
- if df is None or df.empty:
343
- # For empty data, create an empty Data object with proper timezone handling
344
- # to maintain backward compatibility with tests
345
- empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
346
- # Create an empty DatetimeIndex with proper timezone
347
- empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
348
-
349
- data_obj = Data(
350
- asset_separated,
351
- df=empty_df,
352
- timestep=ts_unit,
353
- quote=quote_asset,
354
- # Use timezone-aware dates to avoid timezone issues
355
- date_start=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1)),
356
- date_end=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1))
357
- )
358
- self.pandas_data[search_asset] = data_obj
359
- return
360
-
361
- # Ensure the DataFrame has a datetime index
362
- if not isinstance(df.index, pd.DatetimeIndex):
363
- logger.error(f"DataBento data for {asset_separated.symbol} doesn't have datetime index")
364
- return
365
-
366
- # Create Data object and store in pandas_data
367
- data_obj = Data(
368
- asset_separated,
369
- df=df,
370
- timestep=ts_unit,
371
- quote=quote_asset,
372
- )
373
-
374
- self.pandas_data[search_asset] = data_obj
375
-
376
- except Exception as e:
377
- logger.error(f"Error updating pandas data for {asset_separated.symbol}: {str(e)}")
378
- logger.error(traceback.format_exc())
379
-
380
- def get_last_price(self, asset, quote=None, exchange=None):
381
- """
382
- Get the last price for an asset at the current backtest time
383
-
384
- Parameters
385
- ----------
386
- asset : Asset
387
- Asset to get the price for
388
- quote : Asset, optional
389
- Quote asset (not typically used with DataBento)
390
- exchange : str, optional
391
- Exchange filter
392
-
393
- Returns
394
- -------
395
- float, Decimal, or None
396
- Last price at current backtest time
397
- """
398
- try:
399
- # OPTIMIZATION: Check cache first
400
- self._check_and_clear_cache()
401
- current_dt = self.get_datetime()
402
-
403
- # Try to get data from our cached pandas_data first
404
- search_asset = asset
405
- quote_asset = quote if quote is not None else Asset("USD", "forex")
406
-
407
- if isinstance(search_asset, tuple):
408
- asset_separated, quote_asset = search_asset
409
- else:
410
- search_asset = (search_asset, quote_asset)
411
- asset_separated = asset
412
-
413
- # Ensure futures have correct multiplier set
414
- self._ensure_futures_multiplier(asset_separated)
415
-
416
- # OPTIMIZATION: Check iteration cache
417
- cache_key = (search_asset, current_dt)
418
- if cache_key in self._last_price_cache:
419
- return self._last_price_cache[cache_key]
420
-
421
- if search_asset in self.pandas_data:
422
- asset_data = self.pandas_data[search_asset]
423
- df = asset_data.df
424
-
425
- if not df.empty and 'close' in df.columns:
426
- # Ensure current_dt is timezone-aware for comparison
427
- current_dt_aware = to_datetime_aware(current_dt)
428
-
429
- # Step back one bar so only fully closed bars are visible
430
- bar_delta = timedelta(minutes=1)
431
- if asset_data.timestep == "hour":
432
- bar_delta = timedelta(hours=1)
433
- elif asset_data.timestep == "day":
434
- bar_delta = timedelta(days=1)
435
-
436
- cutoff_dt = current_dt_aware - bar_delta
437
-
438
- # Filter to data up to current backtest time (exclude current bar unless broker overrides)
439
- filtered_df = df[df.index <= cutoff_dt]
440
-
441
- # If we have no prior bar (e.g., first iteration), allow the current timestamp
442
- if filtered_df.empty:
443
- filtered_df = df[df.index <= current_dt_aware]
444
-
445
- if not filtered_df.empty:
446
- last_price = filtered_df['close'].iloc[-1]
447
- if not pd.isna(last_price):
448
- price = float(last_price)
449
- # OPTIMIZATION: Cache the result
450
- self._last_price_cache[cache_key] = price
451
- return price
452
-
453
- # If no cached data, try to get recent data
454
- logger.warning(f"No cached data for {asset.symbol}, attempting direct fetch")
455
- return databento_helper.get_last_price_from_databento(
456
- api_key=self._api_key,
457
- asset=asset_separated,
458
- venue=exchange
459
- )
460
-
461
- except Exception as e:
462
- logger.error(f"Error getting last price for {asset.symbol}: {e}")
463
- return None
464
-
465
- def get_chains(self, asset, quote=None):
466
- """
467
- Get option chains for an asset
468
-
469
- DataBento doesn't provide options chain data, so this returns an empty dict.
470
-
471
- Parameters
472
- ----------
473
- asset : Asset
474
- Asset to get chains for
475
- quote : Asset, optional
476
- Quote asset
477
-
478
- Returns
479
- -------
480
- dict
481
- Empty dictionary
482
- """
483
- logger.warning("DataBento does not provide options chain data")
484
- return {}
485
-
486
- def _get_bars_dict(self, assets, length, timestep, timeshift=None):
487
- """
488
- Override parent method to handle DataBento-specific data retrieval
489
-
490
- Parameters
491
- ----------
492
- assets : list
493
- List of assets to get data for
494
- length : int
495
- Number of bars to retrieve
496
- timestep : str
497
- Timestep for the data
498
- timeshift : timedelta, optional
499
- Time shift to apply
500
-
501
- Returns
502
- -------
503
- dict
504
- Dictionary mapping assets to their bar data
505
- """
506
- result = {}
507
-
508
- for asset in assets:
509
- try:
510
- # Update pandas data if needed
511
- self._update_pandas_data(asset, None, length, timestep)
512
-
513
- # Get data from pandas_data
514
- search_asset = asset
515
- if not isinstance(search_asset, tuple):
516
- search_asset = (search_asset, Asset("USD", "forex"))
517
-
518
- if search_asset in self.pandas_data:
519
- asset_data = self.pandas_data[search_asset]
520
- df = asset_data.df
521
-
522
- if not df.empty:
523
- # Apply timeshift if specified
524
- current_dt = self.get_datetime()
525
- shift_seconds = 0
526
- if timeshift:
527
- if isinstance(timeshift, int):
528
- shift_seconds = timeshift * 60
529
- current_dt = current_dt - timedelta(minutes=timeshift)
530
- else:
531
- shift_seconds = timeshift.total_seconds()
532
- current_dt = current_dt - timeshift
533
-
534
- # Ensure current_dt is timezone-aware for comparison
535
- current_dt_aware = to_datetime_aware(current_dt)
536
-
537
- # Filter data up to current backtest time (exclude current bar unless broker overrides)
538
- include_current = getattr(self, "_include_current_bar_for_orders", False)
539
- allow_current = include_current or shift_seconds > 0
540
- mask = df.index <= current_dt_aware if allow_current else df.index < current_dt_aware
541
- filtered_df = df[mask]
542
-
543
- # Take the last 'length' bars
544
- result_df = filtered_df.tail(length)
545
-
546
- if not result_df.empty:
547
- result[asset] = result_df
548
- else:
549
- logger.warning(f"No data available for {asset.symbol} at {current_dt}")
550
- result[asset] = None
551
- else:
552
- logger.warning(f"Empty data for {asset.symbol}")
553
- result[asset] = None
554
- else:
555
- logger.warning(f"No data found for {asset.symbol}")
556
- result[asset] = None
557
-
558
- except Exception as e:
559
- logger.error(f"Error getting bars for {asset}: {e}")
560
- result[asset] = None
561
-
562
- return result
563
-
564
- def _pull_source_symbol_bars(
565
- self,
566
- asset,
567
- length,
568
- timestep="",
569
- timeshift=0,
570
- quote=None,
571
- exchange=None,
572
- include_after_hours=True,
573
- ):
574
- """
575
- Override parent method to fetch data from DataBento instead of pre-loaded data store
576
-
577
- This method is called by get_historical_prices and is responsible for actually
578
- fetching the data from the DataBento API.
579
- """
580
- timestep = timestep if timestep else "minute"
581
-
582
- # OPTIMIZATION: Check iteration cache first
583
- self._check_and_clear_cache()
584
- current_dt = self.get_datetime()
585
-
586
- # Get data from our cached pandas_data
587
- search_asset = asset
588
- quote_asset = quote if quote is not None else Asset("USD", "forex")
589
-
590
- if isinstance(search_asset, tuple):
591
- asset_separated, quote_asset = search_asset
592
- else:
593
- search_asset = (search_asset, quote_asset)
594
- asset_separated = asset
595
-
596
- # OPTIMIZATION: Build cache key and check cache
597
- # Convert timeshift to consistent format for caching
598
- timeshift_key = 0
599
- if timeshift:
600
- if isinstance(timeshift, int):
601
- timeshift_key = timeshift
602
- else:
603
- timeshift_key = int(timeshift.total_seconds() / 60)
604
-
605
- cache_key = (search_asset, length, timestep, timeshift_key, current_dt)
606
- if cache_key in self._filtered_bars_cache:
607
- return self._filtered_bars_cache[cache_key]
608
-
609
- # Check if we need to fetch data by calling _update_pandas_data first
610
- # This will only fetch if data is not already cached or prefetched
611
- self._update_pandas_data(asset, quote, length, timestep)
612
-
613
- # Check if we have data in pandas_data cache
614
- if search_asset in self.pandas_data:
615
- asset_data = self.pandas_data[search_asset]
616
- df = asset_data.df
617
-
618
- if not df.empty:
619
- # Apply timeshift if specified
620
- shift_seconds = 0
621
- if timeshift:
622
- if isinstance(timeshift, int):
623
- shift_seconds = timeshift * 60
624
- current_dt = current_dt - timedelta(minutes=timeshift)
625
- else:
626
- shift_seconds = timeshift.total_seconds()
627
- current_dt = current_dt - timeshift
628
-
629
- # Ensure current_dt is timezone-aware for comparison
630
- current_dt_aware = to_datetime_aware(current_dt)
631
-
632
- # Step back one bar to avoid exposing the in-progress bar
633
- bar_delta = timedelta(minutes=1)
634
- if asset_data.timestep == "hour":
635
- bar_delta = timedelta(hours=1)
636
- elif asset_data.timestep == "day":
637
- bar_delta = timedelta(days=1)
638
-
639
- cutoff_dt = current_dt_aware - bar_delta
640
-
641
- # Filter data up to current backtest time (exclude current bar unless broker overrides)
642
- filtered_df = df[df.index <= cutoff_dt] if shift_seconds > 0 else df[df.index < current_dt_aware]
643
-
644
- # Take the last 'length' bars
645
- result_df = filtered_df.tail(length)
646
-
647
- # OPTIMIZATION: Cache the result before returning
648
- if not result_df.empty:
649
- self._filtered_bars_cache[cache_key] = result_df
650
- return result_df
651
- else:
652
- self._filtered_bars_cache[cache_key] = None
653
- return None
654
- else:
655
- return None
656
- else:
657
- return None
658
-
659
- def initialize_data_for_backtest(self, strategy_assets, timestep="minute"):
660
- """
661
- Convenience method to prefetch all required data for a backtest strategy.
662
- This should be called during strategy initialization to load all data up front.
663
-
664
- Parameters
665
- ----------
666
- strategy_assets : list of Asset or list of str
667
- List of assets or asset symbols that the strategy will use
668
- timestep : str, optional
669
- Primary timestep for the data (default: "minute")
670
- """
671
- # Convert string symbols to Asset objects if needed
672
- assets = []
673
- for asset in strategy_assets:
674
- if isinstance(asset, str):
675
- # Try to determine asset type from symbol format
676
- if any(month in asset for month in ['F', 'G', 'H', 'J', 'K', 'M', 'N', 'Q', 'U', 'V', 'X', 'Z']):
677
- # Looks like a futures symbol
678
- assets.append(Asset(asset, "future"))
679
- else:
680
- # Default to stock
681
- assets.append(Asset(asset, "stock"))
682
- else:
683
- assets.append(asset)
684
-
685
- # Prefetch data for all assets
686
- self.prefetch_data(assets, timestep)
687
-
688
- logger.info(f"Initialized DataBento backtesting with prefetched data for {len(assets)} assets")
7
+ __all__ = ["DataBentoDataBacktesting", "DataBentoDataBacktestingPandas", "DataBentoDataBacktestingPolars"]