lumibot 4.0.22__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot has been flagged as potentially problematic; consult the registry's advisory page for details.

Files changed (164) hide show
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__init__.py +2 -1
  37. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  58. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  59. lumibot/data_sources/data_source_backtesting.py +3 -5
  60. lumibot/data_sources/databento_data.py +5 -5
  61. lumibot/data_sources/databento_data_polars_backtesting.py +636 -0
  62. lumibot/data_sources/databento_data_polars_live.py +793 -0
  63. lumibot/data_sources/pandas_data.py +6 -3
  64. lumibot/data_sources/polars_mixin.py +126 -21
  65. lumibot/data_sources/tradeovate_data.py +80 -0
  66. lumibot/data_sources/tradier_data.py +2 -1
  67. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  75. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  76. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  77. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  78. lumibot/entities/asset.py +8 -0
  79. lumibot/entities/order.py +1 -1
  80. lumibot/entities/quote.py +14 -0
  81. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  82. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  83. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  85. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  86. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  87. lumibot/strategies/_strategy.py +95 -27
  88. lumibot/strategies/strategy.py +5 -6
  89. lumibot/strategies/strategy_executor.py +2 -2
  90. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  109. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  110. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  111. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  112. lumibot/tools/databento_helper.py +384 -133
  113. lumibot/tools/databento_helper_polars.py +218 -156
  114. lumibot/tools/databento_roll.py +216 -0
  115. lumibot/tools/lumibot_logger.py +32 -17
  116. lumibot/tools/polygon_helper.py +65 -0
  117. lumibot/tools/thetadata_helper.py +588 -70
  118. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  120. lumibot/traders/trader.py +1 -1
  121. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  122. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  123. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  124. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  125. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/RECORD +164 -46
  126. tests/backtest/check_timing_offset.py +198 -0
  127. tests/backtest/check_volume_spike.py +112 -0
  128. tests/backtest/comprehensive_comparison.py +166 -0
  129. tests/backtest/debug_comparison.py +91 -0
  130. tests/backtest/diagnose_price_difference.py +97 -0
  131. tests/backtest/direct_api_comparison.py +203 -0
  132. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  133. tests/backtest/root_cause_analysis.py +109 -0
  134. tests/backtest/test_accuracy_verification.py +244 -0
  135. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  136. tests/backtest/test_databento.py +57 -0
  137. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  138. tests/backtest/test_debug_avg_fill_price.py +112 -0
  139. tests/backtest/test_dividends.py +8 -3
  140. tests/backtest/test_example_strategies.py +54 -47
  141. tests/backtest/test_futures_edge_cases.py +451 -0
  142. tests/backtest/test_futures_single_trade.py +270 -0
  143. tests/backtest/test_futures_ultra_simple.py +191 -0
  144. tests/backtest/test_index_data_verification.py +348 -0
  145. tests/backtest/test_polygon.py +45 -24
  146. tests/backtest/test_thetadata.py +246 -60
  147. tests/backtest/test_thetadata_comprehensive.py +729 -0
  148. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  149. tests/backtest/test_yahoo.py +1 -2
  150. tests/conftest.py +20 -0
  151. tests/test_backtesting_data_source_env.py +249 -0
  152. tests/test_backtesting_quiet_logs_complete.py +10 -11
  153. tests/test_databento_helper.py +73 -86
  154. tests/test_databento_live.py +10 -10
  155. tests/test_databento_timezone_fixes.py +21 -4
  156. tests/test_get_historical_prices.py +6 -6
  157. tests/test_options_helper.py +162 -40
  158. tests/test_polygon_helper.py +21 -13
  159. tests/test_quiet_logs_requirements.py +5 -5
  160. tests/test_thetadata_helper.py +487 -171
  161. tests/test_yahoo_data.py +125 -0
  162. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  163. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  164. {lumibot-4.0.22.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,636 @@
1
+ """Ultra-optimized DataBento backtesting using pure polars with zero pandas conversions.
2
+
3
+ This implementation:
4
+ 1. Uses polars columnar storage directly
5
+ 2. Lazy evaluation for maximum performance
6
+ 3. Efficient caching with parquet files
7
+ 4. Vectorized operations only
8
+ 5. Inherits from DataSourceBacktesting (proper architecture)
9
+ """
10
+
11
+ import os
12
+ import traceback
13
+ from datetime import datetime, timedelta
14
+ from decimal import Decimal
15
+ from typing import Dict, Optional, Union
16
+
17
+ import numpy as np
18
+ import polars as pl
19
+
20
+ from lumibot.data_sources import DataSourceBacktesting
21
+ from lumibot.data_sources.polars_mixin import PolarsMixin
22
+ from lumibot.entities import Asset, Bars
23
+ from lumibot.tools import databento_helper_polars, databento_helper
24
+ from lumibot.tools.lumibot_logger import get_logger
25
+
26
+ logger = get_logger(__name__)
27
+ START_BUFFER = timedelta(days=5)
28
+
29
+
30
class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
    """Ultra-optimized DataBento backtesting data source with pure polars."""

    # Identifier reported on Bars objects produced by this source.
    SOURCE = "DATABENTO"
    # Finest granularity this source can serve.
    MIN_TIMESTEP = "minute"
    # Maps canonical timestep names to the string spellings callers may pass.
    TIMESTEP_MAPPING = [
        {"timestep": "minute", "representations": ["1m", "minute", "1 minute"]},
        {"timestep": "hour", "representations": ["1h", "hour", "1 hour"]},
        {"timestep": "day", "representations": ["1d", "day", "1 day"]},
    ]
40
+
41
+ def __init__(
42
+ self,
43
+ datetime_start,
44
+ datetime_end,
45
+ api_key=None,
46
+ max_memory=None,
47
+ timeout=30,
48
+ max_retries=3,
49
+ **kwargs,
50
+ ):
51
+ super().__init__(
52
+ datetime_start=datetime_start,
53
+ datetime_end=datetime_end,
54
+ api_key=api_key,
55
+ **kwargs
56
+ )
57
+
58
+ self.name = "databento"
59
+ self._api_key = api_key or os.environ.get("DATABENTO_API_KEY")
60
+ self._timeout = timeout
61
+ self._max_retries = max_retries
62
+ self.MAX_STORAGE_BYTES = max_memory
63
+
64
+ # Initialize polars storage from mixin
65
+ self._init_polars_storage()
66
+
67
+ # DataBento-specific caches
68
+ self._eager_cache: Dict[Asset, pl.DataFrame] = {}
69
+
70
+ # Prefetch tracking - CRITICAL for performance
71
+ self._prefetch_cache: Dict[tuple, bool] = {}
72
+ self._prefetched_assets = set() # Track which assets have been fully loaded
73
+
74
+ # OPTIMIZATION: Iteration-level filtered bars cache (same as Pandas)
75
+ self._filtered_bars_cache = {} # {(asset_key, length, timestep, timeshift, dt): DataFrame}
76
+ self._bars_cache_datetime = None # Track when to invalidate bars cache
77
+
78
+ # Futures multiplier cache - track which assets have had multipliers fetched
79
+ self._multiplier_fetched_assets = set()
80
+
81
+ logger.info(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
82
+
83
+ def _ensure_futures_multiplier(self, asset):
84
+ """
85
+ Ensure futures asset has correct multiplier set.
86
+
87
+ This method is idempotent and cached - safe to call multiple times.
88
+ Only fetches multiplier once per unique asset.
89
+
90
+ Design rationale:
91
+ - Futures multipliers must be fetched from data provider (e.g., DataBento)
92
+ - Asset class defaults to multiplier=1
93
+ - Data source is responsible for updating multiplier on first use
94
+ - Lazy fetching is more efficient than prefetching all possible assets
95
+
96
+ Parameters
97
+ ----------
98
+ asset : Asset
99
+ The asset to ensure has correct multiplier
100
+ """
101
+ # Skip if not a futures asset
102
+ if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
103
+ return
104
+
105
+ # Skip if multiplier already set to non-default value
106
+ if asset.multiplier != 1:
107
+ return
108
+
109
+ # Create cache key to track which assets we've already processed
110
+ # Use symbol + asset_type + expiration to handle different contracts
111
+ cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
112
+
113
+ # Check if we already tried to fetch for this asset
114
+ if cache_key in self._multiplier_fetched_assets:
115
+ return # Already attempted (even if failed, don't retry every time)
116
+
117
+ # Mark as attempted to avoid redundant API calls
118
+ self._multiplier_fetched_assets.add(cache_key)
119
+
120
+ # Fetch and set multiplier from DataBento
121
+ try:
122
+ client = databento_helper.DataBentoClient(self._api_key)
123
+
124
+ # Resolve symbol based on asset type
125
+ if asset.asset_type == Asset.AssetType.CONT_FUTURE:
126
+ resolved_symbol = databento_helper._format_futures_symbol_for_databento(
127
+ asset, reference_date=self.datetime_start
128
+ )
129
+ else:
130
+ resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
131
+
132
+ # Fetch multiplier from DataBento instrument definition
133
+ databento_helper._fetch_and_update_futures_multiplier(
134
+ client=client,
135
+ asset=asset,
136
+ resolved_symbol=resolved_symbol,
137
+ dataset="GLBX.MDP3",
138
+ reference_date=self.datetime_start
139
+ )
140
+
141
+ logger.info(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
142
+
143
+ except Exception as e:
144
+ logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
145
+
146
+ def _check_and_clear_bars_cache(self):
147
+ """
148
+ OPTIMIZATION: Clear iteration caches when datetime changes.
149
+ This prevents stale data from being returned across different backtest iterations.
150
+ """
151
+ current_dt = self.get_datetime()
152
+ if self._bars_cache_datetime != current_dt:
153
+ self._filtered_bars_cache.clear()
154
+ self._bars_cache_datetime = current_dt
155
+
156
+ def _enforce_storage_limit(self, data_store: Dict[Asset, pl.LazyFrame]):
157
+ """Enforce storage limit by removing least recently used data."""
158
+ # Use mixin's enforce method
159
+ self._enforce_storage_limit_polars(self.MAX_STORAGE_BYTES)
160
+
161
+ # Clean up DataBento-specific caches
162
+ if self.MAX_STORAGE_BYTES and len(self._eager_cache) > 0:
163
+ # Remove from eager cache too
164
+ assets_to_remove = [a for a in self._eager_cache.keys() if a not in data_store]
165
+ for asset in assets_to_remove:
166
+ del self._eager_cache[asset]
167
+
168
+ def _store_data(self, asset: Asset, data: pl.DataFrame) -> pl.LazyFrame:
169
+ """Store data efficiently using lazy frames.
170
+
171
+ Returns lazy frame for efficient subsequent operations.
172
+ """
173
+ # Use mixin's store method first
174
+ lazy_data = self._store_data_polars(asset, data)
175
+
176
+ if lazy_data is None:
177
+ return None
178
+
179
+ # Update the stored data
180
+ self._data_store[asset] = lazy_data
181
+
182
+ # Enforce storage limit
183
+ if self.MAX_STORAGE_BYTES:
184
+ self._enforce_storage_limit(self._data_store)
185
+
186
+ return lazy_data
187
+
188
+ def get_start_datetime_and_ts_unit(self, length, timestep, start_dt=None, start_buffer=timedelta(days=5)):
189
+ """
190
+ Get the start datetime for the data.
191
+
192
+ Parameters
193
+ ----------
194
+ length : int
195
+ The number of data points to get.
196
+ timestep : str
197
+ The timestep to use. For example, "minute" or "hour" or "day".
198
+ start_dt : datetime
199
+ The start datetime to use. If None, the current self.datetime_start will be used.
200
+ start_buffer : timedelta
201
+ The buffer to add to the start datetime.
202
+
203
+ Returns
204
+ -------
205
+ datetime
206
+ The start datetime.
207
+ str
208
+ The timestep unit.
209
+ """
210
+ # Convert timestep string to timedelta and get start datetime
211
+ td, ts_unit = self.convert_timestep_str_to_timedelta(timestep)
212
+ if ts_unit == "day":
213
+ weeks_requested = length // 5 # Full trading week is 5 days
214
+ extra_padding_days = weeks_requested * 3 # to account for 3day weekends
215
+ td = timedelta(days=length + extra_padding_days)
216
+ else:
217
+ td *= length
218
+ if start_dt is not None:
219
+ start_datetime = start_dt - td
220
+ else:
221
+ start_datetime = self.datetime_start - td
222
+ start_datetime = start_datetime - start_buffer
223
+ return start_datetime, ts_unit
224
+
225
+ def is_data_cached(self, asset: Asset, start_dt, end_dt, timestep: str) -> bool:
226
+ """
227
+ Check if data is already cached for the given parameters.
228
+
229
+ Parameters
230
+ ----------
231
+ asset : Asset
232
+ The asset to check
233
+ start_dt : datetime
234
+ Start datetime
235
+ end_dt : datetime
236
+ End datetime
237
+ timestep : str
238
+ Time granularity
239
+
240
+ Returns
241
+ -------
242
+ bool
243
+ True if data is cached, False otherwise
244
+ """
245
+ search_asset = asset
246
+ if isinstance(asset, tuple):
247
+ search_asset = asset
248
+
249
+ # Check if in data store
250
+ if search_asset not in self._data_store:
251
+ return False
252
+
253
+ # Check if in filtered cache for daily data
254
+ if timestep == "day":
255
+ cache_key = (search_asset, start_dt.date(), timestep)
256
+ if cache_key in self._filtered_data_cache:
257
+ return True
258
+
259
+ # Check prefetch cache
260
+ cache_key = (search_asset, start_dt.date(), end_dt.date(), timestep)
261
+ return cache_key in self._prefetch_cache
262
+
263
+ def _update_data(self, asset: Asset, quote: Asset, length: int, timestep: str, start_dt=None):
264
+ """
265
+ Get asset data and update the self._data_store dictionary.
266
+
267
+ Parameters
268
+ ----------
269
+ asset : Asset
270
+ The asset to get data for.
271
+ quote : Asset
272
+ The quote asset to use. For example, if asset is "SPY" and quote is "USD", the data will be for "SPY/USD".
273
+ length : int
274
+ The number of data points to get.
275
+ timestep : str
276
+ The timestep to use. For example, "minute" or "hour" or "day".
277
+ start_dt : datetime
278
+ The start datetime to use. If None, the current self.start_datetime will be used.
279
+ """
280
+ search_asset = asset
281
+ asset_separated = asset
282
+ quote_asset = quote if quote is not None else Asset("USD", "forex")
283
+
284
+ if isinstance(search_asset, tuple):
285
+ asset_separated, quote_asset = search_asset
286
+ else:
287
+ search_asset = (search_asset, quote_asset)
288
+
289
+ # CRITICAL: If asset was prefetched, don't fetch again!
290
+ if search_asset in self._prefetched_assets:
291
+ return
292
+
293
+ # Check if we already have data in the store
294
+ if search_asset in self._data_store:
295
+ # Data already loaded, mark as prefetched and return
296
+ self._prefetched_assets.add(search_asset)
297
+ return
298
+
299
+ # Get the start datetime and timestep unit (includes length*timestep + buffer)
300
+ # This matches Pandas logic: start_datetime = (start_dt - length*timestep) - START_BUFFER
301
+ start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
302
+ length, timestep, start_dt, start_buffer=START_BUFFER
303
+ )
304
+
305
+ # FIX: Ensure timezone-aware datetime for API call (matches Pandas behavior)
306
+ # Polars was passing naive datetime, causing DataBento to treat it as UTC instead of ET
307
+ # This caused fetching wrong data (18 hours off!)
308
+ start_datetime = self.to_default_timezone(start_datetime)
309
+
310
+ # FIX: Don't override start_datetime! Use the calculated value that includes bars + buffer
311
+ # The old code set start_datetime = self.datetime_start - START_BUFFER which was wrong
312
+ # It didn't account for the requested bar length, causing missing data
313
+ end_datetime = self.datetime_end + timedelta(days=1)
314
+
315
+ logger.info(f"Prefetching {asset_separated.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
316
+
317
+ # Check if we have data for this asset
318
+ if search_asset in self._data_store:
319
+ # For daily timestep, use optimized caching strategy
320
+ if ts_unit == "day":
321
+ # Check if we need to clear cache for new date
322
+ current_date = self._datetime.date()
323
+
324
+ # Try to get from filtered cache first
325
+ cache_key = (search_asset, current_date, ts_unit)
326
+ if cache_key in self._filtered_data_cache:
327
+ result = self._filtered_data_cache[cache_key]
328
+ if len(result) >= length:
329
+ # Cache hit!
330
+ return
331
+
332
+ # Download data from DataBento using polars helper
333
+ try:
334
+ # CRITICAL FIX: Use start_datetime as reference_date to match Pandas behavior!
335
+ # Pandas passes reference_date=start (WITH buffer included) - see databento_helper.py line 797
336
+ # This determines which futures contract is active at that time
337
+ df = databento_helper_polars.get_price_data_from_databento_polars(
338
+ api_key=self._api_key,
339
+ asset=asset_separated,
340
+ start=start_datetime,
341
+ end=end_datetime,
342
+ timestep=timestep,
343
+ venue=None,
344
+ force_cache_update=False,
345
+ reference_date=start_datetime # MUST match Pandas: reference_date=start (WITH buffer)
346
+ )
347
+ except Exception as e:
348
+ # Handle all exceptions
349
+ logger.error(f"Error getting data from DataBento: {e}")
350
+ logger.error(traceback.format_exc())
351
+ # Mark as prefetched even on error to avoid retry loops
352
+ self._prefetched_assets.add(search_asset)
353
+ raise Exception("Error getting data from DataBento") from e
354
+
355
+ if (df is None) or len(df) == 0:
356
+ logger.warning(
357
+ f"DataBento returned no data: asset={getattr(asset_separated, 'symbol', asset_separated)} "
358
+ f"quote={getattr(quote_asset, 'symbol', quote_asset)} "
359
+ f"timestep={timestep} start={start_datetime.strftime('%Y-%m-%d %H:%M:%S')} "
360
+ f"end={end_datetime.strftime('%Y-%m-%d %H:%M:%S')} len=0"
361
+ )
362
+ # Mark as prefetched to avoid retry
363
+ self._prefetched_assets.add(search_asset)
364
+ return
365
+
366
+ # Store data
367
+ self._store_data(search_asset, df)
368
+ logger.info(f"Cached {len(df)} rows for {asset_separated.symbol}")
369
+
370
+ # Mark as prefetched
371
+ self._prefetched_assets.add(search_asset)
372
+
373
    def _pull_source_symbol_bars(
        self,
        asset: Asset,
        length: int,
        timestep: str = "day",
        timeshift: Union[int, timedelta, None] = None,
        quote: Asset = None,
        exchange: str = None,
        include_after_hours: bool = True,
    ) -> Optional[pl.DataFrame]:
        """Pull bars with maximum efficiency using pre-filtered cache.

        Returns up to *length* completed bars for *asset* ending strictly
        before the current backtest datetime (adjusted by *timeshift* when
        given), as a polars DataFrame, or None when no data is available.

        Parameters
        ----------
        asset : Asset
            Asset (or (asset, quote) tuple) to pull bars for.
        length : int
            Number of bars requested.
        timestep : str
            "minute", "hour" or "day".
        timeshift : int | timedelta, optional
            Shift applied to the end of the window; an int is interpreted as
            minutes (pandas parity).
        quote : Asset, optional
            Quote asset; defaults to USD forex to match _update_data keys.
        exchange : str, optional
            Unused here; kept for interface parity.
        include_after_hours : bool
            Unused here; kept for interface parity.
        """

        # OPTIMIZATION: Check iteration cache first
        self._check_and_clear_bars_cache()
        current_dt = self.get_datetime()

        # Build search key - MUST match _update_data logic!
        # Default quote to USD forex if not provided (matches _update_data)
        search_asset = asset
        quote_asset = quote if quote is not None else Asset("USD", "forex")

        if isinstance(asset, tuple):
            search_asset, quote_asset = asset
        else:
            search_asset = (asset, quote_asset)

        # OPTIMIZATION: Build cache key and check filtered bars cache (same as Pandas)
        # Normalize timeshift into a minutes-based integer for the cache key.
        timeshift_key = 0
        if timeshift:
            if isinstance(timeshift, int):
                timeshift_key = timeshift
            elif hasattr(timeshift, 'total_seconds'):
                timeshift_key = int(timeshift.total_seconds() / 60)

        bars_cache_key = (search_asset, length, timestep, timeshift_key, current_dt)
        if bars_cache_key in self._filtered_bars_cache:
            return self._filtered_bars_cache[bars_cache_key]

        # For daily timestep, use optimized caching strategy
        if timestep == "day":
            current_date = self._datetime.date()
            cache_key = (search_asset, current_date, timestep)

            # Try cache first
            if cache_key in self._filtered_data_cache:
                result = self._filtered_data_cache[cache_key]
                if len(result) >= length:
                    return result.tail(length)

        # FIX: Pass None as start_dt to match Pandas behavior
        # Pandas uses self.datetime_start as reference, not current iteration time
        # This ensures we fetch enough historical data for all iterations
        self._update_data(asset, quote, length, timestep, start_dt=None)

        # Get lazy data - use the same search_asset key we already built
        lazy_data = self._get_data_lazy(search_asset)
        logger.info(f"[POLARS-DEBUG] _get_data_lazy returned: {lazy_data is not None}, search_asset={search_asset}")
        logger.info(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")

        if lazy_data is None:
            logger.warning(f"[POLARS-DEBUG] lazy_data is None for search_asset={search_asset}")
            return None

        # Use lazy evaluation and collect only when needed
        # Check if we have cached filtered data first
        if timestep == "day":
            current_date = self._datetime.date()
            cache_key = (search_asset, current_date, timestep)
            if cache_key in self._filtered_data_cache:
                data = self._filtered_data_cache[cache_key]
            else:
                # Collect with filtering for efficiency
                data = lazy_data.collect()
        else:
            # For minute data, collect on demand
            data = lazy_data.collect()

        logger.info(f"[POLARS-DEBUG] After collect: data shape={data.shape if data is not None else 'None'}")

        # OPTIMIZATION: Direct filtering on eager DataFrame
        current_dt = self.to_default_timezone(self._datetime)

        # Determine end filter - CRITICAL: Must match pandas logic!
        # For backtesting, we need to exclude the in-progress bar.
        # Without a timeshift the filter uses < (not <=) against the current
        # datetime; with a timeshift the bound is shifted back instead.
        use_strict_less_than = False  # flipped to True below when no timeshift is applied

        if timeshift:
            # When timeshift is present, use <= with adjusted end_filter
            if isinstance(timeshift, int):
                # Match pandas implementation: interpret integer timeshift as minutes
                timeshift = timedelta(minutes=timeshift)
            if timestep == "day":
                # Anchor at end-of-day, then step back one full day plus the shift.
                dt = self._datetime.replace(hour=23, minute=59, second=59, microsecond=999999)
                end_filter = dt - timedelta(days=1) - timeshift
            elif timestep == "hour":
                end_filter = current_dt - timedelta(hours=1) - timeshift
            else:
                end_filter = current_dt - timedelta(minutes=1) - timeshift
        else:
            # No timeshift: use current_dt with < operator (matches Pandas behavior)
            end_filter = current_dt
            use_strict_less_than = True

        logger.debug(f"Filtering {asset.symbol} data: current_dt={current_dt}, end_filter={end_filter}, timestep={timestep}, timeshift={timeshift}")

        # Convert to lazy frame for filtering
        lazy_data = data.lazy() if not hasattr(data, 'collect') else data
        logger.info(f"[POLARS-DEBUG] Before filter: lazy_data type={type(lazy_data)}, end_filter={end_filter}, length={length}, use_strict_less_than={use_strict_less_than}")

        # Use mixin's filter method
        result = self._filter_data_polars(search_asset, lazy_data, end_filter, length, timestep, use_strict_less_than=use_strict_less_than)
        logger.info(f"[POLARS-DEBUG] After filter: result shape={result.shape if result is not None else 'None'}")

        if result is None:
            return None

        if len(result) < length:
            logger.debug(
                f"Requested {length} bars but only {len(result)} available "
                f"for {asset.symbol} before {end_filter}"
            )

        logger.debug(f"Returning {len(result)} bars for {asset.symbol}")

        # OPTIMIZATION: Cache the result before returning (same as Pandas)
        if result is not None and not result.is_empty():
            self._filtered_bars_cache[bars_cache_key] = result
        else:
            self._filtered_bars_cache[bars_cache_key] = None

        return result
506
+
507
+ def _parse_source_symbol_bars(
508
+ self,
509
+ response: pl.DataFrame,
510
+ asset: Asset,
511
+ quote: Optional[Asset] = None,
512
+ length: Optional[int] = None,
513
+ return_polars: bool = False,
514
+ ) -> Bars:
515
+ """Parse bars from polars DataFrame."""
516
+ if quote is not None:
517
+ logger.warning(f"quote is not implemented for DataBentoData, but {quote} was passed as the quote")
518
+
519
+ # Use mixin's parse method
520
+ return self._parse_source_symbol_bars_polars(
521
+ response, asset, self.SOURCE, quote, length, return_polars=return_polars
522
+ )
523
+
524
+ def get_last_price(
525
+ self,
526
+ asset: Asset,
527
+ timestep: str = "minute",
528
+ quote: Optional[Asset] = None,
529
+ exchange: Optional[str] = None,
530
+ **kwargs
531
+ ) -> Union[float, Decimal, None]:
532
+ """Get last price with aggressive caching."""
533
+
534
+ if timestep is None:
535
+ timestep = self.get_timestep()
536
+
537
+ # Use mixin's cache check
538
+ current_datetime = self._datetime
539
+ cached_price = self._get_cached_last_price_polars(asset, current_datetime, timestep)
540
+ if cached_price is not None:
541
+ return cached_price
542
+
543
+ # Ensure futures have correct multiplier set
544
+ self._ensure_futures_multiplier(asset)
545
+
546
+ try:
547
+ dt = self.get_datetime()
548
+ self._update_data(asset, quote, 1, timestep, dt)
549
+ except Exception as e:
550
+ logger.error(f"Error get_last_price from DataBento: {e}")
551
+ logger.error(f"Error get_last_price from DataBento: {asset=} {quote=} {timestep=} {dt=} {e}")
552
+ self._cache_last_price_polars(asset, None, current_datetime, timestep)
553
+ return None
554
+
555
+ # Request a single completed bar (aligns with pandas implementation)
556
+ bars_data = self._pull_source_symbol_bars(
557
+ asset, 1, timestep=timestep, timeshift=None, quote=quote
558
+ )
559
+
560
+ if bars_data is None or len(bars_data) == 0:
561
+ logger.warning(f"[POLARS-DEBUG] ✗✗✗ NO BARS DATA for {asset.symbol} at {current_datetime}, timestep={timestep}")
562
+ logger.warning(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")
563
+ self._cache_last_price_polars(asset, None, current_datetime, timestep)
564
+ return None
565
+
566
+ # Use the close of the most recent completed bar (pandas parity)
567
+ if "close" not in bars_data.columns:
568
+ logger.warning(f"[POLARS-DEBUG] ✗✗✗ Close column missing for {asset.symbol}")
569
+ self._cache_last_price_polars(asset, None, current_datetime, timestep)
570
+ return None
571
+
572
+ last_close = bars_data.select(pl.col("close").tail(1)).item()
573
+
574
+ if last_close is None:
575
+ logger.warning(f"[POLARS-DEBUG] ✗✗✗ Unable to extract close price for {asset.symbol}")
576
+ self._cache_last_price_polars(asset, None, current_datetime, timestep)
577
+ return None
578
+
579
+ if isinstance(last_close, (np.int64, np.integer)):
580
+ price_value = Decimal(int(last_close))
581
+ elif isinstance(last_close, (np.float64, np.floating)):
582
+ price_value = float(last_close)
583
+ else:
584
+ price_value = float(last_close)
585
+
586
+ self._cache_last_price_polars(asset, price_value, current_datetime, timestep)
587
+ logger.info(f"[POLARS-DEBUG] Returning price from bars (close): {price_value}")
588
+ return price_value
589
+
590
+ def get_historical_prices(
591
+ self,
592
+ asset: Asset,
593
+ length: int,
594
+ timestep: str = None,
595
+ timeshift: Optional[timedelta] = None,
596
+ quote: Optional[Asset] = None,
597
+ exchange: Optional[str] = None,
598
+ include_after_hours: bool = False,
599
+ return_polars: bool = False,
600
+ ) -> Optional[Bars]:
601
+ """Get historical prices using polars."""
602
+ logger.info(f"[POLARS-DEBUG] get_historical_prices called: asset={asset.symbol}, length={length}, timestep={timestep}, datetime={self._datetime}")
603
+ if timestep is None:
604
+ timestep = self.get_timestep()
605
+
606
+ # Get bars data
607
+ bars_data = self._pull_source_symbol_bars(
608
+ asset,
609
+ length,
610
+ timestep=timestep,
611
+ timeshift=timeshift,
612
+ quote=quote,
613
+ include_after_hours=include_after_hours
614
+ )
615
+
616
+ if bars_data is None:
617
+ logger.warning(f"[POLARS-DEBUG] ✗✗✗ _pull_source_symbol_bars returned None for {asset.symbol}")
618
+ return None
619
+
620
+ logger.info(f"[POLARS-DEBUG] _pull_source_symbol_bars returned {len(bars_data)} bars")
621
+
622
+ # Create and return Bars object
623
+ result = self._parse_source_symbol_bars(
624
+ bars_data, asset, quote=quote, length=length, return_polars=return_polars
625
+ )
626
+ logger.info(f"[POLARS-DEBUG] Returning Bars object: {result is not None}")
627
+ return result
628
+
629
+ def get_chains(self, asset: Asset, quote: Asset = None, exchange: str = None):
630
+ """Get option chains - not implemented for DataBento."""
631
+ logger.warning("get_chains is not implemented for DataBentoData")
632
+ return None
633
+
634
+ def get_quote(self, asset: Asset) -> None:
635
+ """Get quote - not implemented for DataBento backtesting."""
636
+ return None