lumibot 4.0.23__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (161)
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +145 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  37. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  58. lumibot/data_sources/data_source_backtesting.py +3 -5
  59. lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
  60. lumibot/data_sources/pandas_data.py +6 -3
  61. lumibot/data_sources/polars_mixin.py +126 -21
  62. lumibot/data_sources/tradeovate_data.py +80 -0
  63. lumibot/data_sources/tradier_data.py +2 -1
  64. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  65. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  66. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  67. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  75. lumibot/entities/asset.py +8 -0
  76. lumibot/entities/order.py +1 -1
  77. lumibot/entities/quote.py +14 -0
  78. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  79. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  80. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  81. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  82. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  83. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  84. lumibot/strategies/_strategy.py +95 -27
  85. lumibot/strategies/strategy.py +5 -6
  86. lumibot/strategies/strategy_executor.py +2 -2
  87. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  88. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  89. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  90. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  109. lumibot/tools/databento_helper.py +384 -133
  110. lumibot/tools/databento_helper_polars.py +218 -156
  111. lumibot/tools/databento_roll.py +216 -0
  112. lumibot/tools/lumibot_logger.py +32 -17
  113. lumibot/tools/polygon_helper.py +65 -0
  114. lumibot/tools/thetadata_helper.py +588 -70
  115. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  116. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  117. lumibot/traders/trader.py +1 -1
  118. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  120. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  121. lumibot-4.1.1.data/data/ThetaTerminal.jar +0 -0
  122. {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/METADATA +1 -2
  123. {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/RECORD +161 -44
  124. tests/backtest/check_timing_offset.py +198 -0
  125. tests/backtest/check_volume_spike.py +112 -0
  126. tests/backtest/comprehensive_comparison.py +166 -0
  127. tests/backtest/debug_comparison.py +91 -0
  128. tests/backtest/diagnose_price_difference.py +97 -0
  129. tests/backtest/direct_api_comparison.py +203 -0
  130. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  131. tests/backtest/root_cause_analysis.py +109 -0
  132. tests/backtest/test_accuracy_verification.py +244 -0
  133. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  134. tests/backtest/test_databento.py +4 -0
  135. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  136. tests/backtest/test_debug_avg_fill_price.py +112 -0
  137. tests/backtest/test_dividends.py +8 -3
  138. tests/backtest/test_example_strategies.py +54 -47
  139. tests/backtest/test_futures_edge_cases.py +451 -0
  140. tests/backtest/test_futures_single_trade.py +270 -0
  141. tests/backtest/test_futures_ultra_simple.py +191 -0
  142. tests/backtest/test_index_data_verification.py +348 -0
  143. tests/backtest/test_polygon.py +45 -24
  144. tests/backtest/test_thetadata.py +246 -60
  145. tests/backtest/test_thetadata_comprehensive.py +729 -0
  146. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  147. tests/backtest/test_yahoo.py +1 -2
  148. tests/conftest.py +20 -0
  149. tests/test_backtesting_data_source_env.py +249 -0
  150. tests/test_backtesting_quiet_logs_complete.py +10 -11
  151. tests/test_databento_helper.py +76 -90
  152. tests/test_databento_timezone_fixes.py +21 -4
  153. tests/test_get_historical_prices.py +6 -6
  154. tests/test_options_helper.py +162 -40
  155. tests/test_polygon_helper.py +21 -13
  156. tests/test_quiet_logs_requirements.py +5 -5
  157. tests/test_thetadata_helper.py +487 -171
  158. tests/test_yahoo_data.py +125 -0
  159. {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/LICENSE +0 -0
  160. {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/WHEEL +0 -0
  161. {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/top_level.txt +0 -0
lumibot/data_sources/databento_data_polars_backtesting.py:
@@ -20,7 +20,7 @@ import polars as pl
 from lumibot.data_sources import DataSourceBacktesting
 from lumibot.data_sources.polars_mixin import PolarsMixin
 from lumibot.entities import Asset, Bars
-from lumibot.tools import databento_helper_polars
+from lumibot.tools import databento_helper_polars, databento_helper
 from lumibot.tools.lumibot_logger import get_logger
 
 logger = get_logger(__name__)
@@ -71,8 +71,88 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         self._prefetch_cache: Dict[tuple, bool] = {}
         self._prefetched_assets = set()  # Track which assets have been fully loaded
 
+        # OPTIMIZATION: Iteration-level filtered bars cache (same as Pandas)
+        self._filtered_bars_cache = {}  # {(asset_key, length, timestep, timeshift, dt): DataFrame}
+        self._bars_cache_datetime = None  # Track when to invalidate bars cache
+
+        # Futures multiplier cache - track which assets have had multipliers fetched
+        self._multiplier_fetched_assets = set()
+
         logger.info(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
 
+    def _ensure_futures_multiplier(self, asset):
+        """
+        Ensure futures asset has correct multiplier set.
+
+        This method is idempotent and cached - safe to call multiple times.
+        Only fetches multiplier once per unique asset.
+
+        Design rationale:
+        - Futures multipliers must be fetched from data provider (e.g., DataBento)
+        - Asset class defaults to multiplier=1
+        - Data source is responsible for updating multiplier on first use
+        - Lazy fetching is more efficient than prefetching all possible assets
+
+        Parameters
+        ----------
+        asset : Asset
+            The asset to ensure has correct multiplier
+        """
+        # Skip if not a futures asset
+        if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
+            return
+
+        # Skip if multiplier already set to non-default value
+        if asset.multiplier != 1:
+            return
+
+        # Create cache key to track which assets we've already processed
+        # Use symbol + asset_type + expiration to handle different contracts
+        cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
+
+        # Check if we already tried to fetch for this asset
+        if cache_key in self._multiplier_fetched_assets:
+            return  # Already attempted (even if failed, don't retry every time)
+
+        # Mark as attempted to avoid redundant API calls
+        self._multiplier_fetched_assets.add(cache_key)
+
+        # Fetch and set multiplier from DataBento
+        try:
+            client = databento_helper.DataBentoClient(self._api_key)
+
+            # Resolve symbol based on asset type
+            if asset.asset_type == Asset.AssetType.CONT_FUTURE:
+                resolved_symbol = databento_helper._format_futures_symbol_for_databento(
+                    asset, reference_date=self.datetime_start
+                )
+            else:
+                resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
+
+            # Fetch multiplier from DataBento instrument definition
+            databento_helper._fetch_and_update_futures_multiplier(
+                client=client,
+                asset=asset,
+                resolved_symbol=resolved_symbol,
+                dataset="GLBX.MDP3",
+                reference_date=self.datetime_start
+            )
+
+            logger.info(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
+
+        except Exception as e:
+            logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
+
+    def _check_and_clear_bars_cache(self):
+        """
+        OPTIMIZATION: Clear iteration caches when datetime changes.
+        This prevents stale data from being returned across different backtest iterations.
+        """
+        current_dt = self.get_datetime()
+        if self._bars_cache_datetime != current_dt:
+            self._filtered_bars_cache.clear()
+            self._bars_cache_datetime = current_dt
+
     def _enforce_storage_limit(self, data_store: Dict[Asset, pl.LazyFrame]):
         """Enforce storage limit by removing least recently used data."""
         # Use mixin's enforce method
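The multiplier lookup above is a deliberate attempt-once cache: the asset is marked as attempted before the fetch, so a provider error is logged once instead of being retried on every backtest iteration. A minimal standalone sketch of the same pattern (the dict-based asset and the fetch_multiplier callable are hypothetical stand-ins, not lumibot APIs):

    # Attempt-once memoization: failures are recorded and never retried.
    _attempted = set()

    def ensure_multiplier(asset: dict, fetch_multiplier) -> None:
        key = (asset["symbol"], asset.get("expiration"))
        if asset["multiplier"] != 1 or key in _attempted:
            return  # already set, or already tried
        _attempted.add(key)  # mark before fetching so errors are not retried
        try:
            asset["multiplier"] = fetch_multiplier(asset["symbol"])
        except Exception as exc:
            print(f"could not fetch multiplier: {exc}")

    es = {"symbol": "ES", "multiplier": 1}
    ensure_multiplier(es, lambda symbol: 50)
    assert es["multiplier"] == 50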
@@ -216,13 +296,20 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
             self._prefetched_assets.add(search_asset)
             return
 
-        # Get the start datetime and timestep unit
+        # Get the start datetime and timestep unit (includes length*timestep + buffer)
+        # This matches Pandas logic: start_datetime = (start_dt - length*timestep) - START_BUFFER
         start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
             length, timestep, start_dt, start_buffer=START_BUFFER
         )
 
-        # Fetch data for ENTIRE backtest period (like pandas does)
-        start_datetime = self.datetime_start - START_BUFFER
+        # FIX: Ensure timezone-aware datetime for API call (matches Pandas behavior)
+        # Polars was passing naive datetime, causing DataBento to treat it as UTC instead of ET
+        # This caused fetching wrong data (18 hours off!)
+        start_datetime = self.to_default_timezone(start_datetime)
+
+        # FIX: Don't override start_datetime! Use the calculated value that includes bars + buffer
+        # The old code set start_datetime = self.datetime_start - START_BUFFER which was wrong
+        # It didn't account for the requested bar length, causing missing data
         end_datetime = self.datetime_end + timedelta(days=1)
 
         logger.info(f"Prefetching {asset_separated.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
@@ -244,6 +331,9 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
 
         # Download data from DataBento using polars helper
         try:
+            # CRITICAL FIX: Use start_datetime as reference_date to match Pandas behavior!
+            # Pandas passes reference_date=start (WITH buffer included) - see databento_helper.py line 797
+            # This determines which futures contract is active at that time
             df = databento_helper_polars.get_price_data_from_databento_polars(
                 api_key=self._api_key,
                 asset=asset_separated,
@@ -251,7 +341,8 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
                 end=end_datetime,
                 timestep=timestep,
                 venue=None,
-                force_cache_update=False
+                force_cache_update=False,
+                reference_date=start_datetime  # MUST match Pandas: reference_date=start (WITH buffer)
             )
         except Exception as e:
             # Handle all exceptions
@@ -291,10 +382,31 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
     ) -> Optional[pl.DataFrame]:
         """Pull bars with maximum efficiency using pre-filtered cache."""
 
-        # Build search key
-        search_asset = asset if not isinstance(asset, tuple) else asset
-        if quote:
-            search_asset = (asset, quote)
+        # OPTIMIZATION: Check iteration cache first
+        self._check_and_clear_bars_cache()
+        current_dt = self.get_datetime()
+
+        # Build search key - MUST match _update_data logic!
+        # Default quote to USD forex if not provided (matches _update_data)
+        search_asset = asset
+        quote_asset = quote if quote is not None else Asset("USD", "forex")
+
+        if isinstance(asset, tuple):
+            search_asset, quote_asset = asset
+        else:
+            search_asset = (asset, quote_asset)
+
+        # OPTIMIZATION: Build cache key and check filtered bars cache (same as Pandas)
+        timeshift_key = 0
+        if timeshift:
+            if isinstance(timeshift, int):
+                timeshift_key = timeshift
+            elif hasattr(timeshift, 'total_seconds'):
+                timeshift_key = int(timeshift.total_seconds() / 60)
+
+        bars_cache_key = (search_asset, length, timestep, timeshift_key, current_dt)
+        if bars_cache_key in self._filtered_bars_cache:
+            return self._filtered_bars_cache[bars_cache_key]
 
         # For daily timestep, use optimized caching strategy
         if timestep == "day":
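Because the filtered-bars cache is keyed by a tuple, the timeshift argument (which may be None, an int, or a timedelta) has to be normalized into a hashable, stable value; the code above folds it into whole minutes. A small sketch of that normalization under the same assumptions:

    from datetime import timedelta

    def make_timeshift_key(timeshift) -> int:
        # None/0 -> 0; int is taken as-is; timedelta is folded to whole minutes
        if not timeshift:
            return 0
        if isinstance(timeshift, int):
            return timeshift
        if hasattr(timeshift, "total_seconds"):
            return int(timeshift.total_seconds() / 60)
        return 0

    assert make_timeshift_key(None) == 0
    assert make_timeshift_key(5) == 5
    assert make_timeshift_key(timedelta(hours=2)) == 120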
@@ -307,19 +419,18 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
             if len(result) >= length:
                 return result.tail(length)
 
-        # Get the current datetime and calculate the start datetime
-        current_dt = self.get_datetime()
-        # Get data from DataBento
-        self._update_data(asset, quote, length, timestep, current_dt)
-
-        # Get lazy data
-        search_asset = asset if not isinstance(asset, tuple) else asset
-        if quote:
-            search_asset = (asset, quote)
+        # FIX: Pass None as start_dt to match Pandas behavior
+        # Pandas uses self.datetime_start as reference, not current iteration time
+        # This ensures we fetch enough historical data for all iterations
+        self._update_data(asset, quote, length, timestep, start_dt=None)
 
+        # Get lazy data - use the same search_asset key we already built
         lazy_data = self._get_data_lazy(search_asset)
+        logger.info(f"[POLARS-DEBUG] _get_data_lazy returned: {lazy_data is not None}, search_asset={search_asset}")
+        logger.info(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")
 
         if lazy_data is None:
+            logger.warning(f"[POLARS-DEBUG] lazy_data is None for search_asset={search_asset}")
             return None
 
         # Use lazy evaluation and collect only when needed
@@ -336,28 +447,43 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         # For minute data, collect on demand
         data = lazy_data.collect()
 
+        logger.info(f"[POLARS-DEBUG] After collect: data shape={data.shape if data is not None else 'None'}")
+
         # OPTIMIZATION: Direct filtering on eager DataFrame
         current_dt = self.to_default_timezone(self._datetime)
 
-        # Determine end filter
-        if timestep == "day":
-            dt = self._datetime.replace(hour=23, minute=59, second=59, microsecond=999999)
-            end_filter = dt - timedelta(days=1)
-        else:
-            end_filter = current_dt
+        # Determine end filter - CRITICAL: Must match pandas logic!
+        # For backtesting, we need to exclude the in-progress bar
+        # IMPORTANT: Use the current datetime directly, not minus 1 bar
+        # The filter uses < (not <=) to exclude the current bar
+        use_strict_less_than = False  # Use < instead of <=
 
         if timeshift:
+            # When timeshift is present, use <= with adjusted end_filter
             if isinstance(timeshift, int):
-                timeshift = timedelta(days=timeshift)
-            end_filter = end_filter - timeshift
+                # Match pandas implementation: interpret integer timeshift as minutes
+                timeshift = timedelta(minutes=timeshift)
+            if timestep == "day":
+                dt = self._datetime.replace(hour=23, minute=59, second=59, microsecond=999999)
+                end_filter = dt - timedelta(days=1) - timeshift
+            elif timestep == "hour":
+                end_filter = current_dt - timedelta(hours=1) - timeshift
+            else:
+                end_filter = current_dt - timedelta(minutes=1) - timeshift
+        else:
+            # No timeshift: use current_dt with < operator (matches Pandas behavior)
+            end_filter = current_dt
+            use_strict_less_than = True
 
         logger.debug(f"Filtering {asset.symbol} data: current_dt={current_dt}, end_filter={end_filter}, timestep={timestep}, timeshift={timeshift}")
 
         # Convert to lazy frame for filtering
         lazy_data = data.lazy() if not hasattr(data, 'collect') else data
+        logger.info(f"[POLARS-DEBUG] Before filter: lazy_data type={type(lazy_data)}, end_filter={end_filter}, length={length}, use_strict_less_than={use_strict_less_than}")
 
         # Use mixin's filter method
-        result = self._filter_data_polars(search_asset, lazy_data, end_filter, length, timestep)
+        result = self._filter_data_polars(search_asset, lazy_data, end_filter, length, timestep, use_strict_less_than=use_strict_less_than)
+        logger.info(f"[POLARS-DEBUG] After filter: result shape={result.shape if result is not None else 'None'}")
 
         if result is None:
             return None
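The strict-< rule above is what keeps the backtest from peeking at the bar that is still forming: with bars stamped at their open time, a bar stamped at the current iteration time has not closed yet. A toy illustration of the difference, using only the standard library:

    from datetime import datetime, timedelta

    # Minute bars stamped at their open time
    bars = [datetime(2024, 1, 2, 9, 30) + timedelta(minutes=i) for i in range(5)]
    now = datetime(2024, 1, 2, 9, 32)  # the 09:32 bar is still in progress

    completed = [b for b in bars if b < now]   # strict <: drops the forming bar
    peeking = [b for b in bars if b <= now]    # <= would leak it into results

    assert completed[-1].minute == 31
    assert peeking[-1].minute == 32  # one bar of lookahead bias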
@@ -370,6 +496,12 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
 
         logger.debug(f"Returning {len(result)} bars for {asset.symbol}")
 
+        # OPTIMIZATION: Cache the result before returning (same as Pandas)
+        if result is not None and not result.is_empty():
+            self._filtered_bars_cache[bars_cache_key] = result
+        else:
+            self._filtered_bars_cache[bars_cache_key] = None
+
         return result
 
     def _parse_source_symbol_bars(
@@ -408,6 +540,9 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         if cached_price is not None:
             return cached_price
 
+        # Ensure futures have correct multiplier set
+        self._ensure_futures_multiplier(asset)
+
         try:
             dt = self.get_datetime()
             self._update_data(asset, quote, 1, timestep, dt)
@@ -417,34 +552,40 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
                 self._cache_last_price_polars(asset, None, current_datetime, timestep)
                 return None
 
-            # Get price efficiently
-            # For daily data, don't apply additional timeshift since _pull_source_symbol_bars
-            # already handles getting the previous day's data
-            # Only request 1 bar for efficiency (matching pandas implementation)
-            timeshift = None if timestep == "day" else timedelta(days=-1)
-            length = 1
-
+            # Request a single completed bar (aligns with pandas implementation)
             bars_data = self._pull_source_symbol_bars(
-                asset, length, timestep=timestep, timeshift=timeshift, quote=quote
+                asset, 1, timestep=timestep, timeshift=None, quote=quote
             )
 
             if bars_data is None or len(bars_data) == 0:
-                logger.warning(f"No bars data for {asset.symbol} at {current_datetime}")
+                logger.warning(f"[POLARS-DEBUG] ✗✗✗ NO BARS DATA for {asset.symbol} at {current_datetime}, timestep={timestep}")
+                logger.warning(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")
                 self._cache_last_price_polars(asset, None, current_datetime, timestep)
                 return None
 
-            # Direct column access - since we only request 1 bar, take the first (and only) element
-            open_price = bars_data["open"][0]
+            # Use the close of the most recent completed bar (pandas parity)
+            if "close" not in bars_data.columns:
+                logger.warning(f"[POLARS-DEBUG] ✗✗✗ Close column missing for {asset.symbol}")
+                self._cache_last_price_polars(asset, None, current_datetime, timestep)
+                return None
 
-            # Convert if needed
-            if isinstance(open_price, (np.int64, np.integer)):
-                open_price = Decimal(int(open_price))
-            elif isinstance(open_price, (np.float64, np.floating)):
-                open_price = float(open_price)
+            last_close = bars_data.select(pl.col("close").tail(1)).item()
 
-            # Use mixin's cache method
-            self._cache_last_price_polars(asset, open_price, current_datetime, timestep)
-            return open_price
+            if last_close is None:
+                logger.warning(f"[POLARS-DEBUG] ✗✗✗ Unable to extract close price for {asset.symbol}")
+                self._cache_last_price_polars(asset, None, current_datetime, timestep)
+                return None
+
+            if isinstance(last_close, (np.int64, np.integer)):
+                price_value = Decimal(int(last_close))
+            elif isinstance(last_close, (np.float64, np.floating)):
+                price_value = float(last_close)
+            else:
+                price_value = float(last_close)
+
+            self._cache_last_price_polars(asset, price_value, current_datetime, timestep)
+            logger.info(f"[POLARS-DEBUG] Returning price from bars (close): {price_value}")
+            return price_value
 
     def get_historical_prices(
         self,
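Extracting the close as a Python scalar relies on polars' select/tail/item chain: tail(1) keeps the newest row, and .item() unwraps the resulting 1x1 frame. A quick check of that idiom on a toy frame:

    import polars as pl

    df = pl.DataFrame({"close": [101.0, 102.5, 103.25]})
    last_close = df.select(pl.col("close").tail(1)).item()
    assert last_close == 103.25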
@@ -458,7 +599,7 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         return_polars: bool = False,
     ) -> Optional[Bars]:
         """Get historical prices using polars."""
-        logger.debug(f"get_historical_prices called for {asset.symbol}")
+        logger.info(f"[POLARS-DEBUG] get_historical_prices called: asset={asset.symbol}, length={length}, timestep={timestep}, datetime={self._datetime}")
         if timestep is None:
             timestep = self.get_timestep()
 
@@ -473,12 +614,17 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         )
 
         if bars_data is None:
+            logger.warning(f"[POLARS-DEBUG] ✗✗✗ _pull_source_symbol_bars returned None for {asset.symbol}")
             return None
 
+        logger.info(f"[POLARS-DEBUG] _pull_source_symbol_bars returned {len(bars_data)} bars")
+
         # Create and return Bars object
-        return self._parse_source_symbol_bars(
+        result = self._parse_source_symbol_bars(
             bars_data, asset, quote=quote, length=length, return_polars=return_polars
         )
+        logger.info(f"[POLARS-DEBUG] Returning Bars object: {result is not None}")
+        return result
 
     def get_chains(self, asset: Asset, quote: Asset = None, exchange: str = None):
         """Get option chains - not implemented for DataBento."""
lumibot/data_sources/pandas_data.py:
@@ -103,8 +103,8 @@ class PandasData(DataSourceBacktesting):
         df = pd.DataFrame(range(len(dt_index)), index=dt_index)
         df = df.sort_index()
 
-        # Create a column for the date portion only
-        df["dates"] = df.index.date
+        # Create a column for the date portion only (normalize to date, keeping as datetime64 type)
+        df["dates"] = df.index.normalize()
 
         # Merge with the trading calendar on the 'dates' column to get market open/close times.
         # Use a left join to keep all rows from the original index.
  # Use a left join to keep all rows from the original index.
@@ -145,7 +145,8 @@ class PandasData(DataSourceBacktesting):
 
         else:
             pcal.columns = ["datetime"]
-            pcal["date"] = pcal["datetime"].dt.date
+            # Normalize to date but keep as datetime64 type (not date objects)
+            pcal["date"] = pcal["datetime"].dt.normalize()
             result = pcal.groupby("date").agg(
                 market_open=(
                     "datetime",
@@ -290,6 +291,8 @@ class PandasData(DataSourceBacktesting):
                 ask=ohlcv_bid_ask_dict.get('ask'),
                 volume=ohlcv_bid_ask_dict.get('volume'),
                 timestamp=dt,
+                bid_size=ohlcv_bid_ask_dict.get('bid_size'),
+                ask_size=ohlcv_bid_ask_dict.get('ask_size'),
                 raw_data=ohlcv_bid_ask_dict
             )
         else:
lumibot/data_sources/polars_mixin.py:
@@ -72,17 +72,19 @@ class PolarsMixin:
 
     def _get_data_lazy(self, asset: Asset) -> Optional[pl.LazyFrame]:
         """Get lazy frame for asset.
-
+
         Parameters
         ----------
-        asset : Asset
-            The asset to get data for
-
+        asset : Asset or tuple
+            The asset to get data for (can be a tuple of (asset, quote))
+
         Returns
         -------
         Optional[pl.LazyFrame]
             The lazy frame or None if not found
         """
+        # CRITICAL FIX: Handle both Asset and (Asset, quote) tuple keys
+        # The data store uses tuple keys (asset, quote), so we need to look up by that key
         return self._data_store.get(asset)
 
     def _parse_source_symbol_bars_polars(
  def _parse_source_symbol_bars_polars(
@@ -95,7 +97,7 @@ class PolarsMixin:
95
97
  return_polars: bool = False
96
98
  ) -> Bars:
97
99
  """Parse bars from polars DataFrame.
98
-
100
+
99
101
  Parameters
100
102
  ----------
101
103
  response : pl.DataFrame
@@ -108,7 +110,7 @@ class PolarsMixin:
             The quote asset for forex/crypto
         length : Optional[int]
             Limit the number of bars
-
+
         Returns
         -------
         Bars
@@ -121,6 +123,21 @@ class PolarsMixin:
         if length and len(response) > length:
             response = response.tail(length)
 
+        # Filter to only keep OHLCV + datetime columns (remove DataBento metadata like rtype, publisher_id, etc.)
+        # Required columns for strategies
+        required_cols = ['open', 'high', 'low', 'close', 'volume']
+        optional_cols = ['datetime', 'timestamp', 'date', 'time', 'dividend', 'stock_splits', 'symbol']
+
+        # Determine which columns to keep
+        keep_cols = []
+        for col in response.columns:
+            if col in required_cols or col in optional_cols:
+                keep_cols.append(col)
+
+        # Select only the relevant columns
+        if keep_cols:
+            response = response.select(keep_cols)
+
         # Create bars object
         bars = Bars(response, source, asset, raw=response, quote=quote, return_polars=return_polars)
         return bars
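The column pruning above is a plain allow-list select; column order is preserved from the source frame. A self-contained sketch of the same idea (the rtype/publisher_id columns stand in for provider metadata):

    import polars as pl

    df = pl.DataFrame({
        "datetime": ["2024-01-02"], "open": [1.0], "high": [2.0], "low": [0.5],
        "close": [1.5], "volume": [100],
        "rtype": [32], "publisher_id": [1],  # provider metadata to drop
    })

    allowed = {"datetime", "open", "high", "low", "close", "volume"}
    keep = [c for c in df.columns if c in allowed]
    assert df.select(keep).columns == ["datetime", "open", "high", "low", "close", "volume"]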
@@ -209,22 +226,45 @@ class PolarsMixin:
         self._last_price_cache[cache_key] = price
 
     def _convert_datetime_for_filtering(self, dt: Any) -> datetime:
-        """Convert datetime to naive datetime for filtering.
-
+        """Convert datetime to naive UTC datetime for filtering.
+
+        CRITICAL FIX: Must convert to UTC BEFORE stripping timezone!
+        If we strip timezone from ET datetime, we lose 5 hours of data.
+
+        Example:
+        - Input: 2024-01-02 18:00:00-05:00 (ET)
+        - Convert to UTC: 2024-01-02 23:00:00+00:00
+        - Strip timezone: 2024-01-02 23:00:00 (naive UTC)
+
+        OLD BUGGY CODE:
+        - Input: 2024-01-02 18:00:00-05:00 (ET)
+        - Strip timezone: 2024-01-02 18:00:00 (naive, loses timezone!)
+        - Compare to cached data in naive UTC: WRONG by 5 hours!
+
         Parameters
         ----------
         dt : Any
             Datetime-like object
-
+
         Returns
         -------
         datetime
-            Naive datetime object
+            Naive UTC datetime object
         """
-        if hasattr(dt, 'tz_localize'):
-            return dt.tz_localize(None)
+        from datetime import timezone
+
+        # First convert to UTC if timezone-aware
+        if hasattr(dt, 'tzinfo') and dt.tzinfo is not None:
+            # Convert to UTC
+            dt_utc = dt.astimezone(timezone.utc)
+            # Then strip timezone
+            return dt_utc.replace(tzinfo=None)
+        elif hasattr(dt, 'tz_localize'):
+            # Pandas Timestamp
+            return dt.tz_convert('UTC').tz_localize(None)
         elif hasattr(dt, 'replace'):
-            return dt.replace(tzinfo=None)
+            # Already naive
+            return dt
         else:
             return dt
 
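The docstring's 5-hour example can be reproduced with just the standard library; converting to UTC before stripping tzinfo is what keeps the naive value comparable to data cached in naive UTC (a fixed -05:00 offset stands in for ET here):

    from datetime import datetime, timezone, timedelta

    et = timezone(timedelta(hours=-5))
    dt = datetime(2024, 1, 2, 18, 0, tzinfo=et)

    buggy = dt.replace(tzinfo=None)                           # 18:00, silently still "ET"
    fixed = dt.astimezone(timezone.utc).replace(tzinfo=None)  # 23:00 naive UTC

    assert buggy.hour == 18
    assert fixed.hour == 23  # comparable to naive-UTC cache timestamps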
@@ -283,10 +323,11 @@ class PolarsMixin:
         lazy_data: pl.LazyFrame,
         end_filter: datetime,
         length: int,
-        timestep: str = "minute"
+        timestep: str = "minute",
+        use_strict_less_than: bool = False
     ) -> Optional[pl.DataFrame]:
         """Filter data up to end_filter and return last length rows.
-
+
         Parameters
         ----------
         asset : Asset
@@ -299,15 +340,23 @@ class PolarsMixin:
             Number of rows to return
         timestep : str
             Timestep for caching strategy
-
+        use_strict_less_than : bool
+            If True, use < instead of <= for filtering (matches Pandas behavior without timeshift)
+
         Returns
         -------
         Optional[pl.DataFrame]
             Filtered dataframe or None
         """
+        # DEBUG
+        logger.debug(f"[POLARS FILTER] end_filter={end_filter}, tzinfo={end_filter.tzinfo if hasattr(end_filter, 'tzinfo') else 'N/A'}, length={length}")
+
         # Convert end_filter to naive
         end_filter_naive = self._convert_datetime_for_filtering(end_filter)
 
+        # DEBUG
+        logger.debug(f"[POLARS FILTER] end_filter_naive={end_filter_naive}")
+
         # For daily timestep, use caching
         if timestep == "day":
             current_date = end_filter.date() if hasattr(end_filter, 'date') else end_filter
@@ -335,11 +384,37 @@ class PolarsMixin:
                 return None
 
             # Filter and collect
+            # CRITICAL FIX: Keep timezone info! Match the DataFrame's timezone
+            # Get the DataFrame column's timezone from schema
+            dt_dtype = schema[dt_col]
+
+            # Convert filter to match DataFrame's timezone
+            if hasattr(dt_dtype, 'time_zone') and dt_dtype.time_zone:
+                # DataFrame has timezone, convert filter to match
+                import pytz
+                df_tz = pytz.timezone(dt_dtype.time_zone)
+                end_filter_with_tz = pytz.utc.localize(end_filter_naive).astimezone(df_tz)
+            else:
+                # DataFrame is naive, use UTC
+                from datetime import timezone as tz
+                end_filter_with_tz = datetime.combine(
+                    end_filter_naive.date(),
+                    end_filter_naive.time(),
+                    tzinfo=tz.utc
+                )
+
+            # CRITICAL FIX: Deduplicate before caching
+            # Use < or <= based on use_strict_less_than flag
+            if use_strict_less_than:
+                filter_expr = pl.col(dt_col) < end_filter_with_tz
+            else:
+                filter_expr = pl.col(dt_col) <= end_filter_with_tz
+
             result = (
                 lazy_data
-                .with_columns(pl.col(dt_col).cast(pl.Datetime("us")))
-                .filter(pl.col(dt_col) <= end_filter_naive)
+                .filter(filter_expr)
                 .sort(dt_col)
+                .unique(subset=[dt_col], keep='last', maintain_order=True)
                 .tail(fetch_length)
                 .collect()
             )
@@ -362,11 +437,41 @@ class PolarsMixin:
             logger.error("No datetime column found")
             return None
 
-        return (
+        # CRITICAL FIX: Keep timezone info during filtering!
+        # Match the DataFrame's timezone to avoid comparison errors
+        # Get the DataFrame column's timezone from schema
+        dt_dtype = schema[dt_col]
+
+        # Convert filter to match DataFrame's timezone
+        if hasattr(dt_dtype, 'time_zone') and dt_dtype.time_zone:
+            # DataFrame has timezone, convert filter to match
+            import pytz
+            df_tz = pytz.timezone(dt_dtype.time_zone)
+            end_filter_with_tz = pytz.utc.localize(end_filter_naive).astimezone(df_tz)
+        else:
+            # DataFrame is naive, use UTC
+            from datetime import timezone as tz
+            end_filter_with_tz = datetime.combine(
+                end_filter_naive.date(),
+                end_filter_naive.time(),
+                tzinfo=tz.utc
+            )
+
+        # CRITICAL FIX: Deduplicate before returning
+        # Sometimes lazy operations can create duplicates
+        # Use < or <= based on use_strict_less_than flag
+        if use_strict_less_than:
+            filter_expr = pl.col(dt_col) < end_filter_with_tz
+        else:
+            filter_expr = pl.col(dt_col) <= end_filter_with_tz
+
+        result = (
             lazy_data
-            .with_columns(pl.col(dt_col).cast(pl.Datetime("us")))
-            .filter(pl.col(dt_col) <= end_filter_naive)
+            .filter(filter_expr)
             .sort(dt_col)
+            .unique(subset=[dt_col], keep='last', maintain_order=True)
             .tail(length)
             .collect()
         )
+
+        return result
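The final pipeline composes cleanly in polars' lazy API; a self-contained sketch of the filter / sort / unique / tail chain, including the duplicate-timestamp case the unique() call guards against:

    import polars as pl
    from datetime import datetime

    lf = pl.DataFrame({
        "datetime": [datetime(2024, 1, 2, 9, 30), datetime(2024, 1, 2, 9, 30),
                     datetime(2024, 1, 2, 9, 31), datetime(2024, 1, 2, 9, 32)],
        "close": [1.0, 1.1, 1.2, 1.3],
    }).lazy()

    cutoff = datetime(2024, 1, 2, 9, 32)
    out = (
        lf.filter(pl.col("datetime") < cutoff)  # strict <: excludes the forming bar
          .sort("datetime")
          .unique(subset=["datetime"], keep="last", maintain_order=True)
          .tail(2)
          .collect()
    )
    assert out["close"].to_list() == [1.1, 1.2]  # later row wins the 09:30 duplicate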