lumibot 4.0.23__py3-none-any.whl → 4.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of lumibot might be problematic.
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__init__.py +6 -5
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/backtesting_broker.py +209 -9
- lumibot/backtesting/databento_backtesting.py +145 -24
- lumibot/backtesting/thetadata_backtesting.py +63 -42
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/brokers/alpaca.py +11 -1
- lumibot/brokers/tradeovate.py +475 -0
- lumibot/components/grok_news_helper.py +284 -0
- lumibot/components/options_helper.py +90 -34
- lumibot/credentials.py +3 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/data_source_backtesting.py +3 -5
- lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
- lumibot/data_sources/pandas_data.py +6 -3
- lumibot/data_sources/polars_mixin.py +126 -21
- lumibot/data_sources/tradeovate_data.py +80 -0
- lumibot/data_sources/tradier_data.py +2 -1
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/entities/asset.py +8 -0
- lumibot/entities/order.py +1 -1
- lumibot/entities/quote.py +14 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/strategies/_strategy.py +95 -27
- lumibot/strategies/strategy.py +5 -6
- lumibot/strategies/strategy_executor.py +2 -2
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/databento_helper.py +384 -133
- lumibot/tools/databento_helper_polars.py +218 -156
- lumibot/tools/databento_roll.py +216 -0
- lumibot/tools/lumibot_logger.py +32 -17
- lumibot/tools/polygon_helper.py +65 -0
- lumibot/tools/thetadata_helper.py +588 -70
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/traders/trader.py +1 -1
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- lumibot-4.1.1.data/data/ThetaTerminal.jar +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/METADATA +1 -2
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/RECORD +161 -44
- tests/backtest/check_timing_offset.py +198 -0
- tests/backtest/check_volume_spike.py +112 -0
- tests/backtest/comprehensive_comparison.py +166 -0
- tests/backtest/debug_comparison.py +91 -0
- tests/backtest/diagnose_price_difference.py +97 -0
- tests/backtest/direct_api_comparison.py +203 -0
- tests/backtest/profile_thetadata_vs_polygon.py +255 -0
- tests/backtest/root_cause_analysis.py +109 -0
- tests/backtest/test_accuracy_verification.py +244 -0
- tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
- tests/backtest/test_databento.py +4 -0
- tests/backtest/test_databento_comprehensive_trading.py +564 -0
- tests/backtest/test_debug_avg_fill_price.py +112 -0
- tests/backtest/test_dividends.py +8 -3
- tests/backtest/test_example_strategies.py +54 -47
- tests/backtest/test_futures_edge_cases.py +451 -0
- tests/backtest/test_futures_single_trade.py +270 -0
- tests/backtest/test_futures_ultra_simple.py +191 -0
- tests/backtest/test_index_data_verification.py +348 -0
- tests/backtest/test_polygon.py +45 -24
- tests/backtest/test_thetadata.py +246 -60
- tests/backtest/test_thetadata_comprehensive.py +729 -0
- tests/backtest/test_thetadata_vs_polygon.py +557 -0
- tests/backtest/test_yahoo.py +1 -2
- tests/conftest.py +20 -0
- tests/test_backtesting_data_source_env.py +249 -0
- tests/test_backtesting_quiet_logs_complete.py +10 -11
- tests/test_databento_helper.py +76 -90
- tests/test_databento_timezone_fixes.py +21 -4
- tests/test_get_historical_prices.py +6 -6
- tests/test_options_helper.py +162 -40
- tests/test_polygon_helper.py +21 -13
- tests/test_quiet_logs_requirements.py +5 -5
- tests/test_thetadata_helper.py +487 -171
- tests/test_yahoo_data.py +125 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/LICENSE +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/WHEEL +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/top_level.txt +0 -0
lumibot/data_sources/databento_data_polars_backtesting.py

@@ -20,7 +20,7 @@ import polars as pl
 from lumibot.data_sources import DataSourceBacktesting
 from lumibot.data_sources.polars_mixin import PolarsMixin
 from lumibot.entities import Asset, Bars
-from lumibot.tools import databento_helper_polars
+from lumibot.tools import databento_helper_polars, databento_helper
 from lumibot.tools.lumibot_logger import get_logger
 
 logger = get_logger(__name__)
@@ -71,8 +71,88 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         self._prefetch_cache: Dict[tuple, bool] = {}
         self._prefetched_assets = set()  # Track which assets have been fully loaded
 
+        # OPTIMIZATION: Iteration-level filtered bars cache (same as Pandas)
+        self._filtered_bars_cache = {}  # {(asset_key, length, timestep, timeshift, dt): DataFrame}
+        self._bars_cache_datetime = None  # Track when to invalidate bars cache
+
+        # Futures multiplier cache - track which assets have had multipliers fetched
+        self._multiplier_fetched_assets = set()
+
         logger.info(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
 
+    def _ensure_futures_multiplier(self, asset):
+        """
+        Ensure futures asset has correct multiplier set.
+
+        This method is idempotent and cached - safe to call multiple times.
+        Only fetches multiplier once per unique asset.
+
+        Design rationale:
+        - Futures multipliers must be fetched from data provider (e.g., DataBento)
+        - Asset class defaults to multiplier=1
+        - Data source is responsible for updating multiplier on first use
+        - Lazy fetching is more efficient than prefetching all possible assets
+
+        Parameters
+        ----------
+        asset : Asset
+            The asset to ensure has correct multiplier
+        """
+        # Skip if not a futures asset
+        if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
+            return
+
+        # Skip if multiplier already set to non-default value
+        if asset.multiplier != 1:
+            return
+
+        # Create cache key to track which assets we've already processed
+        # Use symbol + asset_type + expiration to handle different contracts
+        cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
+
+        # Check if we already tried to fetch for this asset
+        if cache_key in self._multiplier_fetched_assets:
+            return  # Already attempted (even if failed, don't retry every time)
+
+        # Mark as attempted to avoid redundant API calls
+        self._multiplier_fetched_assets.add(cache_key)
+
+        # Fetch and set multiplier from DataBento
+        try:
+            client = databento_helper.DataBentoClient(self._api_key)
+
+            # Resolve symbol based on asset type
+            if asset.asset_type == Asset.AssetType.CONT_FUTURE:
+                resolved_symbol = databento_helper._format_futures_symbol_for_databento(
+                    asset, reference_date=self.datetime_start
+                )
+            else:
+                resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
+
+            # Fetch multiplier from DataBento instrument definition
+            databento_helper._fetch_and_update_futures_multiplier(
+                client=client,
+                asset=asset,
+                resolved_symbol=resolved_symbol,
+                dataset="GLBX.MDP3",
+                reference_date=self.datetime_start
+            )
+
+            logger.info(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
+
+        except Exception as e:
+            logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
+
+    def _check_and_clear_bars_cache(self):
+        """
+        OPTIMIZATION: Clear iteration caches when datetime changes.
+        This prevents stale data from being returned across different backtest iterations.
+        """
+        current_dt = self.get_datetime()
+        if self._bars_cache_datetime != current_dt:
+            self._filtered_bars_cache.clear()
+            self._bars_cache_datetime = current_dt
+
     def _enforce_storage_limit(self, data_store: Dict[Asset, pl.LazyFrame]):
         """Enforce storage limit by removing least recently used data."""
         # Use mixin's enforce method
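The `_check_and_clear_bars_cache` pattern added above, a memo cache keyed by the backtest clock and flushed whenever the clock advances, is the core of the iteration-level optimization. A minimal standalone sketch of the same idea (class and method names here are illustrative, not lumibot's API):

```python
from datetime import datetime, timedelta

class IterationCache:
    """Memo cache that stays valid only while the clock sits on one timestamp."""

    def __init__(self):
        self._cache = {}
        self._stamp = None  # the datetime the current cache contents were built for

    def get_or_compute(self, now: datetime, key, compute):
        if self._stamp != now:      # clock moved: everything cached is stale
            self._cache.clear()
            self._stamp = now
        if key not in self._cache:
            self._cache[key] = compute()
        return self._cache[key]

cache = IterationCache()
t0 = datetime(2024, 1, 2, 9, 30)
cache.get_or_compute(t0, ("ES", 100, "minute"), lambda: "bars@9:30")
# Advancing the clock clears the cache, so stale bars can never leak across iterations
cache.get_or_compute(t0 + timedelta(minutes=1), ("ES", 100, "minute"), lambda: "bars@9:31")
```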
@@ -216,13 +296,20 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
             self._prefetched_assets.add(search_asset)
             return
 
-        # Get the start datetime and timestep unit
+        # Get the start datetime and timestep unit (includes length*timestep + buffer)
+        # This matches Pandas logic: start_datetime = (start_dt - length*timestep) - START_BUFFER
         start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
             length, timestep, start_dt, start_buffer=START_BUFFER
         )
 
-        #
-
+        # FIX: Ensure timezone-aware datetime for API call (matches Pandas behavior)
+        # Polars was passing naive datetime, causing DataBento to treat it as UTC instead of ET
+        # This caused fetching wrong data (18 hours off!)
+        start_datetime = self.to_default_timezone(start_datetime)
+
+        # FIX: Don't override start_datetime! Use the calculated value that includes bars + buffer
+        # The old code set start_datetime = self.datetime_start - START_BUFFER which was wrong
+        # It didn't account for the requested bar length, causing missing data
         end_datetime = self.datetime_end + timedelta(days=1)
 
         logger.info(f"Prefetching {asset_separated.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
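The timezone fix in this hunk hinges on the usual naive-datetime pitfall: a naive timestamp carries no zone, so a provider that assumes UTC will read an Eastern wall time hours off from what was intended. A stdlib illustration (the hunk itself calls lumibot's `to_default_timezone` helper; `zoneinfo` here is just for demonstration):

```python
from datetime import datetime
from zoneinfo import ZoneInfo

naive_start = datetime(2024, 1, 2, 18, 0)  # meant as 18:00 US/Eastern wall time

# Correct: attach the intended zone before handing the value to an API
aware_start = naive_start.replace(tzinfo=ZoneInfo("America/New_York"))
print(aware_start.astimezone(ZoneInfo("UTC")))  # 2024-01-02 23:00:00+00:00

# Buggy path: a provider that assumes UTC reads the naive value as 18:00 UTC,
# which is 13:00 ET, five hours earlier than intended
misread = naive_start.replace(tzinfo=ZoneInfo("UTC"))
print(misread.astimezone(ZoneInfo("America/New_York")))  # 2024-01-02 13:00:00-05:00
```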
@@ -244,6 +331,9 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
 
         # Download data from DataBento using polars helper
         try:
+            # CRITICAL FIX: Use start_datetime as reference_date to match Pandas behavior!
+            # Pandas passes reference_date=start (WITH buffer included) - see databento_helper.py line 797
+            # This determines which futures contract is active at that time
             df = databento_helper_polars.get_price_data_from_databento_polars(
                 api_key=self._api_key,
                 asset=asset_separated,
@@ -251,7 +341,8 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
                 end=end_datetime,
                 timestep=timestep,
                 venue=None,
-                force_cache_update=False
+                force_cache_update=False,
+                reference_date=start_datetime  # MUST match Pandas: reference_date=start (WITH buffer)
             )
         except Exception as e:
             # Handle all exceptions
@@ -291,10 +382,31 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
     ) -> Optional[pl.DataFrame]:
         """Pull bars with maximum efficiency using pre-filtered cache."""
 
-        #
-
-
-
+        # OPTIMIZATION: Check iteration cache first
+        self._check_and_clear_bars_cache()
+        current_dt = self.get_datetime()
+
+        # Build search key - MUST match _update_data logic!
+        # Default quote to USD forex if not provided (matches _update_data)
+        search_asset = asset
+        quote_asset = quote if quote is not None else Asset("USD", "forex")
+
+        if isinstance(asset, tuple):
+            search_asset, quote_asset = asset
+        else:
+            search_asset = (asset, quote_asset)
+
+        # OPTIMIZATION: Build cache key and check filtered bars cache (same as Pandas)
+        timeshift_key = 0
+        if timeshift:
+            if isinstance(timeshift, int):
+                timeshift_key = timeshift
+            elif hasattr(timeshift, 'total_seconds'):
+                timeshift_key = int(timeshift.total_seconds() / 60)
+
+        bars_cache_key = (search_asset, length, timestep, timeshift_key, current_dt)
+        if bars_cache_key in self._filtered_bars_cache:
+            return self._filtered_bars_cache[bars_cache_key]
 
         # For daily timestep, use optimized caching strategy
         if timestep == "day":
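Note how the hunk folds `timeshift`, which callers may pass as either an int (minutes) or a `timedelta`, into a plain integer so it can sit inside a hashable cache key. The same normalization as a standalone function:

```python
from datetime import timedelta

def timeshift_to_minutes(timeshift) -> int:
    """Normalize an int-or-timedelta timeshift into whole minutes for a cache key."""
    if not timeshift:
        return 0
    if isinstance(timeshift, int):
        return timeshift
    if hasattr(timeshift, "total_seconds"):
        return int(timeshift.total_seconds() / 60)
    raise TypeError(f"unsupported timeshift type: {type(timeshift)!r}")

assert timeshift_to_minutes(None) == 0
assert timeshift_to_minutes(15) == 15
assert timeshift_to_minutes(timedelta(hours=1)) == 60
```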
@@ -307,19 +419,18 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
             if len(result) >= length:
                 return result.tail(length)
 
-        #
-
-        #
-        self._update_data(asset, quote, length, timestep,
-
-        # Get lazy data
-        search_asset = asset if not isinstance(asset, tuple) else asset
-        if quote:
-            search_asset = (asset, quote)
+        # FIX: Pass None as start_dt to match Pandas behavior
+        # Pandas uses self.datetime_start as reference, not current iteration time
+        # This ensures we fetch enough historical data for all iterations
+        self._update_data(asset, quote, length, timestep, start_dt=None)
 
+        # Get lazy data - use the same search_asset key we already built
         lazy_data = self._get_data_lazy(search_asset)
+        logger.info(f"[POLARS-DEBUG] _get_data_lazy returned: {lazy_data is not None}, search_asset={search_asset}")
+        logger.info(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")
 
         if lazy_data is None:
+            logger.warning(f"[POLARS-DEBUG] lazy_data is None for search_asset={search_asset}")
             return None
 
         # Use lazy evaluation and collect only when needed
@@ -336,28 +447,43 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         # For minute data, collect on demand
         data = lazy_data.collect()
 
+        logger.info(f"[POLARS-DEBUG] After collect: data shape={data.shape if data is not None else 'None'}")
+
         # OPTIMIZATION: Direct filtering on eager DataFrame
         current_dt = self.to_default_timezone(self._datetime)
 
-        # Determine end filter
-
-
-
-
-        end_filter = current_dt
+        # Determine end filter - CRITICAL: Must match pandas logic!
+        # For backtesting, we need to exclude the in-progress bar
+        # IMPORTANT: Use the current datetime directly, not minus 1 bar
+        # The filter uses < (not <=) to exclude the current bar
+        use_strict_less_than = False  # Use < instead of <=
 
         if timeshift:
+            # When timeshift is present, use <= with adjusted end_filter
             if isinstance(timeshift, int):
-                timeshift
-
+                # Match pandas implementation: interpret integer timeshift as minutes
+                timeshift = timedelta(minutes=timeshift)
+            if timestep == "day":
+                dt = self._datetime.replace(hour=23, minute=59, second=59, microsecond=999999)
+                end_filter = dt - timedelta(days=1) - timeshift
+            elif timestep == "hour":
+                end_filter = current_dt - timedelta(hours=1) - timeshift
+            else:
+                end_filter = current_dt - timedelta(minutes=1) - timeshift
+        else:
+            # No timeshift: use current_dt with < operator (matches Pandas behavior)
+            end_filter = current_dt
+            use_strict_less_than = True
 
         logger.debug(f"Filtering {asset.symbol} data: current_dt={current_dt}, end_filter={end_filter}, timestep={timestep}, timeshift={timeshift}")
 
         # Convert to lazy frame for filtering
         lazy_data = data.lazy() if not hasattr(data, 'collect') else data
+        logger.info(f"[POLARS-DEBUG] Before filter: lazy_data type={type(lazy_data)}, end_filter={end_filter}, length={length}, use_strict_less_than={use_strict_less_than}")
 
         # Use mixin's filter method
-        result = self._filter_data_polars(search_asset, lazy_data, end_filter, length, timestep)
+        result = self._filter_data_polars(search_asset, lazy_data, end_filter, length, timestep, use_strict_less_than=use_strict_less_than)
+        logger.info(f"[POLARS-DEBUG] After filter: result shape={result.shape if result is not None else 'None'}")
 
         if result is None:
             return None
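The `<` versus `<=` distinction matters because the bar stamped at the current backtest time is still forming and must stay invisible to the strategy. A small polars demonstration of the strict filter, on synthetic data:

```python
from datetime import datetime
import polars as pl

bars = pl.DataFrame({
    "datetime": [datetime(2024, 1, 2, 9, 29),
                 datetime(2024, 1, 2, 9, 30),
                 datetime(2024, 1, 2, 9, 31)],
    "close": [100.0, 101.0, 102.0],
})

now = datetime(2024, 1, 2, 9, 31)
# Strict < drops the 9:31 bar, which is still in progress at 9:31
completed = bars.filter(pl.col("datetime") < now)
print(completed["close"].to_list())  # [100.0, 101.0]
```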
@@ -370,6 +496,12 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
 
         logger.debug(f"Returning {len(result)} bars for {asset.symbol}")
 
+        # OPTIMIZATION: Cache the result before returning (same as Pandas)
+        if result is not None and not result.is_empty():
+            self._filtered_bars_cache[bars_cache_key] = result
+        else:
+            self._filtered_bars_cache[bars_cache_key] = None
+
         return result
 
     def _parse_source_symbol_bars(
@@ -408,6 +540,9 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         if cached_price is not None:
             return cached_price
 
+        # Ensure futures have correct multiplier set
+        self._ensure_futures_multiplier(asset)
+
         try:
             dt = self.get_datetime()
             self._update_data(asset, quote, 1, timestep, dt)
@@ -417,34 +552,40 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
             self._cache_last_price_polars(asset, None, current_datetime, timestep)
             return None
 
-        #
-        # For daily data, don't apply additional timeshift since _pull_source_symbol_bars
-        # already handles getting the previous day's data
-        # Only request 1 bar for efficiency (matching pandas implementation)
-        timeshift = None if timestep == "day" else timedelta(days=-1)
-        length = 1
-
+        # Request a single completed bar (aligns with pandas implementation)
         bars_data = self._pull_source_symbol_bars(
-            asset,
+            asset, 1, timestep=timestep, timeshift=None, quote=quote
         )
 
         if bars_data is None or len(bars_data) == 0:
-            logger.warning(f"
+            logger.warning(f"[POLARS-DEBUG] ✗✗✗ NO BARS DATA for {asset.symbol} at {current_datetime}, timestep={timestep}")
+            logger.warning(f"[POLARS-DEBUG] Data store keys: {list(self._data_store.keys())}")
             self._cache_last_price_polars(asset, None, current_datetime, timestep)
             return None
 
-        #
-
+        # Use the close of the most recent completed bar (pandas parity)
+        if "close" not in bars_data.columns:
+            logger.warning(f"[POLARS-DEBUG] ✗✗✗ Close column missing for {asset.symbol}")
+            self._cache_last_price_polars(asset, None, current_datetime, timestep)
+            return None
 
-
-        if isinstance(open_price, (np.int64, np.integer)):
-            open_price = Decimal(int(open_price))
-        elif isinstance(open_price, (np.float64, np.floating)):
-            open_price = float(open_price)
+        last_close = bars_data.select(pl.col("close").tail(1)).item()
 
-
-
-
+        if last_close is None:
+            logger.warning(f"[POLARS-DEBUG] ✗✗✗ Unable to extract close price for {asset.symbol}")
+            self._cache_last_price_polars(asset, None, current_datetime, timestep)
+            return None
+
+        if isinstance(last_close, (np.int64, np.integer)):
+            price_value = Decimal(int(last_close))
+        elif isinstance(last_close, (np.float64, np.floating)):
+            price_value = float(last_close)
+        else:
+            price_value = float(last_close)
+
+        self._cache_last_price_polars(asset, price_value, current_datetime, timestep)
+        logger.info(f"[POLARS-DEBUG] Returning price from bars (close): {price_value}")
+        return price_value
 
     def get_historical_prices(
         self,
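`select(pl.col("close").tail(1)).item()` is the polars idiom used here to pull a single scalar out of a frame; `.item()` requires the result to be exactly one row by one column. For reference, on a synthetic frame:

```python
import polars as pl

df = pl.DataFrame({"close": [100.0, 101.0, 102.5]})
last_close = df.select(pl.col("close").tail(1)).item()
assert last_close == 102.5
```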
@@ -458,7 +599,7 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         return_polars: bool = False,
     ) -> Optional[Bars]:
         """Get historical prices using polars."""
-        logger.
+        logger.info(f"[POLARS-DEBUG] get_historical_prices called: asset={asset.symbol}, length={length}, timestep={timestep}, datetime={self._datetime}")
         if timestep is None:
             timestep = self.get_timestep()
 
@@ -473,12 +614,17 @@ class DataBentoDataPolarsBacktesting(PolarsMixin, DataSourceBacktesting):
         )
 
         if bars_data is None:
+            logger.warning(f"[POLARS-DEBUG] ✗✗✗ _pull_source_symbol_bars returned None for {asset.symbol}")
             return None
 
+        logger.info(f"[POLARS-DEBUG] _pull_source_symbol_bars returned {len(bars_data)} bars")
+
         # Create and return Bars object
-        return self._parse_source_symbol_bars(
+        result = self._parse_source_symbol_bars(
             bars_data, asset, quote=quote, length=length, return_polars=return_polars
         )
+        logger.info(f"[POLARS-DEBUG] Returning Bars object: {result is not None}")
+        return result
 
     def get_chains(self, asset: Asset, quote: Asset = None, exchange: str = None):
         """Get option chains - not implemented for DataBento."""
lumibot/data_sources/pandas_data.py

@@ -103,8 +103,8 @@ class PandasData(DataSourceBacktesting):
         df = pd.DataFrame(range(len(dt_index)), index=dt_index)
         df = df.sort_index()
 
-        # Create a column for the date portion only
-        df["dates"] = df.index.date
+        # Create a column for the date portion only (normalize to date, keeping as datetime64 type)
+        df["dates"] = df.index.normalize()
 
         # Merge with the trading calendar on the 'dates' column to get market open/close times.
         # Use a left join to keep all rows from the original index.
@@ -145,7 +145,8 @@ class PandasData(DataSourceBacktesting):
 
         else:
             pcal.columns = ["datetime"]
-            pcal["date"] = pcal["datetime"].dt.date
+            # Normalize to date but keep as datetime64 type (not date objects)
+            pcal["date"] = pcal["datetime"].dt.normalize()
         result = pcal.groupby("date").agg(
             market_open=(
                 "datetime",
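The point of `.dt.normalize()` over `.dt.date` in both hunks is dtype preservation: normalize zeroes the time-of-day but keeps `datetime64[ns]`, while `.date` yields Python `date` objects in an `object` column, which breaks dtype-sensitive merges and groupbys. A quick check:

```python
import pandas as pd

s = pd.Series(pd.to_datetime(["2024-01-02 09:30", "2024-01-02 16:00"]))
print(s.dt.normalize().dtype)  # datetime64[ns]: midnight timestamps, dtype intact
print(s.dt.date.dtype)         # object: plain datetime.date instances
```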
@@ -290,6 +291,8 @@ class PandasData(DataSourceBacktesting):
                 ask=ohlcv_bid_ask_dict.get('ask'),
                 volume=ohlcv_bid_ask_dict.get('volume'),
                 timestamp=dt,
+                bid_size=ohlcv_bid_ask_dict.get('bid_size'),
+                ask_size=ohlcv_bid_ask_dict.get('ask_size'),
                 raw_data=ohlcv_bid_ask_dict
             )
         else:
lumibot/data_sources/polars_mixin.py

@@ -72,17 +72,19 @@ class PolarsMixin:
 
     def _get_data_lazy(self, asset: Asset) -> Optional[pl.LazyFrame]:
         """Get lazy frame for asset.
-
+
         Parameters
         ----------
-        asset : Asset
-            The asset to get data for
-
+        asset : Asset or tuple
+            The asset to get data for (can be a tuple of (asset, quote))
+
         Returns
        -------
         Optional[pl.LazyFrame]
             The lazy frame or None if not found
         """
+        # CRITICAL FIX: Handle both Asset and (Asset, quote) tuple keys
+        # The data store uses tuple keys (asset, quote), so we need to look up by that key
         return self._data_store.get(asset)
 
     def _parse_source_symbol_bars_polars(
@@ -95,7 +97,7 @@ class PolarsMixin:
         return_polars: bool = False
     ) -> Bars:
         """Parse bars from polars DataFrame.
-
+
         Parameters
         ----------
         response : pl.DataFrame
@@ -108,7 +110,7 @@ class PolarsMixin:
             The quote asset for forex/crypto
         length : Optional[int]
             Limit the number of bars
-
+
         Returns
         -------
         Bars
@@ -121,6 +123,21 @@ class PolarsMixin:
         if length and len(response) > length:
             response = response.tail(length)
 
+        # Filter to only keep OHLCV + datetime columns (remove DataBento metadata like rtype, publisher_id, etc.)
+        # Required columns for strategies
+        required_cols = ['open', 'high', 'low', 'close', 'volume']
+        optional_cols = ['datetime', 'timestamp', 'date', 'time', 'dividend', 'stock_splits', 'symbol']
+
+        # Determine which columns to keep
+        keep_cols = []
+        for col in response.columns:
+            if col in required_cols or col in optional_cols:
+                keep_cols.append(col)
+
+        # Select only the relevant columns
+        if keep_cols:
+            response = response.select(keep_cols)
+
         # Create bars object
         bars = Bars(response, source, asset, raw=response, quote=quote, return_polars=return_polars)
         return bars
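The column projection in the last hunk can also be written as one comprehension plus `select`; a sketch equivalent to the loop, with the allow-list copied from the hunk:

```python
import polars as pl

ALLOWED = {"open", "high", "low", "close", "volume",
           "datetime", "timestamp", "date", "time",
           "dividend", "stock_splits", "symbol"}

def keep_ohlcv_columns(df: pl.DataFrame) -> pl.DataFrame:
    """Drop provider metadata (rtype, publisher_id, ...) and keep only known columns."""
    keep = [c for c in df.columns if c in ALLOWED]
    return df.select(keep) if keep else df

df = pl.DataFrame({"datetime": [1], "close": [10.0], "publisher_id": [42]})
print(keep_ohlcv_columns(df).columns)  # ['datetime', 'close']
```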
@@ -209,22 +226,45 @@ class PolarsMixin:
         self._last_price_cache[cache_key] = price
 
     def _convert_datetime_for_filtering(self, dt: Any) -> datetime:
-        """Convert datetime to naive datetime for filtering.
-
+        """Convert datetime to naive UTC datetime for filtering.
+
+        CRITICAL FIX: Must convert to UTC BEFORE stripping timezone!
+        If we strip timezone from ET datetime, we lose 5 hours of data.
+
+        Example:
+        - Input: 2024-01-02 18:00:00-05:00 (ET)
+        - Convert to UTC: 2024-01-02 23:00:00+00:00
+        - Strip timezone: 2024-01-02 23:00:00 (naive UTC)
+
+        OLD BUGGY CODE:
+        - Input: 2024-01-02 18:00:00-05:00 (ET)
+        - Strip timezone: 2024-01-02 18:00:00 (naive, loses timezone!)
+        - Compare to cached data in naive UTC: WRONG by 5 hours!
+
         Parameters
         ----------
         dt : Any
             Datetime-like object
-
+
         Returns
         -------
         datetime
-            Naive datetime object
+            Naive UTC datetime object
         """
-        if hasattr(dt, 'tzinfo') and dt.tzinfo is not None:
-            return dt.replace(tzinfo=None)
+        from datetime import timezone
+
+        # First convert to UTC if timezone-aware
+        if hasattr(dt, 'tzinfo') and dt.tzinfo is not None:
+            # Convert to UTC
+            dt_utc = dt.astimezone(timezone.utc)
+            # Then strip timezone
+            return dt_utc.replace(tzinfo=None)
+        elif hasattr(dt, 'tz_localize'):
+            # Pandas Timestamp
+            return dt.tz_convert('UTC').tz_localize(None)
         elif hasattr(dt, 'replace'):
-            return dt
+            # Already naive
+            return dt
         else:
             return dt
 
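The corrected conversion is easy to verify with the stdlib alone; the sketch below uses a fixed UTC-5 offset as a stand-in for January Eastern time:

```python
from datetime import datetime, timedelta, timezone

et = timezone(timedelta(hours=-5))            # fixed-offset stand-in for winter ET
dt = datetime(2024, 1, 2, 18, 0, tzinfo=et)   # 18:00 ET

correct = dt.astimezone(timezone.utc).replace(tzinfo=None)  # convert first, then strip
buggy = dt.replace(tzinfo=None)               # old behavior: strip, keep wall time

assert correct == datetime(2024, 1, 2, 23, 0)  # naive UTC, comparable to cached data
assert buggy == datetime(2024, 1, 2, 18, 0)    # five hours off against naive-UTC data
```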
@@ -283,10 +323,11 @@ class PolarsMixin:
         lazy_data: pl.LazyFrame,
         end_filter: datetime,
         length: int,
-        timestep: str = "minute"
+        timestep: str = "minute",
+        use_strict_less_than: bool = False
     ) -> Optional[pl.DataFrame]:
         """Filter data up to end_filter and return last length rows.
-
+
         Parameters
         ----------
         asset : Asset
@@ -299,15 +340,23 @@ class PolarsMixin:
             Number of rows to return
         timestep : str
             Timestep for caching strategy
-
+        use_strict_less_than : bool
+            If True, use < instead of <= for filtering (matches Pandas behavior without timeshift)
+
         Returns
         -------
         Optional[pl.DataFrame]
             Filtered dataframe or None
         """
+        # DEBUG
+        logger.debug(f"[POLARS FILTER] end_filter={end_filter}, tzinfo={end_filter.tzinfo if hasattr(end_filter, 'tzinfo') else 'N/A'}, length={length}")
+
         # Convert end_filter to naive
         end_filter_naive = self._convert_datetime_for_filtering(end_filter)
 
+        # DEBUG
+        logger.debug(f"[POLARS FILTER] end_filter_naive={end_filter_naive}")
+
         # For daily timestep, use caching
         if timestep == "day":
             current_date = end_filter.date() if hasattr(end_filter, 'date') else end_filter
@@ -335,11 +384,37 @@ class PolarsMixin:
                 return None
 
             # Filter and collect
+            # CRITICAL FIX: Keep timezone info! Match the DataFrame's timezone
+            # Get the DataFrame column's timezone from schema
+            dt_dtype = schema[dt_col]
+
+            # Convert filter to match DataFrame's timezone
+            if hasattr(dt_dtype, 'time_zone') and dt_dtype.time_zone:
+                # DataFrame has timezone, convert filter to match
+                import pytz
+                df_tz = pytz.timezone(dt_dtype.time_zone)
+                end_filter_with_tz = pytz.utc.localize(end_filter_naive).astimezone(df_tz)
+            else:
+                # DataFrame is naive, use UTC
+                from datetime import timezone as tz
+                end_filter_with_tz = datetime.combine(
+                    end_filter_naive.date(),
+                    end_filter_naive.time(),
+                    tzinfo=tz.utc
+                )
+
+            # CRITICAL FIX: Deduplicate before caching
+            # Use < or <= based on use_strict_less_than flag
+            if use_strict_less_than:
+                filter_expr = pl.col(dt_col) < end_filter_with_tz
+            else:
+                filter_expr = pl.col(dt_col) <= end_filter_with_tz
+
             result = (
                 lazy_data
-                .
-                .filter(pl.col(dt_col) <= end_filter_naive)
+                .filter(filter_expr)
                 .sort(dt_col)
+                .unique(subset=[dt_col], keep='last', maintain_order=True)
                 .tail(fetch_length)
                 .collect()
             )
@@ -362,11 +437,41 @@ class PolarsMixin:
             logger.error("No datetime column found")
             return None
 
-        return (
+        # CRITICAL FIX: Keep timezone info during filtering!
+        # Match the DataFrame's timezone to avoid comparison errors
+        # Get the DataFrame column's timezone from schema
+        dt_dtype = schema[dt_col]
+
+        # Convert filter to match DataFrame's timezone
+        if hasattr(dt_dtype, 'time_zone') and dt_dtype.time_zone:
+            # DataFrame has timezone, convert filter to match
+            import pytz
+            df_tz = pytz.timezone(dt_dtype.time_zone)
+            end_filter_with_tz = pytz.utc.localize(end_filter_naive).astimezone(df_tz)
+        else:
+            # DataFrame is naive, use UTC
+            from datetime import timezone as tz
+            end_filter_with_tz = datetime.combine(
+                end_filter_naive.date(),
+                end_filter_naive.time(),
+                tzinfo=tz.utc
+            )
+
+        # CRITICAL FIX: Deduplicate before returning
+        # Sometimes lazy operations can create duplicates
+        # Use < or <= based on use_strict_less_than flag
+        if use_strict_less_than:
+            filter_expr = pl.col(dt_col) < end_filter_with_tz
+        else:
+            filter_expr = pl.col(dt_col) <= end_filter_with_tz
+
+        result = (
             lazy_data
-            .
-            .filter(pl.col(dt_col) <= end_filter_naive)
+            .filter(filter_expr)
             .sort(dt_col)
+            .unique(subset=[dt_col], keep='last', maintain_order=True)
             .tail(length)
             .collect()
         )
+
+        return result