lumibot 4.1.2__py3-none-any.whl → 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lumibot might be problematic.
- lumibot/backtesting/__init__.py +19 -5
- lumibot/backtesting/backtesting_broker.py +98 -18
- lumibot/backtesting/databento_backtesting.py +5 -686
- lumibot/backtesting/databento_backtesting_pandas.py +738 -0
- lumibot/backtesting/databento_backtesting_polars.py +860 -546
- lumibot/backtesting/fix_debug.py +37 -0
- lumibot/backtesting/thetadata_backtesting.py +9 -355
- lumibot/backtesting/thetadata_backtesting_pandas.py +1178 -0
- lumibot/brokers/alpaca.py +8 -1
- lumibot/brokers/schwab.py +12 -2
- lumibot/credentials.py +13 -0
- lumibot/data_sources/__init__.py +5 -8
- lumibot/data_sources/data_source.py +6 -2
- lumibot/data_sources/data_source_backtesting.py +30 -0
- lumibot/data_sources/databento_data.py +5 -390
- lumibot/data_sources/databento_data_pandas.py +440 -0
- lumibot/data_sources/databento_data_polars.py +15 -9
- lumibot/data_sources/pandas_data.py +30 -17
- lumibot/data_sources/polars_data.py +986 -0
- lumibot/data_sources/polars_mixin.py +472 -96
- lumibot/data_sources/polygon_data_polars.py +5 -0
- lumibot/data_sources/yahoo_data.py +9 -2
- lumibot/data_sources/yahoo_data_polars.py +5 -0
- lumibot/entities/__init__.py +15 -0
- lumibot/entities/asset.py +5 -28
- lumibot/entities/bars.py +89 -20
- lumibot/entities/data.py +29 -6
- lumibot/entities/data_polars.py +668 -0
- lumibot/entities/position.py +38 -4
- lumibot/strategies/_strategy.py +31 -9
- lumibot/strategies/strategy.py +61 -49
- lumibot/tools/backtest_cache.py +284 -0
- lumibot/tools/databento_helper.py +65 -42
- lumibot/tools/databento_helper_polars.py +748 -778
- lumibot/tools/futures_roll.py +251 -0
- lumibot/tools/indicators.py +135 -104
- lumibot/tools/polars_utils.py +142 -0
- lumibot/tools/thetadata_helper.py +1068 -134
- {lumibot-4.1.2.dist-info → lumibot-4.2.0.dist-info}/METADATA +9 -1
- {lumibot-4.1.2.dist-info → lumibot-4.2.0.dist-info}/RECORD +72 -148
- tests/backtest/test_databento.py +37 -6
- tests/backtest/test_databento_comprehensive_trading.py +70 -87
- tests/backtest/test_databento_parity.py +31 -7
- tests/backtest/test_debug_avg_fill_price.py +1 -1
- tests/backtest/test_example_strategies.py +11 -1
- tests/backtest/test_futures_edge_cases.py +96 -63
- tests/backtest/test_futures_single_trade.py +2 -2
- tests/backtest/test_futures_ultra_simple.py +2 -2
- tests/backtest/test_polars_lru_eviction.py +470 -0
- tests/backtest/test_yahoo.py +42 -0
- tests/test_asset.py +4 -4
- tests/test_backtest_cache_manager.py +149 -0
- tests/test_backtesting_data_source_env.py +50 -10
- tests/test_continuous_futures_resolution.py +60 -48
- tests/test_data_polars_parity.py +160 -0
- tests/test_databento_asset_validation.py +23 -5
- tests/test_databento_backtesting.py +1 -1
- tests/test_databento_backtesting_polars.py +312 -192
- tests/test_databento_data.py +220 -463
- tests/test_databento_helper.py +6 -1
- tests/test_databento_live.py +10 -10
- tests/test_futures_roll.py +38 -0
- tests/test_indicator_subplots.py +101 -0
- tests/test_market_infinite_loop_bug.py +77 -3
- tests/test_polars_resample.py +67 -0
- tests/test_polygon_helper.py +46 -0
- tests/test_thetadata_backwards_compat.py +97 -0
- tests/test_thetadata_helper.py +222 -23
- tests/test_thetadata_pandas_verification.py +186 -0
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- {lumibot-4.1.2.dist-info → lumibot-4.2.0.dist-info}/WHEEL +0 -0
- {lumibot-4.1.2.dist-info → lumibot-4.2.0.dist-info}/licenses/LICENSE +0 -0
- {lumibot-4.1.2.dist-info → lumibot-4.2.0.dist-info}/top_level.txt +0 -0
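The largest change below is the rewrite of the Polars-based DataBento backtesting source, which is rebased from DataSourceBacktesting onto PolarsData and given prefetching and per-iteration caching. As a minimal sketch of how the new class might be constructed (assuming it is importable from lumibot.backtesting, and using only the constructor parameters visible in the diff; exact usage may differ):

from datetime import datetime

from lumibot.backtesting import DataBentoDataBacktestingPolars  # assumed export path

# Parameter names and defaults are taken from the diff that follows; values here are illustrative.
data_source = DataBentoDataBacktestingPolars(
    datetime_start=datetime(2024, 1, 2),   # start of the backtest window
    datetime_end=datetime(2024, 3, 1),     # end of the backtest window
    api_key="db-your-api-key",             # DataBento API key
    timeout=30,                            # API request timeout in seconds (default shown in the diff)
    max_retries=3,                         # maximum API retry attempts (default shown in the diff)
)

The diff also adds a prefetch_data(assets, timestep="minute") helper that loads the whole backtest window per asset up front to reduce redundant API calls and log spam.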
lumibot/backtesting/databento_backtesting_polars.py
@@ -1,36 +1,39 @@
-
-
-from datetime import timedelta
+import traceback
+from datetime import datetime, timedelta

+import pandas as pd
 import polars as pl
-from polars.datatypes import Datetime as PlDatetime
-import pytz

-from lumibot
-from lumibot.
-from lumibot.
-from lumibot.
+from lumibot import LUMIBOT_DEFAULT_PYTZ
+from lumibot.data_sources import PolarsData
+from lumibot.entities import Asset, Data, Quote
+from lumibot.entities.data_polars import DataPolars
+from lumibot.tools import databento_helper_polars as databento_helper
+from lumibot.tools.databento_helper_polars import DataBentoAuthenticationError
+from lumibot.tools.helpers import to_datetime_aware
+from termcolor import colored

+from lumibot.tools.lumibot_logger import get_logger
 logger = get_logger(__name__)

+# Conversion tracking for optimization analysis
+def _log_conversion(operation, from_type, to_type, location):
+    """Log DataFrame conversions to track optimization progress."""
+    logger.debug(f"[CONVERSION] {operation} | {from_type} → {to_type} | {location}")
+
 START_BUFFER = timedelta(days=5)


-class DataBentoDataBacktestingPolars(
+class DataBentoDataBacktestingPolars(PolarsData):
     """
-
-
-    This class
-
+    Backtesting implementation of DataBento data source
+
+    This class extends PolarsData to provide DataBento-specific backtesting functionality.
+    Currently identical to pandas version - will be incrementally optimized to use Polars.
     """

-    SOURCE
-
-    TIMESTEP_MAPPING = [
-        {"timestep": "minute", "representations": ["1m", "minute", "1 minute"]},
-        {"timestep": "hour", "representations": ["1h", "hour", "1 hour"]},
-        {"timestep": "day", "representations": ["1d", "day", "1 day"]},
-    ]
+    # Override SOURCE so broker recognizes this as DataBento and applies correct timeshift
+    SOURCE = "DATABENTO_POLARS"

     def __init__(
         self,
@@ -40,12 +43,10 @@ class DataBentoDataBacktestingPolars(DataSourceBacktesting):
         api_key=None,
         timeout=30,
         max_retries=3,
-        max_memory=None,
-        enable_cache=True,
         **kwargs,
     ):
         """
-        Initialize DataBento backtesting data source
+        Initialize DataBento backtesting data source

         Parameters
         ----------
@@ -54,556 +55,719 @@ class DataBentoDataBacktestingPolars(DataSourceBacktesting):
|
|
|
54
55
|
datetime_end : datetime
|
|
55
56
|
End datetime for backtesting period
|
|
56
57
|
pandas_data : dict, optional
|
|
57
|
-
Pre-loaded pandas data
|
|
58
|
+
Pre-loaded pandas data
|
|
58
59
|
api_key : str
|
|
59
60
|
DataBento API key
|
|
60
61
|
timeout : int, optional
|
|
61
62
|
API request timeout in seconds, default 30
|
|
62
63
|
max_retries : int, optional
|
|
63
64
|
Maximum number of API retry attempts, default 3
|
|
64
|
-
max_memory : int, optional
|
|
65
|
-
Maximum memory usage in bytes for data storage
|
|
66
|
-
enable_cache : bool, optional
|
|
67
|
-
Enable caching of fetched data, default True
|
|
68
65
|
**kwargs
|
|
69
66
|
Additional parameters passed to parent class
|
|
70
67
|
"""
|
|
71
|
-
# Initialize parent
|
|
72
68
|
super().__init__(
|
|
73
69
|
datetime_start=datetime_start,
|
|
74
70
|
datetime_end=datetime_end,
|
|
71
|
+
pandas_data=pandas_data,
|
|
75
72
|
api_key=api_key,
|
|
76
73
|
**kwargs
|
|
77
74
|
)
|
|
78
75
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
import os
|
|
82
|
-
self._api_key = api_key or os.environ.get("DATABENTO_API_KEY")
|
|
83
|
-
if not self._api_key:
|
|
84
|
-
logger.error("DataBento API key not provided and DATABENTO_API_KEY environment variable not set")
|
|
85
|
-
else:
|
|
86
|
-
logger.info(f"DataBento API key loaded: {bool(self._api_key)}")
|
|
76
|
+
# Store DataBento-specific configuration
|
|
77
|
+
self._api_key = api_key
|
|
87
78
|
self._timeout = timeout
|
|
88
79
|
self._max_retries = max_retries
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
#
|
|
93
|
-
self.
|
|
94
|
-
self._eager_cache = {} # Asset -> pl.DataFrame
|
|
80
|
+
|
|
81
|
+
# Track which assets we've already fetched to avoid redundant requests
|
|
82
|
+
self._prefetched_assets = set()
|
|
83
|
+
# Track data requests to avoid repeated log messages
|
|
84
|
+
self._logged_requests = set()
|
|
95
85
|
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
self.
|
|
86
|
+
# OPTIMIZATION: Iteration-level caching to avoid redundant filtering
|
|
87
|
+
# Cache filtered DataFrames per iteration (datetime)
|
|
88
|
+
self._filtered_bars_cache = {} # {(asset_key, length, timestep, timeshift, dt): DataFrame}
|
|
89
|
+
self._last_price_cache = {} # {(asset_key, dt): price}
|
|
90
|
+
self._cache_datetime = None # Track when to invalidate cache
|
|
99
91
|
|
|
100
|
-
#
|
|
101
|
-
self.
|
|
92
|
+
# Track which futures assets we've fetched multipliers for (to avoid redundant API calls)
|
|
93
|
+
self._multiplier_fetched_assets = set()
|
|
102
94
|
|
|
103
|
-
#
|
|
104
|
-
|
|
95
|
+
# Verify DataBento availability
|
|
96
|
+
if not databento_helper.DATABENTO_AVAILABLE:
|
|
97
|
+
logger.error("DataBento package not available. Please install with: pip install databento")
|
|
98
|
+
raise ImportError("DataBento package not available")
|
|
105
99
|
|
|
106
|
-
|
|
107
|
-
self._cache_metadata = {} # cache_key -> {'min_dt': datetime, 'max_dt': datetime, 'count': int}
|
|
100
|
+
logger.debug(f"DataBento backtesting initialized for period: {datetime_start} to {datetime_end}")
|
|
108
101
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
self._store_data(asset, df)
|
|
102
|
+
def _check_and_clear_cache(self):
|
|
103
|
+
"""
|
|
104
|
+
OPTIMIZATION: Clear iteration caches when datetime changes.
|
|
105
|
+
This ensures fresh filtering for each new iteration while reusing
|
|
106
|
+
results within the same iteration.
|
|
107
|
+
"""
|
|
108
|
+
current_dt = self.get_datetime()
|
|
109
|
+
if self._cache_datetime != current_dt:
|
|
110
|
+
self._filtered_bars_cache.clear()
|
|
111
|
+
self._last_price_cache.clear()
|
|
112
|
+
self._cache_datetime = current_dt
|
|
121
113
|
|
|
122
|
-
def
|
|
123
|
-
"""
|
|
124
|
-
|
|
125
|
-
return None
|
|
126
|
-
if hasattr(dt, 'tzinfo') and dt.tzinfo is not None:
|
|
127
|
-
return dt.replace(tzinfo=None)
|
|
128
|
-
return dt
|
|
129
|
-
|
|
130
|
-
def _ensure_strategy_timezone(self, df: pl.DataFrame, column: str = "datetime") -> pl.DataFrame:
|
|
131
|
-
"""Ensure dataframe datetime column aligns with the strategy timezone."""
|
|
132
|
-
if df is None or column not in df.columns:
|
|
133
|
-
return df
|
|
134
|
-
|
|
135
|
-
dtype = df.schema.get(column)
|
|
136
|
-
strategy_tz = self.tzinfo.zone if hasattr(self.tzinfo, "zone") else str(self.tzinfo)
|
|
137
|
-
expr = pl.col(column)
|
|
138
|
-
|
|
139
|
-
if isinstance(dtype, PlDatetime):
|
|
140
|
-
if dtype.time_zone is None:
|
|
141
|
-
expr = expr.dt.replace_time_zone(strategy_tz)
|
|
142
|
-
elif dtype.time_zone != strategy_tz:
|
|
143
|
-
expr = expr.dt.convert_time_zone(strategy_tz)
|
|
144
|
-
else:
|
|
145
|
-
expr = expr.cast(pl.Datetime(time_unit="ns")).dt.replace_time_zone(strategy_tz)
|
|
114
|
+
def _ensure_futures_multiplier(self, asset):
|
|
115
|
+
"""
|
|
116
|
+
Ensure futures asset has correct multiplier set.
|
|
146
117
|
|
|
147
|
-
|
|
118
|
+
This method is idempotent and cached - safe to call multiple times.
|
|
119
|
+
Only fetches multiplier once per unique asset.
|
|
148
120
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
"Volume": "volume", "Dividends": "dividend", "Stock Splits": "stock_splits",
|
|
155
|
-
"Adj Close": "adj_close", "index": "datetime", "Date": "datetime"
|
|
156
|
-
}
|
|
121
|
+
Design rationale:
|
|
122
|
+
- Futures multipliers must be fetched from data provider (e.g., DataBento)
|
|
123
|
+
- Asset class defaults to multiplier=1
|
|
124
|
+
- Data source is responsible for updating multiplier on first use
|
|
125
|
+
- Lazy fetching is more efficient than prefetching all possible assets
|
|
157
126
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
127
|
+
Parameters
|
|
128
|
+
----------
|
|
129
|
+
asset : Asset
|
|
130
|
+
The asset to ensure has correct multiplier
|
|
131
|
+
"""
|
|
132
|
+
# Skip if not a futures asset
|
|
133
|
+
if asset.asset_type not in (Asset.AssetType.FUTURE, Asset.AssetType.CONT_FUTURE):
|
|
134
|
+
return
|
|
161
135
|
|
|
162
|
-
|
|
136
|
+
# Skip if multiplier already set to non-default value
|
|
137
|
+
if asset.multiplier != 1:
|
|
138
|
+
return
|
|
163
139
|
|
|
164
|
-
#
|
|
165
|
-
|
|
140
|
+
# Create cache key to track which assets we've already processed
|
|
141
|
+
# Use symbol + asset_type + expiration to handle different contracts
|
|
142
|
+
cache_key = (asset.symbol, asset.asset_type, getattr(asset, 'expiration', None))
|
|
166
143
|
|
|
167
|
-
#
|
|
168
|
-
self.
|
|
144
|
+
# Check if we already tried to fetch for this asset
|
|
145
|
+
if cache_key in self._multiplier_fetched_assets:
|
|
146
|
+
return # Already attempted (even if failed, don't retry every time)
|
|
169
147
|
|
|
170
|
-
#
|
|
171
|
-
|
|
148
|
+
# Mark as attempted to avoid redundant API calls
|
|
149
|
+
self._multiplier_fetched_assets.add(cache_key)
|
|
172
150
|
|
|
173
|
-
#
|
|
151
|
+
# Fetch and set multiplier from DataBento
|
|
174
152
|
try:
|
|
175
|
-
|
|
176
|
-
self._column_indices[asset] = {col: i for i, col in enumerate(schema.names())}
|
|
177
|
-
except:
|
|
178
|
-
# Fallback: collect a tiny sample for column info
|
|
179
|
-
sample = lazy_data.limit(1).collect()
|
|
180
|
-
self._column_indices[asset] = {col: i for i, col in enumerate(sample.columns)}
|
|
153
|
+
client = databento_helper.DataBentoClient(self._api_key)
|
|
181
154
|
|
|
182
|
-
|
|
183
|
-
|
|
155
|
+
# Resolve symbol based on asset type
|
|
156
|
+
if asset.asset_type == Asset.AssetType.CONT_FUTURE:
|
|
157
|
+
resolved_symbol = databento_helper._format_futures_symbol_for_databento(
|
|
158
|
+
asset, reference_date=self.datetime_start
|
|
159
|
+
)
|
|
160
|
+
else:
|
|
161
|
+
resolved_symbol = databento_helper._format_futures_symbol_for_databento(asset)
|
|
184
162
|
|
|
185
|
-
|
|
163
|
+
# Fetch multiplier from DataBento instrument definition
|
|
164
|
+
databento_helper._fetch_and_update_futures_multiplier(
|
|
165
|
+
client=client,
|
|
166
|
+
asset=asset,
|
|
167
|
+
resolved_symbol=resolved_symbol,
|
|
168
|
+
dataset="GLBX.MDP3",
|
|
169
|
+
reference_date=self.datetime_start
|
|
170
|
+
)
|
|
186
171
|
|
|
187
|
-
|
|
188
|
-
"""Enforce storage limit by removing least recently used data."""
|
|
189
|
-
if not self.MAX_STORAGE_BYTES:
|
|
190
|
-
return
|
|
172
|
+
logger.debug(f"Successfully set multiplier for {asset.symbol}: {asset.multiplier}")
|
|
191
173
|
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
174
|
+
except DataBentoAuthenticationError as e:
|
|
175
|
+
logger.error(colored(f"DataBento authentication failed while fetching multiplier for {asset.symbol}: {e}", "red"))
|
|
176
|
+
raise
|
|
177
|
+
except Exception as e:
|
|
178
|
+
logger.warning(f"Could not fetch multiplier for {asset.symbol}: {e}")
|
|
195
179
|
|
|
196
|
-
|
|
180
|
+
def prefetch_data(self, assets, timestep="minute"):
|
|
181
|
+
"""
|
|
182
|
+
Prefetch all required data for the specified assets for the entire backtest period.
|
|
183
|
+
This reduces redundant API calls and log spam during backtesting.
|
|
184
|
+
|
|
185
|
+
Parameters
|
|
186
|
+
----------
|
|
187
|
+
assets : list of Asset
|
|
188
|
+
List of assets to prefetch data for
|
|
189
|
+
timestep : str, optional
|
|
190
|
+
Timestep to fetch (default: "minute")
|
|
191
|
+
"""
|
|
192
|
+
if not assets:
|
|
193
|
+
return
|
|
194
|
+
|
|
195
|
+
logger.debug(f"Prefetching DataBento data for {len(assets)} assets...")
|
|
196
|
+
|
|
197
|
+
for asset in assets:
|
|
198
|
+
# Create search key for the asset
|
|
199
|
+
quote_asset = Asset("USD", "forex")
|
|
200
|
+
search_asset = (asset, quote_asset)
|
|
201
|
+
|
|
202
|
+
# Skip if already prefetched
|
|
203
|
+
if search_asset in self._prefetched_assets:
|
|
204
|
+
continue
|
|
205
|
+
|
|
197
206
|
try:
|
|
198
|
-
#
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
#
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
estimated_storage += estimated_bytes
|
|
215
|
-
items_with_sizes.append((asset, estimated_bytes))
|
|
216
|
-
except:
|
|
217
|
-
# If estimation fails, use default
|
|
218
|
-
items_with_sizes.append((asset, 100000)) # 100KB default
|
|
219
|
-
|
|
220
|
-
logger.debug(f"Estimated storage: {estimated_storage:,} bytes for {len(data_store)} items")
|
|
221
|
-
|
|
222
|
-
# Remove items if over limit
|
|
223
|
-
if estimated_storage > self.MAX_STORAGE_BYTES:
|
|
224
|
-
# Sort by size and remove largest first
|
|
225
|
-
items_with_sizes.sort(key=lambda x: x[1], reverse=True)
|
|
226
|
-
for asset, _ in items_with_sizes[:len(items_with_sizes)//2]:
|
|
227
|
-
if asset in data_store:
|
|
228
|
-
del data_store[asset]
|
|
229
|
-
if asset in self._eager_cache:
|
|
230
|
-
del self._eager_cache[asset]
|
|
231
|
-
if asset in self._column_indices:
|
|
232
|
-
del self._column_indices[asset]
|
|
233
|
-
if asset in self._filtered_data_cache:
|
|
234
|
-
# Clear related cache entries
|
|
235
|
-
keys_to_remove = [k for k in self._filtered_data_cache if k[0] == asset]
|
|
236
|
-
for k in keys_to_remove:
|
|
237
|
-
del self._filtered_data_cache[k]
|
|
238
|
-
logger.debug(f"Storage limit exceeded. Evicted data for {asset}")
|
|
239
|
-
|
|
240
|
-
def _convert_to_polars(self, df, asset=None):
|
|
241
|
-
"""Convert pandas DataFrame or raw data to polars DataFrame efficiently."""
|
|
242
|
-
if df is None:
|
|
243
|
-
return None
|
|
244
|
-
|
|
245
|
-
if isinstance(df, pl.DataFrame):
|
|
246
|
-
return df
|
|
247
|
-
|
|
248
|
-
# Convert pandas to polars
|
|
249
|
-
try:
|
|
250
|
-
if hasattr(df, 'index') and hasattr(df.index, 'name'):
|
|
251
|
-
pl_df = pl.from_pandas(df.reset_index())
|
|
252
|
-
else:
|
|
253
|
-
pl_df = pl.from_pandas(df)
|
|
254
|
-
|
|
255
|
-
# Ensure datetime column exists
|
|
256
|
-
datetime_cols = ['datetime', 'timestamp', 'ts_event', 'time']
|
|
257
|
-
datetime_col = None
|
|
258
|
-
for col in datetime_cols:
|
|
259
|
-
if col in pl_df.columns:
|
|
260
|
-
datetime_col = col
|
|
261
|
-
break
|
|
207
|
+
# Calculate start with buffer for better data coverage
|
|
208
|
+
start_datetime = self.datetime_start - START_BUFFER
|
|
209
|
+
end_datetime = self.datetime_end + timedelta(days=1)
|
|
210
|
+
|
|
211
|
+
logger.debug(f"Fetching {asset.symbol} data from {start_datetime.date()} to {end_datetime.date()}")
|
|
212
|
+
|
|
213
|
+
# Get data from DataBento for entire period
|
|
214
|
+
df = databento_helper.get_price_data_from_databento(
|
|
215
|
+
api_key=self._api_key,
|
|
216
|
+
asset=asset,
|
|
217
|
+
start=start_datetime,
|
|
218
|
+
end=end_datetime,
|
|
219
|
+
timestep=timestep,
|
|
220
|
+
venue=None,
|
|
221
|
+
force_cache_update=False
|
|
222
|
+
)
|
|
262
223
|
|
|
263
|
-
|
|
264
|
-
|
|
224
|
+
is_empty = False
|
|
225
|
+
if df is None:
|
|
226
|
+
is_empty = True
|
|
227
|
+
elif hasattr(df, "empty"):
|
|
228
|
+
is_empty = df.empty
|
|
229
|
+
elif hasattr(df, "is_empty"):
|
|
230
|
+
is_empty = df.is_empty()
|
|
231
|
+
|
|
232
|
+
if is_empty:
|
|
233
|
+
# For empty data, create an empty Data object with proper timezone handling
|
|
234
|
+
empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
|
|
235
|
+
# Create an empty DatetimeIndex with proper timezone
|
|
236
|
+
empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
|
|
237
|
+
|
|
238
|
+
data_obj = Data(
|
|
239
|
+
asset,
|
|
240
|
+
df=empty_df,
|
|
241
|
+
timestep=timestep,
|
|
242
|
+
quote=quote_asset,
|
|
243
|
+
# Explicitly set dates to avoid timezone issues
|
|
244
|
+
date_start=None,
|
|
245
|
+
date_end=None
|
|
246
|
+
)
|
|
247
|
+
self.pandas_data[search_asset] = data_obj
|
|
248
|
+
else:
|
|
249
|
+
pandas_df = df.to_pandas() if hasattr(df, "to_pandas") else df
|
|
250
|
+
# Create Data object and store
|
|
251
|
+
data_obj = Data(
|
|
252
|
+
asset,
|
|
253
|
+
df=pandas_df,
|
|
254
|
+
timestep=timestep,
|
|
255
|
+
quote=quote_asset,
|
|
256
|
+
)
|
|
257
|
+
self.pandas_data[search_asset] = data_obj
|
|
258
|
+
cached_len = len(pandas_df) if hasattr(pandas_df, "__len__") else 0
|
|
259
|
+
logger.debug(f"Cached {cached_len} rows for {asset.symbol}")
|
|
260
|
+
|
|
261
|
+
# Mark as prefetched
|
|
262
|
+
self._prefetched_assets.add(search_asset)
|
|
263
|
+
|
|
264
|
+
except DataBentoAuthenticationError as e:
|
|
265
|
+
logger.error(colored(f"DataBento authentication failed while prefetching {asset.symbol}: {e}", "red"))
|
|
266
|
+
raise
|
|
267
|
+
except Exception as e:
|
|
268
|
+
logger.error(f"Error prefetching data for {asset.symbol}: {str(e)}")
|
|
269
|
+
logger.error(traceback.format_exc())
|
|
270
|
+
|
|
271
|
+
def _update_pandas_data(self, asset, quote, length, timestep, start_dt=None):
|
|
272
|
+
"""
|
|
273
|
+
Get asset data and update the self.pandas_data dictionary.
|
|
265
274
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
logger.error(f"Error converting to polars DataFrame: {e}")
|
|
269
|
-
return None
|
|
275
|
+
This method retrieves historical data from DataBento and caches it for backtesting use.
|
|
276
|
+
If data has already been prefetched, it skips redundant API calls.
|
|
270
277
|
|
|
278
|
+
Parameters
|
|
279
|
+
----------
|
|
280
|
+
asset : Asset
|
|
281
|
+
The asset to get data for.
|
|
282
|
+
quote : Asset
|
|
283
|
+
The quote asset to use. For DataBento, this is typically not used.
|
|
284
|
+
length : int
|
|
285
|
+
The number of data points to get.
|
|
286
|
+
timestep : str
|
|
287
|
+
The timestep to use. For example, "minute", "hour", or "day".
|
|
288
|
+
start_dt : datetime, optional
|
|
289
|
+
The start datetime to use. If None, the current self.datetime_start will be used.
|
|
290
|
+
"""
|
|
291
|
+
search_asset = asset
|
|
292
|
+
asset_separated = asset
|
|
293
|
+
quote_asset = quote if quote is not None else Asset("USD", "forex")
|
|
271
294
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
timeshift=None,
|
|
278
|
-
quote=None,
|
|
279
|
-
exchange=None,
|
|
280
|
-
include_after_hours=True,
|
|
281
|
-
return_polars=False,
|
|
282
|
-
):
|
|
283
|
-
logger.info(
|
|
284
|
-
"[get_historical_prices] Getting historical prices for %s, length=%s, timestep=%s, current_dt=%s, datetime_start=%s",
|
|
285
|
-
asset.symbol,
|
|
286
|
-
length,
|
|
287
|
-
timestep,
|
|
288
|
-
self.get_datetime(),
|
|
289
|
-
self.datetime_start,
|
|
290
|
-
)
|
|
295
|
+
# Handle tuple assets (asset, quote pairs)
|
|
296
|
+
if isinstance(search_asset, tuple):
|
|
297
|
+
asset_separated, quote_asset = search_asset
|
|
298
|
+
else:
|
|
299
|
+
search_asset = (search_asset, quote_asset)
|
|
291
300
|
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
error_msg = (
|
|
295
|
-
f"DataBento only supports futures assets. Received '{asset.asset_type}' for '{asset.symbol}'"
|
|
296
|
-
)
|
|
297
|
-
logger.error(error_msg)
|
|
298
|
-
raise ValueError(error_msg)
|
|
301
|
+
# Ensure futures have correct multiplier set
|
|
302
|
+
self._ensure_futures_multiplier(asset_separated)
|
|
299
303
|
|
|
300
|
-
|
|
304
|
+
# If this asset was already prefetched, we don't need to do anything
|
|
305
|
+
if search_asset in self._prefetched_assets:
|
|
306
|
+
logger.debug(f"[CACHE HIT] Asset {asset_separated.symbol} already prefetched")
|
|
307
|
+
return
|
|
301
308
|
|
|
302
|
-
|
|
303
|
-
if
|
|
304
|
-
|
|
309
|
+
# Check if we already have adequate data for this asset
|
|
310
|
+
if search_asset in self.pandas_data:
|
|
311
|
+
logger.debug(f"[CACHE CHECK] Checking existing data for {asset_separated.symbol}")
|
|
312
|
+
asset_data = self.pandas_data[search_asset]
|
|
313
|
+
|
|
314
|
+
# OPTIMIZATION: For DataPolars, check polars_df directly without converting to pandas
|
|
315
|
+
if isinstance(asset_data, DataPolars):
|
|
316
|
+
# Use polars DataFrame directly to avoid conversion overhead
|
|
317
|
+
polars_df = asset_data.polars_df
|
|
318
|
+
if polars_df.height > 0:
|
|
319
|
+
# Get datetime bounds from polars DataFrame
|
|
320
|
+
data_start_datetime = polars_df["datetime"].min()
|
|
321
|
+
data_end_datetime = polars_df["datetime"].max()
|
|
322
|
+
|
|
323
|
+
# Convert polars datetime to pandas Timestamp
|
|
324
|
+
data_start_datetime = pd.Timestamp(data_start_datetime)
|
|
325
|
+
data_end_datetime = pd.Timestamp(data_end_datetime)
|
|
326
|
+
|
|
327
|
+
# Convert UTC to default timezone for proper comparison
|
|
328
|
+
if data_start_datetime.tz is not None:
|
|
329
|
+
data_start_datetime = data_start_datetime.tz_convert(LUMIBOT_DEFAULT_PYTZ)
|
|
330
|
+
else:
|
|
331
|
+
data_start_datetime = data_start_datetime.tz_localize(LUMIBOT_DEFAULT_PYTZ)
|
|
305
332
|
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
elif timestep == "hour":
|
|
334
|
-
buffer_hours = max(24, length // 2)
|
|
335
|
-
start_dt = current_dt_naive_utc - timedelta(hours=length + buffer_hours)
|
|
336
|
-
end_dt = min(current_dt_naive_utc + timedelta(days=30), future_end_naive)
|
|
337
|
-
coverage_buffer = timedelta(hours=6)
|
|
338
|
-
bar_delta = timedelta(hours=1)
|
|
339
|
-
else:
|
|
340
|
-
buffer_minutes = max(720, length + 100)
|
|
341
|
-
start_dt = current_dt_naive_utc - timedelta(minutes=buffer_minutes)
|
|
342
|
-
end_dt = min(current_dt_naive_utc + timedelta(days=3), future_end_naive)
|
|
343
|
-
coverage_buffer = timedelta(minutes=30)
|
|
344
|
-
bar_delta = timedelta(minutes=1)
|
|
345
|
-
|
|
346
|
-
start_dt = self._to_naive_datetime(start_dt)
|
|
347
|
-
end_dt = self._to_naive_datetime(end_dt)
|
|
348
|
-
|
|
349
|
-
# Guarantee the requested window spans at least a full bar to avoid inverted ranges
|
|
350
|
-
min_required_end = start_dt + bar_delta
|
|
351
|
-
if end_dt <= start_dt:
|
|
352
|
-
end_dt = min_required_end
|
|
353
|
-
elif end_dt < min_required_end:
|
|
354
|
-
end_dt = min_required_end
|
|
355
|
-
|
|
356
|
-
cached_df = None
|
|
357
|
-
coverage_ok = False
|
|
358
|
-
if cache_key in self._filtered_data_cache:
|
|
359
|
-
cached_df = self._ensure_strategy_timezone(self._filtered_data_cache[cache_key])
|
|
360
|
-
self._filtered_data_cache[cache_key] = cached_df
|
|
361
|
-
|
|
362
|
-
metadata = self._cache_metadata.get(cache_key)
|
|
363
|
-
if metadata:
|
|
364
|
-
cached_min = self._to_naive_datetime(metadata.get("min_dt"))
|
|
365
|
-
cached_max = self._to_naive_datetime(metadata.get("max_dt"))
|
|
333
|
+
if data_end_datetime.tz is not None:
|
|
334
|
+
data_end_datetime = data_end_datetime.tz_convert(LUMIBOT_DEFAULT_PYTZ)
|
|
335
|
+
else:
|
|
336
|
+
data_end_datetime = data_end_datetime.tz_localize(LUMIBOT_DEFAULT_PYTZ)
|
|
337
|
+
|
|
338
|
+
data_timestep = asset_data.timestep
|
|
339
|
+
|
|
340
|
+
if data_timestep == timestep:
|
|
341
|
+
# Use timezone-aware timestamps for comparison
|
|
342
|
+
data_start_tz = data_start_datetime
|
|
343
|
+
data_end_tz = data_end_datetime
|
|
344
|
+
|
|
345
|
+
start_datetime, _ = self.get_start_datetime_and_ts_unit(
|
|
346
|
+
length, timestep, start_dt, start_buffer=START_BUFFER
|
|
347
|
+
)
|
|
348
|
+
start_tz = to_datetime_aware(start_datetime)
|
|
349
|
+
|
|
350
|
+
# start_tz already includes START_BUFFER from get_start_datetime_and_ts_unit
|
|
351
|
+
needed_start = start_tz
|
|
352
|
+
needed_end = self.datetime_end
|
|
353
|
+
|
|
354
|
+
if data_start_tz <= needed_start and data_end_tz >= needed_end:
|
|
355
|
+
# Data is already sufficient - return without converting to pandas!
|
|
356
|
+
logger.debug(f"[CACHE HIT] Data sufficient for {asset_separated.symbol}, returning early")
|
|
357
|
+
return
|
|
358
|
+
else:
|
|
359
|
+
logger.debug(f"[CACHE MISS] Data insufficient - need: {needed_start} to {needed_end}, have: {data_start_tz} to {data_end_tz}")
|
|
366
360
|
else:
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
361
|
+
# For pandas Data objects, use the regular .df property
|
|
362
|
+
asset_data_df = asset_data.df
|
|
363
|
+
|
|
364
|
+
# Only check if we have actual data (not empty DataFrame)
|
|
365
|
+
if not asset_data_df.empty and len(asset_data_df.index) > 0:
|
|
366
|
+
data_start_datetime = asset_data_df.index[0]
|
|
367
|
+
data_end_datetime = asset_data_df.index[-1]
|
|
368
|
+
|
|
369
|
+
# Get the timestep of the existing data
|
|
370
|
+
data_timestep = asset_data.timestep
|
|
371
|
+
|
|
372
|
+
# If the timestep matches, check if we have sufficient coverage
|
|
373
|
+
if data_timestep == timestep:
|
|
374
|
+
# Ensure both datetimes are timezone-aware for comparison
|
|
375
|
+
data_start_tz = to_datetime_aware(data_start_datetime)
|
|
376
|
+
data_end_tz = to_datetime_aware(data_end_datetime)
|
|
377
|
+
|
|
378
|
+
# Get the start datetime with buffer
|
|
379
|
+
start_datetime, _ = self.get_start_datetime_and_ts_unit(
|
|
380
|
+
length, timestep, start_dt, start_buffer=START_BUFFER
|
|
381
|
+
)
|
|
382
|
+
start_tz = to_datetime_aware(start_datetime)
|
|
383
|
+
|
|
384
|
+
# start_tz already includes START_BUFFER from get_start_datetime_and_ts_unit
|
|
385
|
+
needed_start = start_tz
|
|
386
|
+
needed_end = self.datetime_end
|
|
387
|
+
|
|
388
|
+
if data_start_tz <= needed_start and data_end_tz >= needed_end:
|
|
389
|
+
# Data is already sufficient - return silently
|
|
390
|
+
return
|
|
391
|
+
|
|
392
|
+
# We need to fetch new data from DataBento
|
|
393
|
+
# Create a unique key for logging to avoid spam
|
|
394
|
+
log_key = f"{asset_separated.symbol}_{timestep}"
|
|
395
|
+
|
|
396
|
+
try:
|
|
397
|
+
# Only log fetch message once per asset/timestep combination
|
|
398
|
+
if log_key not in self._logged_requests:
|
|
399
|
+
logger.debug(f"Fetching {timestep} data for {asset_separated.symbol}")
|
|
400
|
+
self._logged_requests.add(log_key)
|
|
401
|
+
|
|
402
|
+
# Get the start datetime and timestep unit
|
|
403
|
+
start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
|
|
404
|
+
length, timestep, start_dt, start_buffer=START_BUFFER
|
|
389
405
|
)
|
|
390
406
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
if isinstance(timeshift, int) and timeshift > 0:
|
|
394
|
-
allow_current_bar = True
|
|
395
|
-
elif isinstance(timeshift, timedelta) and timeshift.total_seconds() > 0:
|
|
396
|
-
allow_current_bar = True
|
|
397
|
-
|
|
398
|
-
cutoff_dt = effective_dt if allow_current_bar else effective_dt - bar_delta
|
|
399
|
-
|
|
400
|
-
df_result = (
|
|
401
|
-
cached_df.lazy()
|
|
402
|
-
.filter(pl.col("datetime") <= pl.lit(cutoff_dt))
|
|
403
|
-
.sort("datetime")
|
|
404
|
-
.tail(length)
|
|
405
|
-
.collect()
|
|
406
|
-
)
|
|
407
|
-
|
|
408
|
-
if df_result.height >= length:
|
|
409
|
-
return Bars(
|
|
410
|
-
df=df_result,
|
|
411
|
-
source=self.SOURCE,
|
|
412
|
-
asset=asset,
|
|
413
|
-
quote=quote,
|
|
414
|
-
return_polars=return_polars,
|
|
415
|
-
)
|
|
416
|
-
else:
|
|
417
|
-
logger.debug(
|
|
418
|
-
"Cache coverage insufficient for %s (%s); requesting additional data.",
|
|
419
|
-
asset.symbol,
|
|
420
|
-
timestep,
|
|
421
|
-
)
|
|
407
|
+
# Calculate end datetime (use current backtest end or a bit beyond)
|
|
408
|
+
end_datetime = self.datetime_end + timedelta(days=1)
|
|
422
409
|
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
410
|
+
# NOTE: Sliding window clamping is disabled during initial data fetch
|
|
411
|
+
# to ensure we have sufficient data for the entire backtest period.
|
|
412
|
+
# Runtime trimming is handled by _trim_cached_data() which is called
|
|
413
|
+
# periodically during get_historical_prices().
|
|
414
|
+
#
|
|
415
|
+
# Premature clamping here causes accuracy issues when strategies request
|
|
416
|
+
# more lookback than the window size (e.g., 500 bars with 5000 bar window)
|
|
429
417
|
|
|
430
|
-
|
|
431
|
-
|
|
418
|
+
# Get data from DataBento (returns polars DataFrame by default)
|
|
419
|
+
_log_conversion("FETCH", "DataBento", "polars", "_update_pandas_data")
|
|
420
|
+
df = databento_helper.get_price_data_from_databento(
|
|
432
421
|
api_key=self._api_key,
|
|
433
|
-
asset=
|
|
434
|
-
start=
|
|
435
|
-
end=
|
|
436
|
-
timestep=
|
|
437
|
-
venue=
|
|
438
|
-
|
|
422
|
+
asset=asset_separated,
|
|
423
|
+
start=start_datetime,
|
|
424
|
+
end=end_datetime,
|
|
425
|
+
timestep=ts_unit,
|
|
426
|
+
venue=None, # Could add venue support later
|
|
427
|
+
force_cache_update=False,
|
|
428
|
+
return_polars=True # Fetch as polars for optimal performance
|
|
439
429
|
)
|
|
440
430
|
|
|
441
|
-
if
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
431
|
+
# Check if DataFrame is empty (works for both pandas and polars)
|
|
432
|
+
is_empty = df is None or (hasattr(df, 'is_empty') and df.is_empty()) or (hasattr(df, 'empty') and df.empty)
|
|
433
|
+
|
|
434
|
+
if is_empty:
|
|
435
|
+
# For empty data, create an empty Data object with proper timezone handling
|
|
436
|
+
# to maintain backward compatibility with tests
|
|
437
|
+
empty_df = pd.DataFrame(columns=['open', 'high', 'low', 'close', 'volume'])
|
|
438
|
+
# Create an empty DatetimeIndex with proper timezone
|
|
439
|
+
empty_df.index = pd.DatetimeIndex([], tz=LUMIBOT_DEFAULT_PYTZ, name='datetime')
|
|
440
|
+
|
|
441
|
+
data_obj = Data(
|
|
442
|
+
asset_separated,
|
|
443
|
+
df=empty_df,
|
|
444
|
+
timestep=ts_unit,
|
|
445
|
+
quote=quote_asset,
|
|
446
|
+
# Use timezone-aware dates to avoid timezone issues
|
|
447
|
+
date_start=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1)),
|
|
448
|
+
date_end=LUMIBOT_DEFAULT_PYTZ.localize(datetime(2000, 1, 1))
|
|
445
449
|
)
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
450
|
+
self.pandas_data[search_asset] = data_obj
|
|
451
|
+
return
|
|
452
|
+
|
|
453
|
+
# Handle polars DataFrame (has 'datetime' column) or pandas DataFrame (has datetime index)
|
|
454
|
+
if isinstance(df, pl.DataFrame):
|
|
455
|
+
_log_conversion("STORE", "polars", "DataPolars", "_update_pandas_data")
|
|
456
|
+
logger.debug(f"[POLARS] Storing polars DataFrame for {asset_separated.symbol}: {df.height} rows")
|
|
457
|
+
# Create DataPolars object with polars DataFrame (keeps polars end-to-end)
|
|
458
|
+
data_obj = DataPolars(
|
|
459
|
+
asset_separated,
|
|
460
|
+
df=df,
|
|
461
|
+
timestep=ts_unit,
|
|
462
|
+
quote=quote_asset,
|
|
451
463
|
)
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
self._filtered_data_cache[cache_key] = combined_df
|
|
464
|
-
|
|
465
|
-
cache_min = combined_df.lazy().select(pl.col("datetime").min()).collect().item()
|
|
466
|
-
cache_max = combined_df.lazy().select(pl.col("datetime").max()).collect().item()
|
|
467
|
-
cache_min = self._to_naive_datetime(cache_min)
|
|
468
|
-
cache_max = self._to_naive_datetime(cache_max)
|
|
469
|
-
self._cache_metadata[cache_key] = {
|
|
470
|
-
"min_dt": cache_min,
|
|
471
|
-
"max_dt": cache_max,
|
|
472
|
-
"count": combined_df.height,
|
|
473
|
-
}
|
|
474
|
-
df_to_use = combined_df
|
|
475
|
-
else:
|
|
476
|
-
df_to_use = df
|
|
477
|
-
|
|
478
|
-
allow_current_bar = getattr(self, "_include_current_bar_for_orders", False)
|
|
479
|
-
if isinstance(timeshift, int) and timeshift > 0:
|
|
480
|
-
allow_current_bar = True
|
|
481
|
-
elif isinstance(timeshift, timedelta) and timeshift.total_seconds() > 0:
|
|
482
|
-
allow_current_bar = True
|
|
483
|
-
|
|
484
|
-
cutoff_dt_api = effective_dt if allow_current_bar else effective_dt - bar_delta
|
|
485
|
-
|
|
486
|
-
df_result = (
|
|
487
|
-
df_to_use.lazy()
|
|
488
|
-
.filter(pl.col("datetime") <= pl.lit(cutoff_dt_api))
|
|
489
|
-
.sort("datetime")
|
|
490
|
-
.tail(length)
|
|
491
|
-
.collect()
|
|
492
|
-
)
|
|
493
|
-
|
|
494
|
-
if df_result.is_empty():
|
|
495
|
-
logger.warning(
|
|
496
|
-
"No data available for %s up to %s",
|
|
497
|
-
asset.symbol,
|
|
498
|
-
effective_dt,
|
|
464
|
+
elif isinstance(df, pd.DataFrame):
|
|
465
|
+
# Ensure the pandas DataFrame has a datetime index
|
|
466
|
+
if not isinstance(df.index, pd.DatetimeIndex):
|
|
467
|
+
logger.error(f"DataBento data for {asset_separated.symbol} doesn't have datetime index")
|
|
468
|
+
return
|
|
469
|
+
# Create Data object with pandas DataFrame
|
|
470
|
+
data_obj = Data(
|
|
471
|
+
asset_separated,
|
|
472
|
+
df=df,
|
|
473
|
+
timestep=ts_unit,
|
|
474
|
+
quote=quote_asset,
|
|
499
475
|
)
|
|
500
|
-
|
|
476
|
+
else:
|
|
477
|
+
logger.error(f"Unexpected DataFrame type: {type(df)}")
|
|
478
|
+
return
|
|
501
479
|
|
|
502
|
-
|
|
503
|
-
df=df_result,
|
|
504
|
-
source=self.SOURCE,
|
|
505
|
-
asset=asset,
|
|
506
|
-
quote=quote,
|
|
507
|
-
return_polars=return_polars,
|
|
508
|
-
tzinfo=self.tzinfo,
|
|
509
|
-
)
|
|
480
|
+
self.pandas_data[search_asset] = data_obj
|
|
510
481
|
|
|
482
|
+
except DataBentoAuthenticationError as e:
|
|
483
|
+
logger.error(colored(f"DataBento authentication failed for {asset_separated.symbol}: {e}", "red"))
|
|
484
|
+
raise
|
|
511
485
|
except Exception as e:
|
|
512
|
-
logger.error(f"Error
|
|
513
|
-
|
|
486
|
+
logger.error(f"Error updating pandas data for {asset_separated.symbol}: {str(e)}")
|
|
487
|
+
logger.error(traceback.format_exc())
|
|
514
488
|
|
|
515
489
|
def get_last_price(self, asset, quote=None, exchange=None):
|
|
516
490
|
"""
|
|
517
|
-
Get the last
|
|
518
|
-
|
|
491
|
+
Get the last price for an asset at the current backtest time
|
|
492
|
+
|
|
519
493
|
Parameters
|
|
520
494
|
----------
|
|
521
495
|
asset : Asset
|
|
522
|
-
|
|
496
|
+
Asset to get the price for
|
|
523
497
|
quote : Asset, optional
|
|
524
|
-
Quote asset (not used
|
|
498
|
+
Quote asset (not typically used with DataBento)
|
|
525
499
|
exchange : str, optional
|
|
526
|
-
Exchange
|
|
527
|
-
|
|
500
|
+
Exchange filter
|
|
501
|
+
|
|
528
502
|
Returns
|
|
529
503
|
-------
|
|
530
|
-
float or None
|
|
531
|
-
Last
|
|
504
|
+
float, Decimal, or None
|
|
505
|
+
Last price at current backtest time
|
|
532
506
|
"""
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
cached_price = self._last_price_cache[cache_key]
|
|
537
|
-
logger.debug(f"Using cached last price for {asset.symbol}: {cached_price}")
|
|
538
|
-
return cached_price
|
|
539
|
-
|
|
540
|
-
logger.debug(f"Getting last price for {asset.symbol}")
|
|
541
|
-
|
|
542
|
-
# Try to get from lazy data first (more memory efficient)
|
|
543
|
-
if asset in self._data_store:
|
|
544
|
-
lazy_df = self._data_store[asset]
|
|
545
|
-
|
|
546
|
-
# Get current time for filtering
|
|
507
|
+
try:
|
|
508
|
+
# OPTIMIZATION: Check cache first
|
|
509
|
+
self._check_and_clear_cache()
|
|
547
510
|
current_dt = self.get_datetime()
|
|
548
511
|
|
|
549
|
-
#
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
else:
|
|
553
|
-
current_dt_naive = current_dt
|
|
554
|
-
|
|
555
|
-
# Get last price with single lazy operation
|
|
556
|
-
try:
|
|
557
|
-
cutoff_dt_lp = current_dt_naive - timedelta(minutes=1)
|
|
558
|
-
last_price = (
|
|
559
|
-
lazy_df
|
|
560
|
-
.filter(pl.col('datetime') <= pl.lit(cutoff_dt_lp))
|
|
561
|
-
.select(pl.col('close').tail(1))
|
|
562
|
-
.collect()
|
|
563
|
-
.item()
|
|
564
|
-
)
|
|
512
|
+
# Try to get data from our cached pandas_data first
|
|
513
|
+
search_asset = asset
|
|
514
|
+
quote_asset = quote if quote is not None else Asset("USD", "forex")
|
|
565
515
|
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
if
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
516
|
+
if isinstance(search_asset, tuple):
|
|
517
|
+
asset_separated, quote_asset = search_asset
|
|
518
|
+
else:
|
|
519
|
+
search_asset = (search_asset, quote_asset)
|
|
520
|
+
asset_separated = asset
|
|
521
|
+
|
|
522
|
+
# Ensure futures have correct multiplier set
|
|
523
|
+
self._ensure_futures_multiplier(asset_separated)
|
|
524
|
+
|
|
525
|
+
# OPTIMIZATION: Check iteration cache
|
|
526
|
+
cache_key = (search_asset, current_dt)
|
|
527
|
+
if cache_key in self._last_price_cache:
|
|
528
|
+
return self._last_price_cache[cache_key]
|
|
529
|
+
|
|
530
|
+
if search_asset not in self.pandas_data:
|
|
531
|
+
fetch_timestep = getattr(self, '_timestep', self.MIN_TIMESTEP if hasattr(self, 'MIN_TIMESTEP') else 'minute')
|
|
532
|
+
self._update_pandas_data(asset_separated, quote_asset, length=10, timestep=fetch_timestep)
|
|
533
|
+
|
|
534
|
+
if search_asset in self.pandas_data:
|
|
535
|
+
asset_data = self.pandas_data[search_asset]
|
|
536
|
+
|
|
537
|
+
# OPTIMIZATION: If asset_data is DataPolars, work with polars directly to avoid conversion
|
|
538
|
+
if isinstance(asset_data, DataPolars):
|
|
539
|
+
polars_df = asset_data.polars_df
|
|
540
|
+
|
|
541
|
+
if polars_df.height > 0 and 'close' in polars_df.columns:
|
|
542
|
+
# Ensure current_dt is timezone-aware for comparison
|
|
543
|
+
current_dt_aware = to_datetime_aware(current_dt)
|
|
544
|
+
|
|
545
|
+
# Step back one bar so only fully closed bars are visible
|
|
546
|
+
bar_delta = timedelta(minutes=1)
|
|
547
|
+
if asset_data.timestep == "hour":
|
|
548
|
+
bar_delta = timedelta(hours=1)
|
|
549
|
+
elif asset_data.timestep == "day":
|
|
550
|
+
bar_delta = timedelta(days=1)
|
|
551
|
+
|
|
552
|
+
cutoff_dt = current_dt_aware - bar_delta
|
|
553
|
+
|
|
554
|
+
# Convert to UTC for polars comparison (polars DataFrame datetime is in UTC)
|
|
555
|
+
polars_tz = polars_df["datetime"].dtype.time_zone
|
|
556
|
+
if polars_tz:
|
|
557
|
+
cutoff_dt_compat = pd.Timestamp(cutoff_dt).tz_convert(polars_tz)
|
|
558
|
+
current_dt_compat = pd.Timestamp(current_dt_aware).tz_convert(polars_tz)
|
|
559
|
+
else:
|
|
560
|
+
cutoff_dt_compat = cutoff_dt
|
|
561
|
+
current_dt_compat = current_dt_aware
|
|
562
|
+
|
|
563
|
+
# Filter using polars operations (no conversion!)
|
|
564
|
+
filtered_df = polars_df.filter(pl.col("datetime") <= cutoff_dt_compat)
|
|
565
|
+
|
|
566
|
+
# If we have no prior bar (e.g., first iteration), allow the current timestamp
|
|
567
|
+
if filtered_df.height == 0:
|
|
568
|
+
filtered_df = polars_df.filter(pl.col("datetime") <= current_dt_compat)
|
|
569
|
+
|
|
570
|
+
if filtered_df.height > 0:
|
|
571
|
+
last_price = filtered_df['close'][-1]
|
|
572
|
+
if not pd.isna(last_price):
|
|
573
|
+
price = float(last_price)
|
|
574
|
+
# OPTIMIZATION: Cache the result
|
|
575
|
+
self._last_price_cache[cache_key] = price
|
|
576
|
+
return price
|
|
577
|
+
else:
|
|
578
|
+
# For regular Data objects, use pandas operations
|
|
579
|
+
df = asset_data.df
|
|
580
|
+
|
|
581
|
+
if not df.empty and 'close' in df.columns:
|
|
582
|
+
# Ensure current_dt is timezone-aware for comparison
|
|
583
|
+
current_dt_aware = to_datetime_aware(current_dt)
|
|
584
|
+
|
|
585
|
+
# Step back one bar so only fully closed bars are visible
|
|
586
|
+
bar_delta = timedelta(minutes=1)
|
|
587
|
+
if asset_data.timestep == "hour":
|
|
588
|
+
bar_delta = timedelta(hours=1)
|
|
589
|
+
elif asset_data.timestep == "day":
|
|
590
|
+
bar_delta = timedelta(days=1)
|
|
591
|
+
|
|
592
|
+
cutoff_dt = current_dt_aware - bar_delta
|
|
593
|
+
|
|
594
|
+
# Filter to data up to current backtest time (exclude current bar unless broker overrides)
|
|
595
|
+
filtered_df = df[df.index <= cutoff_dt]
|
|
596
|
+
|
|
597
|
+
# If we have no prior bar (e.g., first iteration), allow the current timestamp
|
|
598
|
+
if filtered_df.empty:
|
|
599
|
+
filtered_df = df[df.index <= current_dt_aware]
|
|
600
|
+
|
|
601
|
+
if not filtered_df.empty:
|
|
602
|
+
last_price = filtered_df['close'].iloc[-1]
|
|
603
|
+
if not pd.isna(last_price):
|
|
604
|
+
price = float(last_price)
|
|
605
|
+
# OPTIMIZATION: Cache the result
|
|
606
|
+
self._last_price_cache[cache_key] = price
|
|
607
|
+
return price
|
|
608
|
+
|
|
609
|
+
# If no cached data, try to get recent data
|
|
610
|
+
logger.warning(f"No cached data for {asset.symbol}, attempting direct fetch")
|
|
611
|
+
return databento_helper.get_last_price_from_databento(
|
|
612
|
+
api_key=self._api_key,
|
|
613
|
+
asset=asset_separated,
|
|
614
|
+
venue=exchange
|
|
615
|
+
)
|
|
616
|
+
|
|
617
|
+
except DataBentoAuthenticationError as e:
|
|
618
|
+
logger.error(colored(f"DataBento authentication failed while getting last price for {asset.symbol}: {e}", "red"))
|
|
619
|
+
raise
|
|
620
|
+
except Exception as e:
|
|
621
|
+
logger.error(f"Error getting last price for {asset.symbol}: {e}")
|
|
622
|
+
return None
|
|
589
623
|
|
|
590
624
|
def get_chains(self, asset, quote=None):
|
|
591
|
-
"""
|
|
625
|
+
"""
|
|
626
|
+
Get option chains for an asset
|
|
627
|
+
|
|
628
|
+
DataBento doesn't provide options chain data, so this returns an empty dict.
|
|
629
|
+
|
|
630
|
+
Parameters
|
|
631
|
+
----------
|
|
632
|
+
asset : Asset
|
|
633
|
+
Asset to get chains for
|
|
634
|
+
quote : Asset, optional
|
|
635
|
+
Quote asset
|
|
636
|
+
|
|
637
|
+
Returns
|
|
638
|
+
-------
|
|
639
|
+
dict
|
|
640
|
+
Empty dictionary
|
|
641
|
+
"""
|
|
592
642
|
logger.warning("DataBento does not provide options chain data")
|
|
593
643
|
return {}
|
|
594
644
|
|
|
595
645
|
def get_quote(self, asset, quote=None):
|
|
596
|
-
"""
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
646
|
+
"""Return a Quote object using cached bars or a direct fetch."""
|
|
647
|
+
try:
|
|
648
|
+
search_asset = asset if isinstance(asset, tuple) else (asset, Asset("USD", "forex"))
|
|
649
|
+
asset_data = self.pandas_data.get(search_asset)
|
|
650
|
+
df = None
|
|
651
|
+
if isinstance(asset_data, DataPolars):
|
|
652
|
+
df = asset_data.polars_df
|
|
653
|
+
elif asset_data is not None:
|
|
654
|
+
df = asset_data.polars_df if hasattr(asset_data, "polars_df") else asset_data.df
|
|
655
|
+
if df is None:
|
|
656
|
+
default_timestep = getattr(self, "_timestep", self.MIN_TIMESTEP if hasattr(self, "MIN_TIMESTEP") else "minute")
|
|
657
|
+
df = self._pull_source_symbol_bars(asset, length=1, timestep=default_timestep)
|
|
658
|
+
bid = ask = price = volume = mid = None
|
|
659
|
+
if isinstance(df, pl.DataFrame) and df.height > 0:
|
|
660
|
+
row = df.row(0, named=True)
|
|
661
|
+
bid = row.get("bid")
|
|
662
|
+
ask = row.get("ask")
|
|
663
|
+
price = row.get("close")
|
|
664
|
+
volume = row.get("volume")
|
|
665
|
+
elif isinstance(df, pd.DataFrame) and not df.empty:
|
|
666
|
+
row = df.iloc[-1]
|
|
667
|
+
bid = row.get("bid")
|
|
668
|
+
ask = row.get("ask")
|
|
669
|
+
price = row.get("close")
|
|
670
|
+
volume = row.get("volume")
|
|
671
|
+
if bid is not None and ask is not None:
|
|
672
|
+
mid = float(bid + ask) / 2.0
|
|
673
|
+
quote_obj = Quote(
|
|
674
|
+
asset if not isinstance(asset, tuple) else asset[0],
|
|
675
|
+
price=float(price) if price is not None else None,
|
|
676
|
+
bid=float(bid) if bid is not None else None,
|
|
677
|
+
ask=float(ask) if ask is not None else None,
|
|
678
|
+
volume=float(volume) if volume is not None else None,
|
|
679
|
+
mid_price=mid,
|
|
680
|
+
raw_data={"bid": bid, "ask": ask, "price": price},
|
|
681
|
+
)
|
|
682
|
+
quote_obj.source = "polars"
|
|
683
|
+
return quote_obj
|
|
684
|
+
except DataBentoAuthenticationError as exc:
|
|
685
|
+
logger.error(colored(f"DataBento authentication failed while getting quote for {asset}: {exc}", "red"))
|
|
686
|
+
raise
|
|
687
|
+
except Exception as exc:
|
|
688
|
+
logger.error(f"Error getting quote for {asset}: {exc}")
|
|
689
|
+
return Quote(asset if not isinstance(asset, tuple) else asset[0], raw_data={})
|
|
690
|
+
|
|
691
|
+    def _get_bars_dict(self, assets, length, timestep, timeshift=None):
+        """
+        Override parent method to handle DataBento-specific data retrieval
+
+        Parameters
+        ----------
+        assets : list
+            List of assets to get data for
+        length : int
+            Number of bars to retrieve
+        timestep : str
+            Timestep for the data
+        timeshift : timedelta, optional
+            Time shift to apply
+
+        Returns
+        -------
+        dict
+            Dictionary mapping assets to their bar data
+        """
+        result = {}
+
+        for asset in assets:
+            try:
+                # Update pandas data if needed
+                self._update_pandas_data(asset, None, length, timestep)
+
+                # Get data from pandas_data
+                search_asset = asset
+                if not isinstance(search_asset, tuple):
+                    search_asset = (search_asset, Asset("USD", "forex"))
+
+                if search_asset in self.pandas_data:
+                    asset_data = self.pandas_data[search_asset]
+                    df = asset_data.df
+
+                    if not df.empty:
+                        # Apply timeshift if specified
+                        current_dt = self.get_datetime()
+                        shift_seconds = 0
+                        if timeshift:
+                            if isinstance(timeshift, int):
+                                shift_seconds = timeshift * 60
+                                current_dt = current_dt - timedelta(minutes=timeshift)
+                            else:
+                                shift_seconds = timeshift.total_seconds()
+                                current_dt = current_dt - timeshift
+
+                        # Ensure current_dt is timezone-aware for comparison
+                        current_dt_aware = to_datetime_aware(current_dt)
+
+                        # Filter data up to current backtest time (exclude current bar unless broker overrides)
+                        include_current = getattr(self, "_include_current_bar_for_orders", False)
+                        allow_current = include_current or shift_seconds > 0
+                        mask = df.index <= current_dt_aware if allow_current else df.index < current_dt_aware
+                        filtered_df = df[mask]
+
+                        # Take the last 'length' bars
+                        result_df = filtered_df.tail(length)
+
+                        if not result_df.empty:
+                            result[asset] = result_df
+                        else:
+                            logger.warning(f"No data available for {asset.symbol} at {current_dt}")
+                            result[asset] = None
+                    else:
+                        logger.warning(f"Empty data for {asset.symbol}")
+                        result[asset] = None
+                else:
+                    logger.warning(f"No data found for {asset.symbol}")
+                    result[asset] = None
+
+            except DataBentoAuthenticationError as e:
+                logger.error(colored(f"DataBento authentication failed while getting bars for {asset}: {e}", "red"))
+                raise
+            except Exception as e:
+                logger.error(f"Error getting bars for {asset}: {e}")
+                result[asset] = None
+
+        return result

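Aside (illustration, not part of the diff): in the mask above, allow_current decides whether the bar stamped at the broker's current time is visible (<=) or hidden (<). A toy pandas example of the same masking, independent of lumibot data:

    import pandas as pd

    # Five one-minute bars; pretend the broker clock reads 09:32.
    idx = pd.date_range("2025-01-02 09:30", periods=5, freq="min", tz="America/New_York")
    df = pd.DataFrame({"close": [1.0, 2.0, 3.0, 4.0, 5.0]}, index=idx)
    current_dt = pd.Timestamp("2025-01-02 09:32", tz="America/New_York")

    allow_current = False  # mirrors _include_current_bar_for_orders defaulting to False
    mask = df.index <= current_dt if allow_current else df.index < current_dt
    print(df[mask].tail(2))  # shows the 09:30 and 09:31 bars; the in-progress 09:32 bar stays hidden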
     def _pull_source_symbol_bars(
         self,
@@ -616,62 +780,212 @@ class DataBentoDataBacktestingPolars(DataSourceBacktesting):
         include_after_hours=True,
     ):
         """
-
-
-        This is
-
-
-        Parameters
-        ----------
-        asset : Asset
-            The asset to get data for
-        length : int
-            Number of bars to retrieve
-        timestep : str
-            Timestep for the data ('minute', 'hour', 'day')
-        timeshift : int
-            Minutes to shift back in time
-        quote : Asset, optional
-            Quote asset (not used for DataBento)
-        exchange : str, optional
-            Exchange/venue filter
-        include_after_hours : bool
-            Whether to include after-hours data
-
-        Returns
-        -------
-        pandas.DataFrame
-            Historical price data with datetime index
+        Override parent method to fetch data from DataBento instead of pre-loaded data store
+
+        This method is called by get_historical_prices and is responsible for actually
+        fetching the data from the DataBento API.
         """
         timestep = timestep if timestep else "minute"

-
-
-
-        bars = self.get_historical_prices(
-            asset=asset,
-            length=length,
-            timestep=timestep,
-            timeshift=timedelta(minutes=timeshift) if timeshift else None,
-            quote=quote,
-            exchange=exchange,
-            include_after_hours=include_after_hours
-        )
+        # OPTIMIZATION: Check iteration cache first
+        self._check_and_clear_cache()
+        current_dt = self.get_datetime()

-
-
-
+        # Get data from our cached pandas_data
+        search_asset = asset
+        quote_asset = quote if quote is not None else Asset("USD", "forex")

-        if
-
-
+        if isinstance(search_asset, tuple):
+            asset_separated, quote_asset = search_asset
+        else:
+            search_asset = (search_asset, quote_asset)
+            asset_separated = asset

-        #
-        #
-
-
-
-
-
-
+        # OPTIMIZATION: Build cache key and check cache
+        # Convert timeshift to consistent format for caching
+        timeshift_key = 0
+        if timeshift:
+            if isinstance(timeshift, int):
+                timeshift_key = timeshift
+            else:
+                timeshift_key = int(timeshift.total_seconds() / 60)
+
+        cache_key = (search_asset, length, timestep, timeshift_key, current_dt)
+        if cache_key in self._filtered_bars_cache:
+            return self._filtered_bars_cache[cache_key]
+
+        # Check if we need to fetch data by calling _update_pandas_data first
+        # This will only fetch if data is not already cached or prefetched
+        self._update_pandas_data(asset, quote, length, timestep)
+
+        # Check if we have data in pandas_data cache
+        if search_asset in self.pandas_data:
+            asset_data = self.pandas_data[search_asset]
+
+            # OPTIMIZATION: If asset_data is DataPolars, work with polars directly to avoid conversion
+            if isinstance(asset_data, DataPolars):
+                polars_df = asset_data.polars_df
+
+                if polars_df.height > 0:
+                    # ========================================================================
+                    # CRITICAL: NEGATIVE TIMESHIFT ARITHMETIC FOR LOOKAHEAD (MATCHES PANDAS)
+                    # ========================================================================
+                    # Negative timeshift allows broker to "peek ahead" for realistic fills.
+                    # This arithmetic MUST match pandas exactly: current_dt - timeshift
+                    # With timeshift=-2: current_dt - (-2) = current_dt + 2 minutes ✓
+                    # ========================================================================
+                    shift_seconds = 0
+                    if timeshift:
+                        if isinstance(timeshift, int):
+                            shift_seconds = timeshift * 60
+                            current_dt = current_dt - timedelta(minutes=timeshift)  # FIXED: was +, now matches pandas
+                        else:
+                            shift_seconds = timeshift.total_seconds()
+                            current_dt = current_dt - timeshift  # FIXED: was +, now matches pandas
+
+                    # Ensure current_dt is timezone-aware for comparison
+                    current_dt_aware = to_datetime_aware(current_dt)
+
+                    # Step back one bar to avoid exposing the in-progress bar
+                    bar_delta = timedelta(minutes=1)
+                    if asset_data.timestep == "hour":
+                        bar_delta = timedelta(hours=1)
+                    elif asset_data.timestep == "day":
+                        bar_delta = timedelta(days=1)
+
+                    cutoff_dt = current_dt_aware - bar_delta
+
+                    # Convert to UTC for polars comparison (polars DataFrame datetime is in UTC)
+                    # Get the timezone from polars DataFrame
+                    polars_tz = polars_df["datetime"].dtype.time_zone
+                    if polars_tz:
+                        # Convert current_dt_aware to match polars timezone
+                        cutoff_dt_compat = pd.Timestamp(cutoff_dt).tz_convert(polars_tz)
+                        current_dt_compat = pd.Timestamp(current_dt_aware).tz_convert(polars_tz)
+                    else:
+                        cutoff_dt_compat = cutoff_dt
+                        current_dt_compat = current_dt_aware
+
+                    # INSTRUMENTATION: Log timeshift application and filtering
+                    broker_dt_orig = self.get_datetime()
+                    filter_branch = "shift_seconds > 0 (<=cutoff)" if shift_seconds > 0 else "shift_seconds <= 0 (<current)"
+
+                    # Filter using polars operations (no conversion!)
+                    if shift_seconds > 0:
+                        filtered_df = polars_df.filter(pl.col("datetime") <= cutoff_dt_compat)
+                    else:
+                        filtered_df = polars_df.filter(pl.col("datetime") < current_dt_compat)
+
+                    # Log what bar we're returning
+                    if filtered_df.height > 0:
+                        returned_bar_dt = filtered_df["datetime"][-1]
+                        logger.debug(f"[TIMESHIFT_POLARS] asset={asset_separated.symbol} broker_dt={broker_dt_orig} "
+                                     f"timeshift={timeshift} shift_seconds={shift_seconds} "
+                                     f"shifted_dt={current_dt_aware} cutoff_dt={cutoff_dt} "
+                                     f"filter={filter_branch} returned_bar={returned_bar_dt}")
+
+                    # Take the last 'length' bars
+                    result_df = filtered_df.tail(length)
+
+                    # OPTIMIZATION: Cache the result before returning
+                    if result_df.height > 0:
+                        self._filtered_bars_cache[cache_key] = result_df
+                        return result_df
+                    else:
+                        self._filtered_bars_cache[cache_key] = None
+                        return None
+                else:
+                    return None
+            else:
+                # For regular Data objects, use pandas operations
+                df = asset_data.df
+
+                if not df.empty:
+                    # ========================================================================
+                    # CRITICAL: NEGATIVE TIMESHIFT ARITHMETIC FOR LOOKAHEAD (MATCHES PANDAS)
+                    # ========================================================================
+                    # Negative timeshift allows broker to "peek ahead" for realistic fills.
+                    # This arithmetic MUST match pandas exactly: current_dt - timeshift
+                    # With timeshift=-2: current_dt - (-2) = current_dt + 2 minutes ✓
+                    # ========================================================================
+                    shift_seconds = 0
+                    if timeshift:
+                        if isinstance(timeshift, int):
+                            shift_seconds = timeshift * 60
+                            current_dt = current_dt - timedelta(minutes=timeshift)  # FIXED: was +, now matches pandas
+                        else:
+                            shift_seconds = timeshift.total_seconds()
+                            current_dt = current_dt - timeshift  # FIXED: was +, now matches pandas
+
+                    # Ensure current_dt is timezone-aware for comparison
+                    current_dt_aware = to_datetime_aware(current_dt)
+
+                    # Step back one bar to avoid exposing the in-progress bar
+                    bar_delta = timedelta(minutes=1)
+                    if asset_data.timestep == "hour":
+                        bar_delta = timedelta(hours=1)
+                    elif asset_data.timestep == "day":
+                        bar_delta = timedelta(days=1)
+
+                    cutoff_dt = current_dt_aware - bar_delta
+
+                    # INSTRUMENTATION: Log timeshift application and filtering (pandas fallback)
+                    broker_dt_orig = self.get_datetime()
+                    filter_branch = "shift_seconds > 0 (<=cutoff)" if shift_seconds > 0 else "shift_seconds <= 0 (<current)"
+
+                    # Filter data up to current backtest time (exclude current bar unless broker overrides)
+                    filtered_df = df[df.index <= cutoff_dt] if shift_seconds > 0 else df[df.index < current_dt_aware]
+
+                    # Log what bar we're returning
+                    if not filtered_df.empty:
+                        returned_bar_dt = filtered_df.index[-1]
+                        logger.debug(f"[TIMESHIFT_POLARS_PD] asset={asset_separated.symbol} broker_dt={broker_dt_orig} "
+                                     f"timeshift={timeshift} shift_seconds={shift_seconds} "
+                                     f"shifted_dt={current_dt_aware} cutoff_dt={cutoff_dt} "
+                                     f"filter={filter_branch} returned_bar={returned_bar_dt}")
+
+                    # Take the last 'length' bars
+                    result_df = filtered_df.tail(length)
+
+                    # OPTIMIZATION: Cache the result before returning
+                    if not result_df.empty:
+                        self._filtered_bars_cache[cache_key] = result_df
+                        return result_df
+                    else:
+                        self._filtered_bars_cache[cache_key] = None
+                        return None
+                else:
+                    return None
+        else:
+            return None
+
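Aside (illustration, not part of the diff): the sign convention called out as CRITICAL above can be checked in isolation. Subtracting a negative timeshift moves the effective time forward, which is the lookahead the backtesting broker relies on for realistic fills, while a positive timeshift looks back into history:

    from datetime import datetime, timedelta

    current_dt = datetime(2025, 1, 2, 9, 32)

    # timeshift = -2 minutes: current_dt - (-2 min) == current_dt + 2 min (peek ahead)
    assert current_dt - timedelta(minutes=-2) == datetime(2025, 1, 2, 9, 34)

    # timeshift = +5 minutes: current_dt - 5 min (ordinary look-back)
    assert current_dt - timedelta(minutes=5) == datetime(2025, 1, 2, 9, 27)

The per-iteration cache keyed on (search_asset, length, timestep, timeshift_key, current_dt) then makes repeated requests within the same backtest step cheap, since the key only changes when the broker clock advances.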
+    def initialize_data_for_backtest(self, strategy_assets, timestep="minute"):
+        """
+        Convenience method to prefetch all required data for a backtest strategy.
+        This should be called during strategy initialization to load all data up front.
+
+        Parameters
+        ----------
+        strategy_assets : list of Asset or list of str
+            List of assets or asset symbols that the strategy will use
+        timestep : str, optional
+            Primary timestep for the data (default: "minute")
+        """
+        # Convert string symbols to Asset objects if needed
+        assets = []
+        for asset in strategy_assets:
+            if isinstance(asset, str):
+                # Try to determine asset type from symbol format
+                if any(month in asset for month in ['F', 'G', 'H', 'J', 'K', 'M', 'N', 'Q', 'U', 'V', 'X', 'Z']):
+                    # Looks like a futures symbol
+                    assets.append(Asset(asset, "future"))
+                else:
+                    # Default to stock
+                    assets.append(Asset(asset, "stock"))
+            else:
+                assets.append(asset)
+
+        # Prefetch data for all assets
+        self.prefetch_data(assets, timestep)
+
+        logger.debug(f"Initialized DataBento backtesting with prefetched data for {len(assets)} assets")
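Aside (illustration, not part of the diff): a hypothetical call pattern for the new prefetch helper from a strategy's setup code. The symbols and the data_source variable here are invented for illustration; Asset comes from lumibot.entities:

    # Hypothetical usage sketch; assumes data_source is an instance of this
    # DataBento polars backtesting data source.
    data_source.initialize_data_for_backtest(
        ["MESZ5", Asset("AAPL", "stock")],  # the string is classified by the month-letter heuristic above
        timestep="minute",
    )

Note that the string classification is permissive: any symbol containing one of the twelve futures month codes is treated as a future, so passing Asset objects directly avoids ambiguity.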