lumibot 4.1.3__py3-none-any.whl → 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lumibot might be problematic.

Files changed (163)
  1. lumibot/backtesting/__init__.py +19 -5
  2. lumibot/backtesting/backtesting_broker.py +98 -18
  3. lumibot/backtesting/databento_backtesting.py +5 -686
  4. lumibot/backtesting/databento_backtesting_pandas.py +738 -0
  5. lumibot/backtesting/databento_backtesting_polars.py +860 -546
  6. lumibot/backtesting/fix_debug.py +37 -0
  7. lumibot/backtesting/thetadata_backtesting.py +9 -355
  8. lumibot/backtesting/thetadata_backtesting_pandas.py +1178 -0
  9. lumibot/brokers/alpaca.py +8 -1
  10. lumibot/brokers/schwab.py +12 -2
  11. lumibot/credentials.py +13 -0
  12. lumibot/data_sources/__init__.py +5 -8
  13. lumibot/data_sources/data_source.py +6 -2
  14. lumibot/data_sources/data_source_backtesting.py +30 -0
  15. lumibot/data_sources/databento_data.py +5 -390
  16. lumibot/data_sources/databento_data_pandas.py +440 -0
  17. lumibot/data_sources/databento_data_polars.py +15 -9
  18. lumibot/data_sources/pandas_data.py +30 -17
  19. lumibot/data_sources/polars_data.py +986 -0
  20. lumibot/data_sources/polars_mixin.py +472 -96
  21. lumibot/data_sources/polygon_data_polars.py +5 -0
  22. lumibot/data_sources/yahoo_data.py +9 -2
  23. lumibot/data_sources/yahoo_data_polars.py +5 -0
  24. lumibot/entities/__init__.py +15 -0
  25. lumibot/entities/asset.py +5 -28
  26. lumibot/entities/bars.py +89 -20
  27. lumibot/entities/data.py +29 -6
  28. lumibot/entities/data_polars.py +668 -0
  29. lumibot/entities/position.py +38 -4
  30. lumibot/strategies/_strategy.py +2 -1
  31. lumibot/strategies/strategy.py +61 -49
  32. lumibot/tools/backtest_cache.py +284 -0
  33. lumibot/tools/databento_helper.py +35 -35
  34. lumibot/tools/databento_helper_polars.py +738 -775
  35. lumibot/tools/futures_roll.py +251 -0
  36. lumibot/tools/indicators.py +135 -104
  37. lumibot/tools/polars_utils.py +142 -0
  38. lumibot/tools/thetadata_helper.py +1068 -134
  39. {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/METADATA +9 -1
  40. {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/RECORD +71 -147
  41. tests/backtest/test_databento.py +37 -6
  42. tests/backtest/test_databento_comprehensive_trading.py +8 -4
  43. tests/backtest/test_databento_parity.py +4 -2
  44. tests/backtest/test_debug_avg_fill_price.py +1 -1
  45. tests/backtest/test_example_strategies.py +11 -1
  46. tests/backtest/test_futures_edge_cases.py +3 -3
  47. tests/backtest/test_futures_single_trade.py +2 -2
  48. tests/backtest/test_futures_ultra_simple.py +2 -2
  49. tests/backtest/test_polars_lru_eviction.py +470 -0
  50. tests/backtest/test_yahoo.py +42 -0
  51. tests/test_asset.py +4 -4
  52. tests/test_backtest_cache_manager.py +149 -0
  53. tests/test_backtesting_data_source_env.py +6 -0
  54. tests/test_continuous_futures_resolution.py +60 -48
  55. tests/test_data_polars_parity.py +160 -0
  56. tests/test_databento_asset_validation.py +23 -5
  57. tests/test_databento_backtesting.py +1 -1
  58. tests/test_databento_backtesting_polars.py +312 -192
  59. tests/test_databento_data.py +220 -463
  60. tests/test_databento_live.py +10 -10
  61. tests/test_futures_roll.py +38 -0
  62. tests/test_indicator_subplots.py +101 -0
  63. tests/test_market_infinite_loop_bug.py +77 -3
  64. tests/test_polars_resample.py +67 -0
  65. tests/test_polygon_helper.py +46 -0
  66. tests/test_thetadata_backwards_compat.py +97 -0
  67. tests/test_thetadata_helper.py +222 -23
  68. tests/test_thetadata_pandas_verification.py +186 -0
  69. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  70. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  71. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  72. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  73. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  74. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  75. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  76. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  77. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  78. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  79. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  80. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  81. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  82. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  83. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  85. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  86. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  87. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  88. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  89. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  90. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  91. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  92. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  93. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  94. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  95. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  96. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  97. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  98. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  99. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  100. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  101. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  102. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  103. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  104. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  105. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  106. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  107. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  108. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  109. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  110. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  111. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  112. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  113. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  114. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  115. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  116. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  117. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  118. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  119. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  120. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  121. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  122. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  123. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  124. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  125. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  126. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  127. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  128. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  129. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  130. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  131. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  132. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  133. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  134. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  135. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  136. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  137. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  138. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  139. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  140. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  141. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  142. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  143. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  144. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  145. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  146. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  147. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  148. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  149. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  150. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  151. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  152. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  153. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  154. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  155. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  156. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  157. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  158. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  159. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  160. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  161. {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/WHEEL +0 -0
  162. {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/licenses/LICENSE +0 -0
  163. {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/top_level.txt +0 -0
lumibot/entities/data_polars.py (new file, +668 lines)
@@ -0,0 +1,668 @@
+import datetime
+from decimal import Decimal
+from typing import Optional, Union
+
+import pandas as pd
+import polars as pl
+
+from lumibot.constants import LUMIBOT_DEFAULT_PYTZ as DEFAULT_PYTZ
+from lumibot.tools.helpers import parse_timestep_qty_and_unit, to_datetime_aware
+from lumibot.tools.lumibot_logger import get_logger
+
+from .asset import Asset
+from .dataline import Dataline
+
+logger = get_logger(__name__)
+
+# Set the option to raise an error if downcasting is not possible (if available in this pandas version)
+try:
+    pd.set_option('future.no_silent_downcasting', True)
+except (pd._config.config.OptionError, AttributeError):
+    # Option not available in this pandas version, skip it
+    pass
+
+
+class DataPolars:
+    """Input and manage Polars dataframes for backtesting.
+
+    This is a polars-optimized version of the Data class that stores data as polars
+    DataFrames internally and only converts to pandas when explicitly requested.
+
+    Parameters
+    ----------
+    asset : Asset object
+        Asset to which this data is attached.
+    df : polars.DataFrame
+        Polars DataFrame containing OHLCV etc. trade data.
+        Must have a 'datetime' column with a datetime type.
+        Other columns are strictly ["open", "high", "low", "close", "volume"].
+    quote : Asset object
+        The quote asset for this data. If not provided, the quote asset defaults to USD.
+    date_start : datetime.datetime or None
+        Starting date for this data; if not provided, the first date in
+        the dataframe is used.
+    date_end : datetime.datetime or None
+        Ending date for this data; if not provided, the last date in
+        the dataframe is used.
+    trading_hours_start : datetime.time or None
+        If not supplied, the default is 0001 hrs.
+    trading_hours_end : datetime.time or None
+        If not supplied, the default is 2359 hrs.
+    timestep : str
+        Either "minute" (default) or "day".
+    timezone : str or None
+        If not None, localize the timezone of the dataframe to the
+        given timezone as a string. The value can be anything supported
+        by tz_localize, e.g. "US/Eastern", "UTC", etc.
+
+    Attributes
+    ----------
+    asset : Asset object
+        Asset object to which this data is attached.
+    symbol : str
+        The underlying or stock symbol as a string.
+    polars_df : polars.DataFrame
+        Polars DataFrame containing OHLCV etc. trade data.
+        Has a 'datetime' column with a datetime type.
+        Other columns are strictly ["open", "high", "low", "close", "volume"].
+    df : pandas.DataFrame (property)
+        Pandas DataFrame conversion for compatibility.
+        This is computed on demand and cached.
+    date_start : datetime.datetime or None
+        Starting date for this data.
+    date_end : datetime.datetime or None
+        Ending date for this data.
+    trading_hours_start : datetime.time or None
+        Trading hours start time.
+    trading_hours_end : datetime.time or None
+        Trading hours end time.
+    timestep : str
+        Either "minute" (default) or "day".
+    """
+
+    MIN_TIMESTEP = "minute"
+    TIMESTEP_MAPPING = [
+        {"timestep": "day", "representations": ["1D", "day"]},
+        {"timestep": "minute", "representations": ["1M", "minute"]},
+    ]
+
+    def __init__(
+        self,
+        asset,
+        df,
+        date_start=None,
+        date_end=None,
+        trading_hours_start=datetime.time(0, 0),
+        trading_hours_end=datetime.time(23, 59),
+        timestep="minute",
+        quote=None,
+        timezone=None,
+    ):
+        self.asset = asset
+        self.symbol = self.asset.symbol
+
+        if self.asset.asset_type == "crypto" and quote is None:
+            raise ValueError(
+                f"A crypto asset {self.symbol} was added to data without a corresponding "
+                f"`quote` asset. Please add the quote asset. For example, if trying to add "
+                f"`BTCUSD` to data, you would need to add `USD` as the quote asset. "
+                f"Quote must be provided for crypto assets."
+            )
+        else:
+            self.quote = quote
+
+        # Throw an error if the quote is not an asset object
+        if self.quote is not None and not isinstance(self.quote, Asset):
+            raise ValueError(
+                f"The quote asset for DataPolars must be an Asset object. You provided a {type(self.quote)} object."
+            )
+
+        if timestep not in ["minute", "day"]:
+            raise ValueError(
+                f"Timestep must be either 'minute' or 'day', the value you entered ({timestep}) is not currently supported."
+            )
+
+        self.timestep = timestep
+
+        # Store the polars DataFrame
+        self.polars_df = self._columns(df)
+
+        # Ensure datetime column exists and is properly typed
+        if "datetime" not in self.polars_df.columns:
+            raise ValueError("Polars DataFrame must have a 'datetime' column")
+
+        # Convert datetime column to proper type if needed
+        # CRITICAL: Preserve timezone if it already exists (e.g., UTC from DataBento)
+        dtype = self.polars_df.schema["datetime"]
+        if isinstance(dtype, pl.datatypes.Datetime) and dtype.time_zone:
+            # Column already has timezone, preserve it during cast
+            desired = pl.datatypes.Datetime(time_unit=dtype.time_unit, time_zone=dtype.time_zone)
+            self.polars_df = self.polars_df.with_columns(pl.col("datetime").cast(desired))
+        elif self.polars_df["datetime"].dtype != pl.Datetime:
+            # No timezone, cast to naive datetime
+            self.polars_df = self.polars_df.with_columns(
+                pl.col("datetime").cast(pl.Datetime(time_unit="ns"))
+            )
+
+        # Apply timezone if specified
+        if timezone is not None:
+            # For polars, we'll handle timezone in the pandas conversion
+            self._timezone = timezone
+        else:
+            self._timezone = None
+
+        # Set dates and times
+        self.polars_df = self.polars_df.sort("datetime")
+
+        self.trading_hours_start, self.trading_hours_end = self.set_times(trading_hours_start, trading_hours_end)
+        self.date_start, self.date_end = self.set_dates(date_start, date_end)
+
+        self.polars_df = self.trim_data(
+            self.polars_df,
+            self.date_start,
+            self.date_end,
+            self.trading_hours_start,
+            self.trading_hours_end,
+        )
+
+        # Set datetime start and end from polars DataFrame
+        self.datetime_start = self.polars_df["datetime"][0]
+        self.datetime_end = self.polars_df["datetime"][-1]
+
+        # Convert polars datetime to pandas datetime for compatibility
+        if hasattr(self.datetime_start, 'to_pydatetime'):
+            self.datetime_start = self.datetime_start.to_pydatetime()
+        if hasattr(self.datetime_end, 'to_pydatetime'):
+            self.datetime_end = self.datetime_end.to_pydatetime()
+
+        # Cached pandas DataFrame (lazy conversion)
+        self._pandas_df = None
+
+    @property
+    def df(self):
+        """Return pandas DataFrame for compatibility. Converts from polars on-demand."""
+        if self._pandas_df is None:
+            logger.debug(f"[CONVERSION] DataPolars.df | polars → pandas | {self.symbol}")
+
+            # Check if polars datetime has timezone
+            polars_tz = None
+            if "datetime" in self.polars_df.columns:
+                polars_tz = self.polars_df["datetime"].dtype.time_zone
+
+            # Convert polars to pandas and set datetime as index
+            self._pandas_df = self.polars_df.to_pandas()
+
+            if "datetime" in self._pandas_df.columns:
+                self._pandas_df.set_index("datetime", inplace=True)
+
+            # Apply timezone conversion: UTC → America/New_York
+            if self._timezone is not None:
+                # Explicit timezone parameter takes priority
+                if not self._pandas_df.index.tzinfo:
+                    self._pandas_df.index = self._pandas_df.index.tz_localize(self._timezone)
+                else:
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(self._timezone)
+            elif polars_tz is not None:
+                # Polars had timezone (e.g., UTC from DataBento), convert to DEFAULT_PYTZ
+                if not self._pandas_df.index.tzinfo:
+                    # Timezone lost during conversion, re-localize then convert
+                    self._pandas_df.index = self._pandas_df.index.tz_localize(polars_tz)
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+                elif str(self._pandas_df.index.tz) != str(DEFAULT_PYTZ):
+                    # Timezone preserved, just convert
+                    self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+            elif not self._pandas_df.index.tzinfo:
+                # No timezone info, localize to DEFAULT_PYTZ
+                self._pandas_df.index = self._pandas_df.index.tz_localize(DEFAULT_PYTZ)
+            elif str(self._pandas_df.index.tz) != str(DEFAULT_PYTZ):
+                # Different timezone, convert to DEFAULT_PYTZ
+                self._pandas_df.index = self._pandas_df.index.tz_convert(DEFAULT_PYTZ)
+
+        return self._pandas_df
+
+    def set_times(self, trading_hours_start, trading_hours_end):
+        """Set the start and end times for the data. The default is 0001 hrs to 2359 hrs."""
+        if self.timestep == "minute":
+            ts = trading_hours_start
+            te = trading_hours_end
+        else:
+            ts = datetime.time(0, 0)
+            te = datetime.time(23, 59, 59, 999999)
+        return ts, te
+
+    def _columns(self, df):
+        """Adjust column names to lower case."""
+        # Rename columns to lowercase if they match OHLCV
+        rename_map = {}
+        for col in df.columns:
+            if col.lower() in ["open", "high", "low", "close", "volume"]:
+                rename_map[col] = col.lower()
+
+        if rename_map:
+            df = df.rename(rename_map)
+
+        return df
+
+    def set_dates(self, date_start, date_end):
+        """Set the start and end dates of the data."""
+        for dt in [date_start, date_end]:
+            if dt and not isinstance(dt, datetime.datetime):
+                raise TypeError(f"Start and End dates must be entered as full datetimes. {dt} was entered")
+
+        if not date_start:
+            date_start = self.polars_df["datetime"].min()
+            if hasattr(date_start, 'to_pydatetime'):
+                date_start = date_start.to_pydatetime()
+        if not date_end:
+            date_end = self.polars_df["datetime"].max()
+            if hasattr(date_end, 'to_pydatetime'):
+                date_end = date_end.to_pydatetime()
+
+        date_start = to_datetime_aware(date_start)
+        date_end = to_datetime_aware(date_end)
+
+        date_start = date_start.replace(hour=0, minute=0, second=0, microsecond=0)
+        date_end = date_end.replace(hour=23, minute=59, second=59, microsecond=999999)
+
+        return date_start, date_end
+
+    def trim_data(self, df, date_start, date_end, trading_hours_start, trading_hours_end):
+        """Trim the polars dataframe to match the desired backtesting dates."""
+        # Align date comparisons to polars datetime column timezone (matching pandas approach)
+        datetime_tz = df["datetime"].dtype.time_zone if "datetime" in df.columns else None
+
+        # Convert comparison timestamps to match column timezone
+        if datetime_tz is not None:
+            # Column has timezone, align dates to it
+            date_start_aligned = pd.Timestamp(date_start).tz_convert(datetime_tz) if hasattr(pd.Timestamp(date_start), 'tz_convert') else pd.Timestamp(date_start).tz_localize(datetime_tz)
+            date_end_aligned = pd.Timestamp(date_end).tz_convert(datetime_tz) if hasattr(pd.Timestamp(date_end), 'tz_convert') else pd.Timestamp(date_end).tz_localize(datetime_tz)
+        else:
+            # Column is naive, make dates naive too
+            date_start_aligned = pd.Timestamp(date_start).tz_localize(None) if hasattr(pd.Timestamp(date_start), 'tz') and pd.Timestamp(date_start).tz else pd.Timestamp(date_start)
+            date_end_aligned = pd.Timestamp(date_end).tz_localize(None) if hasattr(pd.Timestamp(date_end), 'tz') and pd.Timestamp(date_end).tz else pd.Timestamp(date_end)
+
+        # Filter by date range
+        df = df.filter(
+            (pl.col("datetime") >= date_start_aligned) & (pl.col("datetime") <= date_end_aligned)
+        )
+
+        # Filter by trading hours if minute data
+        if self.timestep == "minute":
+            df = df.filter(
+                (pl.col("datetime").dt.time() >= trading_hours_start) &
+                (pl.col("datetime").dt.time() <= trading_hours_end)
+            )
+
+        if df.height == 0:
+            raise ValueError(
+                f"When attempting to load a dataframe for {self.asset}, "
+                f"an empty dataframe was returned. This is likely due "
+                f"to your backtesting start and end dates not being "
+                f"within the start and end dates of the data provided. "
+                f"\nPlease check that at least one of your start "
+                f"or end dates for backtesting is within the range of "
+                f"your start and end dates for your data."
+            )
+        return df
+
+    def repair_times_and_fill(self, idx):
+        """Create datalines and fill missing values.
+
+        This converts to pandas for compatibility with the existing dataline system.
+        """
+        # Get pandas DataFrame
+        df = self.df
+
+        # OPTIMIZATION: Use searchsorted instead of expensive boolean indexing
+        start_pos = idx.searchsorted(self.datetime_start, side='left')
+        end_pos = idx.searchsorted(self.datetime_end, side='right')
+        idx = idx[start_pos:end_pos]
+
+        # OPTIMIZATION: More efficient duplicate removal
+        if df.index.has_duplicates:
+            df = df[~df.index.duplicated(keep='first')]
+
+        # Reindex the DataFrame with the new index and forward-fill missing values.
+        df = df.reindex(idx, method="ffill")
+
+        # Check if we have a volume column, if not then add it and fill with 0 or NaN.
+        if "volume" in df.columns:
+            df.loc[df["volume"].isna(), "volume"] = 0
+        else:
+            df["volume"] = None
+
+        # OPTIMIZATION: More efficient column selection and forward fill
+        ohlc_cols = ["open", "high", "low"]
+        non_ohlc_cols = [col for col in df.columns if col not in ohlc_cols]
+        if non_ohlc_cols:
+            df[non_ohlc_cols] = df[non_ohlc_cols].ffill()
+
+        # If any of close, open, high, low columns are missing, add them with NaN.
+        for col in ["close", "open", "high", "low"]:
+            if col not in df.columns:
+                df[col] = None
+
+        # OPTIMIZATION: Vectorized NaN filling for OHLC columns
+        if "close" in df.columns:
+            for col in ["open", "high", "low"]:
+                if col in df.columns:
+                    try:
+                        # More efficient: compute mask once, use where
+                        mask = df[col].isna()
+                        if mask.any():
+                            df[col] = df[col].where(~mask, df["close"])
+                    except Exception as e:
+                        logger.error(f"Error filling {col} column: {e}")
+
+        # Update the cached pandas DataFrame
+        self._pandas_df = df
+
+        # Set up iter_index and iter_index_dict for later use.
+        iter_index = pd.Series(df.index)
+        self.iter_index = pd.Series(iter_index.index, index=iter_index)
+        self.iter_index_dict = self.iter_index.to_dict()
+
+        # Populate the datalines dictionary.
+        self.datalines = dict()
+        self.to_datalines()
+
+    def to_datalines(self):
+        """Create datalines from the pandas DataFrame."""
+        df = self.df
+
+        self.datalines.update(
+            {
+                "datetime": Dataline(
+                    self.asset,
+                    "datetime",
+                    df.index.to_numpy(),
+                    df.index.dtype,
+                )
+            }
+        )
+        self.datetime = self.datalines["datetime"].dataline
+
+        for column in df.columns:
+            self.datalines.update(
+                {
+                    column: Dataline(
+                        self.asset,
+                        column,
+                        df[column].to_numpy(),
+                        df[column].dtype,
+                    )
+                }
+            )
+            setattr(self, column, self.datalines[column].dataline)
+
+    def get_iter_count(self, dt):
+        """Return the index location for a given datetime."""
+        i = None
+
+        # Check if we have the iter_index_dict, if not then repair the times and fill
+        if getattr(self, "iter_index_dict", None) is None:
+            self.repair_times_and_fill(self.df.index)
+
+        # Search for dt in self.iter_index_dict
+        if dt in self.iter_index_dict:
+            i = self.iter_index_dict[dt]
+        else:
+            # If not found, get the last known data
+            i = self.iter_index.asof(dt)
+
+        return i
+
+    def check_data(func):
+        """Validates if the provided date, length, timeshift, and timestep will return data."""
+        def checker(self, *args, **kwargs):
+            if type(kwargs.get("length", 1)) not in [int, float]:
+                raise TypeError(f"Length must be an int or float. {type(kwargs.get('length', 1))} was provided.")
+
+            dt = args[0]
+
+            # Check if the iter date is outside of this data's date range.
+            if dt < self.datetime_start:
+                raise ValueError(
+                    f"The date you are looking for ({dt}) for ({self.asset}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}). This could be because the data for this asset does not exist for the date you are looking for, or something else."
+                )
+
+            # Search for dt in self.iter_index_dict
+            if getattr(self, "iter_index_dict", None) is None:
+                self.repair_times_and_fill(self.df.index)
+
+            if dt in self.iter_index_dict:
+                i = self.iter_index_dict[dt]
+            else:
+                # If not found, get the last known data
+                i = self.iter_index.asof(dt)
+
+            length = kwargs.get("length", 1)
+            timeshift = kwargs.get("timeshift", 0)
+            # Convert timeshift to an integer number of bars if it's a timedelta
+            if isinstance(timeshift, datetime.timedelta):
+                timestep = kwargs.get("timestep", self.timestep)
+                if timestep == "day":
+                    timeshift = timeshift.days
+                else:  # minute
+                    timeshift = int(timeshift.total_seconds() / 60)
+            data_index = i + 1 - length - timeshift
+            is_data = data_index >= 0
+            if not is_data:
+                logger.warning(
+                    f"The date you are looking for ({dt}) is outside of the data's date range ({self.datetime_start} to {self.datetime_end}) after accounting for a length of {kwargs.get('length', 1)} and a timeshift of {kwargs.get('timeshift', 0)}. Keep in mind that the length you are requesting must also be available in your data, in this case we are {data_index} rows away from the data you need."
                )
+
+            res = func(self, *args, **kwargs)
+            return res
+
+        return checker
+
+    @check_data
+    def get_last_price(self, dt, length=1, timeshift=0) -> Union[float, Decimal, None]:
+        """Returns the last known price of the data."""
+        iter_count = self.get_iter_count(dt)
+        open_price = self.datalines["open"].dataline[iter_count]
+        close_price = self.datalines["close"].dataline[iter_count]
+        price = close_price if dt > self.datalines["datetime"].dataline[iter_count] else open_price
+        return price
+
+    @check_data
+    def get_quote(self, dt, length=1, timeshift=0):
+        """Returns the last known quote data."""
+        required_price_cols = ["open", "high", "low", "close", "volume"]
+        missing_price_cols = [col for col in required_price_cols if col not in self.datalines]
+        if missing_price_cols:
+            logger.warning(
+                "DataPolars object %s is missing price columns %s required for quote retrieval.",
+                self.asset,
+                missing_price_cols,
+            )
+            return {}
+
+        quote_fields = {
+            "open": ("open", 2),
+            "high": ("high", 2),
+            "low": ("low", 2),
+            "close": ("close", 2),
+            "volume": ("volume", 0),
+            "bid": ("bid", 2),
+            "ask": ("ask", 2),
+            "bid_size": ("bid_size", 0),
+            "bid_condition": ("bid_condition", 0),
+            "bid_exchange": ("bid_exchange", 0),
+            "ask_size": ("ask_size", 0),
+            "ask_condition": ("ask_condition", 0),
+            "ask_exchange": ("ask_exchange", 0),
+        }
+
+        missing_quote_cols = [
+            col for col in ["bid", "ask", "bid_size", "ask_size", "bid_condition", "ask_condition",
+                            "bid_exchange", "ask_exchange"]
+            if col not in self.datalines
+        ]
+        if missing_quote_cols:
+            logger.warning(
+                "DataPolars object %s is missing quote columns %s; returning None for those values.",
+                self.asset,
+                missing_quote_cols,
+            )
+
+        iter_count = self.get_iter_count(dt)
+
+        def _get_value(column: str, round_digits: Optional[int]):
+            if column not in self.datalines:
+                return None
+            value = self.datalines[column].dataline[iter_count]
+            try:
+                if round_digits is None:
+                    return value
+                return round(value, round_digits)
+            except TypeError:
+                return value
+
+        quote_dict = {
+            name: _get_value(column, digits) for name, (column, digits) in quote_fields.items()
+        }
+
+        return quote_dict
+
+    @check_data
+    def _get_bars_dict(self, dt, length=1, timestep=None, timeshift=0):
+        """Returns a dictionary of the data."""
+        # Convert timeshift to integer if it's a timedelta
+        if isinstance(timeshift, datetime.timedelta):
+            logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} input_timeshift={timeshift} type={type(timeshift)} repr={repr(timeshift)}")
+            ts = timestep if timestep is not None else self.timestep
+            if ts == "day":
+                timeshift_converted = int(timeshift.total_seconds() / (24 * 3600))
+                logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} timestep=day total_seconds={timeshift.total_seconds()} converted={timeshift_converted}")
+                timeshift = timeshift_converted
+            else:  # minute
+                timeshift_converted = int(timeshift.total_seconds() / 60)
+                logger.debug(f"[TIMESHIFT_CONVERT] asset={self.symbol} timestep=minute total_seconds={timeshift.total_seconds()} converted={timeshift_converted}")
+                timeshift = timeshift_converted
+
+        # Get bars.
+        end_row = self.get_iter_count(dt) - timeshift
+        start_row = end_row - length
+
+        if start_row < 0:
+            start_row = 0
+
+        # Cast both start_row and end_row to int
+        start_row = int(start_row)
+        end_row = int(end_row)
+
+        bars_dict = {}
+        for dl_name, dl in self.datalines.items():
+            bars_dict[dl_name] = dl.dataline[start_row:end_row]
+
+        return bars_dict
+
+    def _get_bars_between_dates_dict(self, timestep=None, start_date=None, end_date=None):
+        """Returns a dictionary of all the data available between the start and end dates."""
+        end_row = self.get_iter_count(end_date)
+        start_row = self.get_iter_count(start_date)
+
+        if start_row < 0:
+            start_row = 0
+
+        # Cast both start_row and end_row to int
+        start_row = int(start_row)
+        end_row = int(end_row)
+
+        bars_dict = {}
+        for dl_name, dl in self.datalines.items():
+            bars_dict[dl_name] = dl.dataline[start_row:end_row]
+
+        return bars_dict
+
+    def get_bars(self, dt, length=1, timestep=MIN_TIMESTEP, timeshift=0):
+        """Returns a dataframe of the data."""
+        # Parse the timestep
+        quantity, timestep = parse_timestep_qty_and_unit(timestep)
+        num_periods = length
+
+        if timestep == "minute" and self.timestep == "day":
+            raise ValueError("You are requesting minute data from a daily data source. This is not supported.")
+
+        if timestep != "minute" and timestep != "day":
+            raise ValueError(f"Only minute and day are supported for timestep. You provided: {timestep}")
+
+        agg_column_map = {
+            "open": "first",
+            "high": "max",
+            "low": "min",
+            "close": "last",
+            "volume": "sum",
+        }
+        if timestep == "day" and self.timestep == "minute":
+            length = length * 1440
+            unit = "D"
+            data = self._get_bars_dict(dt, length=length, timestep="minute", timeshift=timeshift)
+
+        elif timestep == 'day' and self.timestep == 'day':
+            unit = "D"
+            data = self._get_bars_dict(dt, length=length, timestep=timestep, timeshift=timeshift)
+
+        else:
+            unit = "min"
+            length = length * quantity
+            data = self._get_bars_dict(dt, length=length, timestep=timestep, timeshift=timeshift)
+
+        if data is None:
+            return None
+
+        df = pd.DataFrame(data).assign(datetime=lambda df: pd.to_datetime(df['datetime'])).set_index('datetime')
+        if "dividend" in df.columns:
+            agg_column_map["dividend"] = "sum"
+        df_result = df.resample(f"{quantity}{unit}").agg(agg_column_map)
+
+        # Drop any rows that have NaN values
+        df_result = df_result.dropna()
+
+        # Remove partial day data from the current day
+        if timestep == "day" and self.timestep == "minute":
+            df_result = df_result[df_result.index < dt.replace(hour=0, minute=0, second=0, microsecond=0)]
+
+        # Only return the last n rows
+        df_result = df_result.tail(n=int(num_periods))
+
+        return df_result
+
+    def get_bars_between_dates(self, timestep=MIN_TIMESTEP, exchange=None, start_date=None, end_date=None):
+        """Returns a dataframe of all the data available between the start and end dates."""
+        if timestep == "minute" and self.timestep == "day":
+            raise ValueError("You are requesting minute data from a daily data source. This is not supported.")
+
+        if timestep != "minute" and timestep != "day":
+            raise ValueError(f"Only minute and day are supported for timestep. You provided: {timestep}")
+
+        if timestep == "day" and self.timestep == "minute":
+            bars_dict = self._get_bars_between_dates_dict(timestep=timestep, start_date=start_date, end_date=end_date)
+
+            if bars_dict is None:
+                return None
+
+            df = pd.DataFrame(bars_dict).set_index("datetime")
+
+            df_result = df.resample("D").agg(
+                {
+                    "open": "first",
+                    "high": "max",
+                    "low": "min",
+                    "close": "last",
+                    "volume": "sum",
+                }
+            )
+
+            return df_result
+
+        else:
+            bars_dict = self._get_bars_between_dates_dict(timestep=timestep, start_date=start_date, end_date=end_date)
+
+            if bars_dict is None:
+                return None
+
+            df = pd.DataFrame(bars_dict).set_index("datetime")
+            return df
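
For context, here is a minimal usage sketch of the new DataPolars class above. It is not taken from the package: the SPY asset, the synthetic one-minute bars, and the import path (inferred from files-changed entry 28, lumibot/entities/data_polars.py) are illustrative assumptions; only Asset, DataPolars, and the methods exercised (df, get_last_price, get_bars) come from the diff itself.

import datetime

import polars as pl

from lumibot.entities.asset import Asset
from lumibot.entities.data_polars import DataPolars  # path inferred from this diff

# Synthetic one-minute OHLCV bars with a timezone-aware 'datetime' column,
# matching the schema DataPolars expects.
start = datetime.datetime(2025, 1, 6, 9, 30)
n = 120
df = pl.DataFrame(
    {
        "datetime": [start + datetime.timedelta(minutes=i) for i in range(n)],
        "open": [100.0 + 0.01 * i for i in range(n)],
        "high": [100.1 + 0.01 * i for i in range(n)],
        "low": [99.9 + 0.01 * i for i in range(n)],
        "close": [100.05 + 0.01 * i for i in range(n)],
        "volume": [1000] * n,
    }
).with_columns(pl.col("datetime").dt.replace_time_zone("America/New_York"))

data = DataPolars(Asset("SPY", asset_type="stock"), df, timestep="minute")

# Polars stays the source of truth; .df converts to pandas lazily and caches it.
dt = data.datetime_start + datetime.timedelta(minutes=30)
print(data.get_last_price(dt))                         # last known price at dt
print(data.get_bars(dt, length=5, timestep="minute"))  # last five one-minute bars
print(data.df.index.tz)                                # normalized to the default timezone

The point of the polars-first design described in the docstring is visible in the last three lines: the pandas conversion, including timezone normalization to LUMIBOT_DEFAULT_PYTZ, happens once and is cached, while the polars frame remains the internal representation.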