lumibot 4.1.3__py3-none-any.whl → 4.2.1__py3-none-any.whl

This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

This release of lumibot has been flagged as potentially problematic; see the registry listing for details.

Files changed (163)
  1. lumibot/backtesting/__init__.py +19 -5
  2. lumibot/backtesting/backtesting_broker.py +98 -18
  3. lumibot/backtesting/databento_backtesting.py +5 -686
  4. lumibot/backtesting/databento_backtesting_pandas.py +738 -0
  5. lumibot/backtesting/databento_backtesting_polars.py +860 -546
  6. lumibot/backtesting/fix_debug.py +37 -0
  7. lumibot/backtesting/thetadata_backtesting.py +9 -355
  8. lumibot/backtesting/thetadata_backtesting_pandas.py +1167 -0
  9. lumibot/brokers/alpaca.py +8 -1
  10. lumibot/brokers/schwab.py +12 -2
  11. lumibot/credentials.py +13 -0
  12. lumibot/data_sources/__init__.py +5 -8
  13. lumibot/data_sources/data_source.py +6 -2
  14. lumibot/data_sources/data_source_backtesting.py +30 -0
  15. lumibot/data_sources/databento_data.py +5 -390
  16. lumibot/data_sources/databento_data_pandas.py +440 -0
  17. lumibot/data_sources/databento_data_polars.py +15 -9
  18. lumibot/data_sources/pandas_data.py +30 -17
  19. lumibot/data_sources/polars_data.py +986 -0
  20. lumibot/data_sources/polars_mixin.py +472 -96
  21. lumibot/data_sources/polygon_data_polars.py +5 -0
  22. lumibot/data_sources/yahoo_data.py +9 -2
  23. lumibot/data_sources/yahoo_data_polars.py +5 -0
  24. lumibot/entities/__init__.py +15 -0
  25. lumibot/entities/asset.py +5 -28
  26. lumibot/entities/bars.py +89 -20
  27. lumibot/entities/data.py +29 -6
  28. lumibot/entities/data_polars.py +668 -0
  29. lumibot/entities/position.py +38 -4
  30. lumibot/strategies/_strategy.py +2 -1
  31. lumibot/strategies/strategy.py +61 -49
  32. lumibot/tools/backtest_cache.py +284 -0
  33. lumibot/tools/databento_helper.py +35 -35
  34. lumibot/tools/databento_helper_polars.py +738 -775
  35. lumibot/tools/futures_roll.py +251 -0
  36. lumibot/tools/indicators.py +135 -104
  37. lumibot/tools/polars_utils.py +142 -0
  38. lumibot/tools/thetadata_helper.py +1068 -134
  39. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/METADATA +9 -1
  40. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/RECORD +71 -147
  41. tests/backtest/test_databento.py +37 -6
  42. tests/backtest/test_databento_comprehensive_trading.py +8 -4
  43. tests/backtest/test_databento_parity.py +4 -2
  44. tests/backtest/test_debug_avg_fill_price.py +1 -1
  45. tests/backtest/test_example_strategies.py +11 -1
  46. tests/backtest/test_futures_edge_cases.py +3 -3
  47. tests/backtest/test_futures_single_trade.py +2 -2
  48. tests/backtest/test_futures_ultra_simple.py +2 -2
  49. tests/backtest/test_polars_lru_eviction.py +470 -0
  50. tests/backtest/test_yahoo.py +42 -0
  51. tests/test_asset.py +4 -4
  52. tests/test_backtest_cache_manager.py +149 -0
  53. tests/test_backtesting_data_source_env.py +6 -0
  54. tests/test_continuous_futures_resolution.py +60 -48
  55. tests/test_data_polars_parity.py +160 -0
  56. tests/test_databento_asset_validation.py +23 -5
  57. tests/test_databento_backtesting.py +1 -1
  58. tests/test_databento_backtesting_polars.py +312 -192
  59. tests/test_databento_data.py +220 -463
  60. tests/test_databento_live.py +10 -10
  61. tests/test_futures_roll.py +38 -0
  62. tests/test_indicator_subplots.py +101 -0
  63. tests/test_market_infinite_loop_bug.py +77 -3
  64. tests/test_polars_resample.py +67 -0
  65. tests/test_polygon_helper.py +46 -0
  66. tests/test_thetadata_backwards_compat.py +97 -0
  67. tests/test_thetadata_helper.py +222 -23
  68. tests/test_thetadata_pandas_verification.py +186 -0
  69. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  70. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  71. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  72. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  73. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  74. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  75. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  76. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  77. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  78. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  79. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  80. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  81. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  82. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  83. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  85. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  86. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  87. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  88. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  89. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  90. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  91. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  92. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  93. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  94. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  95. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  96. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  97. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  98. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  99. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  100. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  101. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  102. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  103. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  104. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  105. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  106. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  107. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  108. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  109. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  110. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  111. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  112. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  113. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  114. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  115. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  116. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  117. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  118. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  119. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  120. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  121. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  122. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  123. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  124. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  125. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  126. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  127. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  128. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  129. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  130. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  131. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  132. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  133. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  134. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  135. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  136. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  137. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  138. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  139. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  140. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  141. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  142. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  143. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  144. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  145. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  146. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  147. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  148. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  149. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  150. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  151. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  152. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  153. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  154. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  155. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  156. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  157. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  158. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  159. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  160. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  161. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/WHEEL +0 -0
  162. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/licenses/LICENSE +0 -0
  163. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/top_level.txt +0 -0
lumibot/data_sources/polars_data.py (new file, +986 -0)
@@ -0,0 +1,986 @@
1
+ from collections import OrderedDict, defaultdict
2
+ from datetime import timedelta
3
+ from decimal import Decimal
4
+ from typing import Union
5
+
6
+ import pandas as pd
7
+
8
+ from lumibot.data_sources import DataSourceBacktesting
9
+ from lumibot.entities import Asset, Bars, Quote
10
+ from lumibot.tools.lumibot_logger import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
+ class PolarsData(DataSourceBacktesting):
16
+ """
17
+ PolarsData is a Backtesting-only DataSource that will be optimized to use Polars DataFrames.
18
+ Currently identical to PandasData as a baseline. Will be incrementally converted to use Polars.
19
+ """
20
+
21
+ SOURCE = "POLARS"
22
+ TIMESTEP_MAPPING = [
23
+ {"timestep": "day", "representations": ["1D", "day"]},
24
+ {"timestep": "minute", "representations": ["1M", "minute"]},
25
+ ]
26
+
27
+ def __init__(self, *args, pandas_data=None, auto_adjust=True, allow_option_quote_fallback: bool = False, **kwargs):
28
+ super().__init__(*args, **kwargs)
29
+ self.option_quote_fallback_allowed = allow_option_quote_fallback
30
+ self.name = "polars"
31
+ self.pandas_data = self._set_pandas_data_keys(pandas_data)
32
+ self.auto_adjust = auto_adjust
33
+ self._data_store = self.pandas_data
34
+ self._date_index = None
35
+ self._date_supply = None
36
+ self._timestep = "minute"
37
+
38
+ # Sliding window configuration (always-on, optimized for speed)
39
+ self._HISTORY_WINDOW_BARS = 5000 # Fixed window size
40
+ self._FUTURE_WINDOW_BARS = 1000 # Look-ahead buffer for efficiency
41
+ self._TRIM_FREQUENCY_BARS = 1000 # Trim every 1000 iterations
42
+ self._trim_iteration_count = 0 # Counter for periodic trimming
43
+
44
+ # Aggregated bars cache (separate from pandas_data)
45
+ # Uses existing OrderedDict infrastructure for LRU tracking
46
+ self._aggregated_cache = OrderedDict()
47
+
48
+ # Memory limits (1 GB hard cap)
49
+ self.MAX_STORAGE_BYTES = 1_000_000_000
50
+
51
+ def _trim_cached_data(self):
52
+ """Periodically trim cached data to maintain sliding window.
53
+
54
+ Called every _TRIM_FREQUENCY_BARS iterations to remove old bars
55
+ that are outside the sliding window. This keeps memory usage low
56
+ while maintaining enough history for lookback calculations.
57
+
58
+ This is always-on and requires no user configuration.
59
+ """
60
+ # Increment iteration counter
61
+ self._trim_iteration_count += 1
62
+
63
+ # Only trim every TRIM_FREQUENCY_BARS iterations
64
+ if self._trim_iteration_count < self._TRIM_FREQUENCY_BARS:
65
+ return
66
+
67
+ # Reset counter
68
+ self._trim_iteration_count = 0
69
+
70
+ # Get current datetime for window calculation
71
+ current_dt = self.get_datetime()
72
+
73
+ # Trim each DataPolars object in the data store
74
+ # CRITICAL: Use each data object's own timestep, not global self._timestep
75
+ # A backtest can have mixed timeframes (1m, 5m, 1h, 1d for same asset)
76
+ trimmed_count = 0
77
+ for asset_key, data in self._data_store.items():
78
+ # Only trim if data is a DataPolars object (has trim_before method)
79
+ if not hasattr(data, 'trim_before'):
80
+ continue
81
+
82
+ try:
83
+ # Get this data object's timestep (not the global self._timestep!)
84
+ data_timestep = getattr(data, 'timestep', 'minute')
85
+
86
+ # Use convert_timestep_str_to_timedelta for robust conversion
87
+ base_delta, _ = self.convert_timestep_str_to_timedelta(data_timestep)
88
+
89
+ # Calculate cutoff for this specific data object
90
+ # Keep HISTORY_WINDOW_BARS bars of this timestep before current time
91
+ window_delta = base_delta * self._HISTORY_WINDOW_BARS
92
+ cutoff_dt = current_dt - window_delta
93
+
94
+ # Trim with the correct per-asset cutoff
95
+ data.trim_before(cutoff_dt)
96
+
97
+ trimmed_count += 1
98
+
99
+ except Exception as e:
100
+ logger.warning(f"Failed to trim data for {asset_key}: {e}")
101
+
102
+ if trimmed_count > 0:
103
+ logger.debug(f"[SLIDING WINDOW] Trimmed {trimmed_count} assets at iteration {self._TRIM_FREQUENCY_BARS}")
104
+
105
+ def _get_aggregation_cache_key(self, asset, quote, timestep):
106
+ """Generate a unique cache key for aggregated bars.
107
+
108
+ Parameters
109
+ ----------
110
+ asset : Asset
111
+ The asset
112
+ quote : Asset
113
+ The quote asset
114
+ timestep : str
115
+ The timestep (e.g., "5 minutes", "15 minutes", "hour", "day")
116
+
117
+ Returns
118
+ -------
119
+ tuple
120
+ Cache key (asset, quote, timestep)
121
+ """
122
+ if isinstance(asset, tuple):
123
+ asset, quote = asset
124
+ return (asset, quote, timestep)
125
+
126
+ def _aggregate_polars_bars(self, source_data, target_timestep):
127
+ """Aggregate minute-level polars data to higher timeframes.
128
+
129
+ This is a critical performance optimization - aggregating once and caching
130
+ is much faster than re-aggregating every iteration.
131
+
132
+ Parameters
133
+ ----------
134
+ source_data : DataPolars
135
+ Source data (typically 1-minute bars)
136
+ target_timestep : str
137
+ Target timestep ("5 minutes", "15 minutes", "hour", "day")
138
+
139
+ Returns
140
+ -------
141
+ polars.DataFrame or None
142
+ Aggregated data, or None if aggregation not possible
143
+ """
144
+ try:
145
+ import polars as pl
146
+
147
+ # Get the polars DataFrame from DataPolars
148
+ if not hasattr(source_data, 'polars_df'):
149
+ return None
150
+
151
+ df = source_data.polars_df
152
+ if df.height == 0:
153
+ return None
154
+
155
+ # Map timestep to polars interval
156
+ interval_mapping = {
157
+ "5 minutes": "5m",
158
+ "15 minutes": "15m",
159
+ "30 minutes": "30m",
160
+ "hour": "1h",
161
+ "2 hours": "2h",
162
+ "4 hours": "4h",
163
+ "day": "1d",
164
+ }
165
+
166
+ interval = interval_mapping.get(target_timestep)
167
+ if not interval:
168
+ logger.warning(f"Unsupported aggregation timestep: {target_timestep}")
169
+ return None
170
+
171
+ # Aggregate using polars group_by_dynamic (fast!)
172
+ # This is the core optimization - polars aggregation is 10-100x faster than pandas
173
+ aggregated = df.group_by_dynamic(
174
+ "datetime",
175
+ every=interval,
176
+ closed="left",
177
+ label="left"
178
+ ).agg([
179
+ pl.col("open").first(),
180
+ pl.col("high").max(),
181
+ pl.col("low").min(),
182
+ pl.col("close").last(),
183
+ pl.col("volume").sum(),
184
+ ])
185
+
186
+ logger.debug(f"[AGGREGATION] {source_data.asset.symbol}: {df.height} rows ({source_data.timestep}) → {aggregated.height} rows ({target_timestep})")
187
+ return aggregated
188
+
189
+ except Exception as e:
190
+ logger.error(f"Error aggregating data: {e}")
191
+ return None
192
+
193
+ def _get_or_aggregate_bars(self, asset, quote, length, source_timestep, target_timestep):
194
+ """Get aggregated bars from cache or create them.
195
+
196
+ This method implements the aggregated bars cache to avoid re-aggregating
197
+ 5m/15m/1h bars from 1-minute data on every iteration.
198
+
199
+ Parameters
200
+ ----------
201
+ asset : Asset
202
+ The asset
203
+ quote : Asset
204
+ The quote asset
205
+ length : int
206
+ Number of bars requested
207
+ source_timestep : str
208
+ Source timestep (typically "minute")
209
+ target_timestep : str
210
+ Target timestep (e.g., "5 minutes", "15 minutes", "hour")
211
+
212
+ Returns
213
+ -------
214
+ polars.DataFrame or None
215
+ Aggregated bars, or None if not available
216
+ """
217
+ # Generate cache key
218
+ cache_key = self._get_aggregation_cache_key(asset, quote, target_timestep)
219
+
220
+ # Check if we already have aggregated data cached
221
+ if cache_key in self._aggregated_cache:
222
+ # Move to end (LRU tracking)
223
+ self._aggregated_cache.move_to_end(cache_key)
224
+ logger.debug(f"[AGG CACHE HIT] {asset.symbol} {target_timestep}")
225
+ return self._aggregated_cache[cache_key]
226
+
227
+ # Need to aggregate from source data
228
+ asset_key = self.find_asset_in_data_store(asset, quote)
229
+ if not asset_key or asset_key not in self._data_store:
230
+ return None
231
+
232
+ source_data = self._data_store[asset_key]
233
+
234
+ # Only aggregate from DataPolars objects (has polars_df)
235
+ if not hasattr(source_data, 'polars_df'):
236
+ logger.warning(f"Cannot aggregate - source data is not DataPolars: {type(source_data)}")
237
+ return None
238
+
239
+ # Perform aggregation
240
+ aggregated_df = self._aggregate_polars_bars(source_data, target_timestep)
241
+ if aggregated_df is None:
242
+ return None
243
+
244
+ # Cache the result (LRU cache)
245
+ self._aggregated_cache[cache_key] = aggregated_df
246
+ logger.debug(f"[AGG CACHE MISS] {asset.symbol} {target_timestep} - cached {aggregated_df.height} rows")
247
+
248
+ # Note: Memory limits are enforced periodically in get_historical_prices()
249
+ # Don't enforce here to avoid immediate eviction after caching
250
+
251
+ return aggregated_df
252
+
253
+ def _enforce_memory_limits(self):
254
+ """Enforce memory limits using LRU eviction.
255
+
256
+ This method ensures total memory usage stays under MAX_STORAGE_BYTES (1GB)
257
+ by evicting least-recently-used items from both _data_store and _aggregated_cache.
258
+
259
+ Uses the proven LRU pattern from polygon_backtesting_pandas.py.
260
+
261
+ PERFORMANCE: Only checks every _TRIM_FREQUENCY_BARS iterations (same as trim).
262
+ Checking memory on every get_historical_prices() call is expensive!
263
+ """
264
+ # Use the same periodic counter as _trim_cached_data
265
+ # Only check memory limits when we actually trim (every 1000 iterations)
266
+ # This avoids iterating all data on every get_historical_prices call
267
+ if self._trim_iteration_count != 0:
268
+ return # Not time to check yet
269
+
270
+ try:
271
+ # Calculate total memory usage
272
+ storage_used = 0
273
+
274
+ # Memory from _data_store (DataPolars objects)
275
+ for data in self._data_store.values():
276
+ if hasattr(data, 'polars_df'):
277
+ # Estimate polars DataFrame memory
278
+ df = data.polars_df
279
+ if df.height > 0:
280
+ # Polars estimated_size() returns bytes
281
+ storage_used += df.estimated_size()
282
+
283
+ # Memory from _aggregated_cache (polars DataFrames)
284
+ for agg_df in self._aggregated_cache.values():
285
+ if agg_df is not None and hasattr(agg_df, 'estimated_size'):
286
+ storage_used += agg_df.estimated_size()
287
+
288
+ if storage_used <= self.MAX_STORAGE_BYTES:
289
+ return # Under limit, nothing to do
290
+
291
+ logger.debug(f"[MEMORY] Storage used: {storage_used:,} bytes ({len(self._data_store)} data + {len(self._aggregated_cache)} aggregated)")
292
+ logger.warning(f"[MEMORY] Exceeds limit of {self.MAX_STORAGE_BYTES:,} bytes, evicting LRU items...")
293
+
294
+ # Evict from aggregated cache first (less critical than source data)
295
+ while storage_used > self.MAX_STORAGE_BYTES and len(self._aggregated_cache) > 0:
296
+ # popitem(last=False) removes oldest (LRU)
297
+ k, agg_df = self._aggregated_cache.popitem(last=False)
298
+ if agg_df is not None and hasattr(agg_df, 'estimated_size'):
299
+ freed = agg_df.estimated_size()
300
+ storage_used -= freed
301
+ logger.debug(f"[MEMORY] Evicted aggregated cache for {k}: freed {freed:,} bytes")
302
+ else:
303
+ # Item has no size - assume 0 bytes freed but continue evicting
304
+ logger.warning(f"[MEMORY] Evicted aggregated cache for {k}: no estimated_size(), assuming 0 bytes")
305
+
306
+ # If still over limit, evict from data_store (more aggressive)
307
+ evicted_data_items = 0
308
+ while storage_used > self.MAX_STORAGE_BYTES and len(self._data_store) > 0:
309
+ # popitem(last=False) removes oldest (LRU)
310
+ k, data = self._data_store.popitem(last=False)
311
+ if hasattr(data, 'polars_df'):
312
+ df = data.polars_df
313
+ if df.height > 0:
314
+ freed = df.estimated_size()
315
+ storage_used -= freed
316
+ evicted_data_items += 1
317
+ logger.warning(f"[MEMORY] Evicted data_store for {k}: freed {freed:,} bytes")
318
+ else:
319
+ # DataFrame is empty - assume 0 bytes
320
+ evicted_data_items += 1
321
+ logger.warning(f"[MEMORY] Evicted data_store for {k}: empty DataFrame, 0 bytes freed")
322
+ else:
323
+ # Not a DataPolars object - assume 0 bytes
324
+ logger.warning(f"[MEMORY] Evicted data_store for {k}: no polars_df, assuming 0 bytes")
325
+
326
+ if evicted_data_items > 0:
327
+ logger.warning(f"[MEMORY] Evicted {evicted_data_items} data items to stay under {self.MAX_STORAGE_BYTES:,} bytes")
328
+
329
+ logger.debug(f"[MEMORY] After eviction: {storage_used:,} bytes ({len(self._data_store)} data + {len(self._aggregated_cache)} aggregated)")
330
+
331
+ except Exception as e:
332
+ logger.error(f"Error enforcing memory limits: {e}")
333
+
334
+ @staticmethod
335
+ def _set_pandas_data_keys(pandas_data):
336
+ # OrderedDict tracks the LRU dataframes for when it comes time to do evictions.
337
+ new_pandas_data = OrderedDict()
338
+
339
+ def _get_new_pandas_data_key(data):
340
+ # Always save the asset as a tuple of Asset and quote
341
+ if isinstance(data.asset, tuple):
342
+ return data.asset
343
+ elif isinstance(data.asset, Asset):
344
+ # If quote is not specified, use USD as the quote
345
+ if data.quote is None:
346
+ # Warn that USD is being used as the quote
347
+ logger.warning(f"No quote specified for {data.asset}. Using USD as the quote.")
348
+ return data.asset, Asset(symbol="USD", asset_type="forex")
349
+ return data.asset, data.quote
350
+ else:
351
+ raise ValueError("Asset must be an Asset or a tuple of Asset and quote")
352
+
353
+ # Check if pandas_data is a dictionary
354
+ if isinstance(pandas_data, dict):
355
+ for k, data in pandas_data.items():
356
+ key = _get_new_pandas_data_key(data)
357
+ new_pandas_data[key] = data
358
+
359
+ # Check if pandas_data is a list
360
+ elif isinstance(pandas_data, list):
361
+ for data in pandas_data:
362
+ key = _get_new_pandas_data_key(data)
363
+ new_pandas_data[key] = data
364
+
365
+ return new_pandas_data
366
+
367
+ def load_data(self):
368
+ self._data_store = self.pandas_data
369
+ self._date_index = self.update_date_index()
370
+
371
+ if len(self._data_store.values()) > 0:
372
+ self._timestep = list(self._data_store.values())[0].timestep
373
+
374
+ pcal = self.get_trading_days_pandas()
375
+ self._date_index = self.clean_trading_times(self._date_index, pcal)
376
+ for _, data in self._data_store.items():
377
+ data.repair_times_and_fill(self._date_index)
378
+ return pcal
379
+
380
+ def clean_trading_times(self, dt_index, pcal):
381
+ """Fill gaps within trading days using the supplied market calendar.
382
+
383
+ Parameters
384
+ ----------
385
+ dt_index : pandas.DatetimeIndex
386
+ Original datetime index.
387
+ pcal : pandas.DataFrame
388
+ Calendar with ``market_open`` and ``market_close`` columns indexed by date.
389
+
390
+ Returns
391
+ -------
392
+ pandas.DatetimeIndex
393
+ Cleaned index with one-minute frequency during market hours.
394
+ """
395
+ # Ensure the datetime index is in datetime format and drop duplicate timestamps
396
+ dt_index = pd.to_datetime(dt_index).drop_duplicates()
397
+
398
+ # Create a DataFrame with dt_index as the index and sort it
399
+ df = pd.DataFrame(range(len(dt_index)), index=dt_index)
400
+ df = df.sort_index()
401
+
402
+ # Create a column for the date portion only (normalize to date, keeping as datetime64 type)
403
+ df["dates"] = df.index.normalize()
404
+
405
+ # Merge with the trading calendar on the 'dates' column to get market open/close times.
406
+ # Use a left join to keep all rows from the original index.
407
+ df = df.merge(
408
+ pcal[["market_open", "market_close"]],
409
+ left_on="dates",
410
+ right_index=True,
411
+ how="left"
412
+ )
413
+
414
+ if self._timestep == "minute":
415
+ # Resample to a 1-minute frequency, using pad to fill missing times.
416
+ # At this point, the index is unique so asfreq will work correctly.
417
+ df = df.asfreq("1min", method="pad")
418
+
419
+ # Filter to include only the rows that fall within market open and close times.
420
+ result_index = df.loc[
421
+ (df.index >= df["market_open"]) & (df.index <= df["market_close"])
422
+ ].index
423
+ else:
424
+ result_index = df.index
425
+
426
+ return result_index
427
+
428
+ def get_trading_days_pandas(self):
429
+ pcal = pd.DataFrame(self._date_index)
430
+
431
+ if pcal.empty:
432
+ # Create a dummy dataframe that spans the entire date range with market_open and market_close
433
+ # set to 00:00:00 and 23:59:59 respectively.
434
+ result = pd.DataFrame(
435
+ index=pd.date_range(start=self.datetime_start, end=self.datetime_end, freq="D"),
436
+ columns=["market_open", "market_close"],
437
+ )
438
+ result["market_open"] = result.index.floor("D")
439
+ result["market_close"] = result.index.ceil("D") - pd.Timedelta("1s")
440
+ return result
441
+
442
+ else:
443
+ pcal.columns = ["datetime"]
444
+ # Normalize to date but keep as datetime64 type (not date objects)
445
+ pcal["date"] = pcal["datetime"].dt.normalize()
446
+ result = pcal.groupby("date").agg(
447
+ market_open=(
448
+ "datetime",
449
+ "first",
450
+ ),
451
+ market_close=(
452
+ "datetime",
453
+ "last",
454
+ ),
455
+ )
456
+ return result
457
+
458
+ def get_assets(self):
459
+ return list(self._data_store.keys())
460
+
461
+ def get_asset_by_name(self, name):
462
+ return [asset for asset in self.get_assets() if asset.name == name]
463
+
464
+ def get_asset_by_symbol(self, symbol, asset_type=None):
465
+ """Finds the assets that match the symbol. If type is specified
466
+ finds the assets matching symbol and type.
467
+
468
+ Parameters
469
+ ----------
470
+ symbol : str
471
+ The symbol of the asset.
472
+ asset_type : str
473
+ Asset type. One of:
474
+ - stock
475
+ - future
476
+ - option
477
+ - forex
478
+
479
+ Returns
480
+ -------
481
+ list of Asset
482
+ """
483
+ store_assets = self.get_assets()
484
+ if asset_type is None:
485
+ return [asset for asset in store_assets if asset.symbol == symbol]
486
+ else:
487
+ return [asset for asset in store_assets if (asset.symbol == symbol and asset.asset_type == asset_type)]
488
+
489
+ def update_date_index(self):
490
+ dt_index = None
491
+ for asset, data in self._data_store.items():
492
+ if dt_index is None:
493
+ df = data.df
494
+ dt_index = df.index
495
+ else:
496
+ dt_index = dt_index.join(data.df.index, how="outer")
497
+
498
+ if dt_index is None:
499
+ # Build a dummy index
500
+ freq = "1min" if self._timestep == "minute" else "1D"
501
+ dt_index = pd.date_range(start=self.datetime_start, end=self.datetime_end, freq=freq)
502
+
503
+ else:
504
+ if self.datetime_end < dt_index[0]:
505
+ raise ValueError(
506
+ f"The ending date for the backtest was set for {self.datetime_end}. "
507
+ f"The earliest data entered is {dt_index[0]}. \nNo backtest can "
508
+ f"be run since there is no data before the backtest end date."
509
+ )
510
+ elif self.datetime_start > dt_index[-1]:
511
+ raise ValueError(
512
+ f"The starting date for the backtest was set for {self.datetime_start}. "
513
+ f"The latest data entered is {dt_index[-1]}. \nNo backtest can "
514
+ f"be run since there is no data after the backtest start date."
515
+ )
516
+
517
+ return dt_index
518
+
519
+ def get_last_price(self, asset, quote=None, exchange=None) -> Union[float, Decimal, None]:
520
+ # Takes an asset and returns the last known price
521
+ tuple_to_find = self.find_asset_in_data_store(asset, quote)
522
+
523
+ if tuple_to_find in self._data_store:
524
+ # LRU tracking - mark this data as recently used
525
+ self._data_store.move_to_end(tuple_to_find)
526
+ data = self._data_store[tuple_to_find]
527
+ try:
528
+ dt = self.get_datetime()
529
+ price = data.get_last_price(dt)
530
+
531
+ # Check if price is NaN
532
+ if pd.isna(price):
533
+ # Provide more specific error message for index assets
534
+ if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.INDEX:
535
+ logger.warning(f"Index asset `{asset.symbol}` returned NaN price. This could be due to missing data for the index or a subscription issue if using Polygon.io. Note that some index data (like SPX) requires a paid subscription. Consider using Yahoo Finance for broader index data coverage.")
536
+ else:
537
+ logger.debug(f"Error getting last price for {tuple_to_find}: price is NaN")
538
+ return None
539
+
540
+ return price
541
+ except Exception as e:
542
+ logger.debug(f"Error getting last price for {tuple_to_find}: {e}")
543
+ return None
544
+ else:
545
+ # Provide more specific error message when asset not found in data store
546
+ if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.INDEX:
547
+ logger.warning(f"The index asset `{asset.symbol}` does not exist or does not have data. Index data may not be available from this data source. If using Polygon, note that some index data (like SPX) requires a paid subscription. Consider using Yahoo Finance for broader index data coverage.")
548
+ return None
549
+
550
+ def get_quote(self, asset, quote=None, exchange=None) -> Quote:
551
+ """
552
+ Get the latest quote for an asset.
553
+ Returns a Quote object with bid, ask, last, and other fields if available.
554
+
555
+ Parameters
556
+ ----------
557
+ asset : Asset object
558
+ The asset for which the quote is needed.
559
+ quote : Asset object, optional
560
+ The quote asset for cryptocurrency pairs.
561
+ exchange : str, optional
562
+ The exchange to get the quote from.
563
+
564
+ Returns
565
+ -------
566
+ Quote
567
+ A Quote object with the quote information.
568
+ """
569
+ from lumibot.entities import Quote
570
+
571
+ # Takes an asset and returns the last known price
572
+ tuple_to_find = self.find_asset_in_data_store(asset, quote)
573
+
574
+ if tuple_to_find in self._data_store:
575
+ # LRU tracking - mark this data as recently used
576
+ self._data_store.move_to_end(tuple_to_find)
577
+ data = self._data_store[tuple_to_find]
578
+ dt = self.get_datetime()
579
+ ohlcv_bid_ask_dict = data.get_quote(dt)
580
+
581
+ # Check if ohlcv_bid_ask_dict is NaN
582
+ if pd.isna(ohlcv_bid_ask_dict):
583
+ logger.debug(f"Error getting ohlcv_bid_ask for {tuple_to_find}: ohlcv_bid_ask_dict is NaN")
584
+ return Quote(asset=asset)
585
+
586
+ # Convert dictionary to Quote object
587
+ return Quote(
588
+ asset=asset,
589
+ price=ohlcv_bid_ask_dict.get('close'),
590
+ bid=ohlcv_bid_ask_dict.get('bid'),
591
+ ask=ohlcv_bid_ask_dict.get('ask'),
592
+ volume=ohlcv_bid_ask_dict.get('volume'),
593
+ timestamp=dt,
594
+ bid_size=ohlcv_bid_ask_dict.get('bid_size'),
595
+ ask_size=ohlcv_bid_ask_dict.get('ask_size'),
596
+ raw_data=ohlcv_bid_ask_dict
597
+ )
598
+ else:
599
+ return Quote(asset=asset)
600
+
601
+ def get_last_prices(self, assets, quote=None, exchange=None, **kwargs):
602
+ result = {}
603
+ for asset in assets:
604
+ result[asset] = self.get_last_price(asset, quote=quote, exchange=exchange)
605
+ return result
606
+
607
+ def _get_polars_data_entry(self, asset, quote, timestep):
608
+ """Retrieve a cached DataPolars entry for a specific timestep if available."""
609
+ polars_cache = getattr(self, "_polars_data", {})
610
+
611
+ # Build candidate quotes: exact match first, then USD fallback (default storage)
612
+ quote_candidates = []
613
+ if quote is not None:
614
+ quote_candidates.append(quote)
615
+ quote_candidates.append(Asset(symbol="USD", asset_type="forex"))
616
+
617
+ for candidate_quote in quote_candidates:
618
+ key = (asset, candidate_quote, timestep)
619
+ entry = polars_cache.get(key)
620
+ if entry is not None:
621
+ return entry
622
+
623
+ # Final attempt: linear scan to cope with differing Asset instances
624
+ for (cached_asset, cached_quote, cached_timestep), entry in polars_cache.items():
625
+ if cached_asset == asset and cached_timestep == timestep:
626
+ if quote is None or cached_quote == quote:
627
+ return entry
628
+ return None
629
+
630
+ def find_asset_in_data_store(self, asset, quote=None, timestep=None):
631
+ """
632
+ Locate the cache key for an asset, preferring timestep-aware keys but
633
+ gracefully falling back to legacy (asset, quote) entries for backward
634
+ compatibility.
635
+ """
636
+ candidates = []
637
+
638
+ if timestep is not None:
639
+ base_quote = quote if quote is not None else Asset("USD", "forex")
640
+ candidates.append((asset, base_quote, timestep))
641
+ # If a quote was explicitly supplied, also consider the USD fallback to
642
+ # match historical cache entries that were stored with USD.
643
+ if quote is not None:
644
+ candidates.append((asset, Asset("USD", "forex"), timestep))
645
+
646
+ if quote is not None:
647
+ candidates.append((asset, quote))
648
+
649
+ if isinstance(asset, Asset):
650
+ candidates.append((asset, Asset("USD", "forex")))
651
+
652
+ candidates.append(asset)
653
+
654
+ for key in candidates:
655
+ if key in self._data_store:
656
+ return key
657
+ return None
658
+
659
+ def _pull_source_symbol_bars(
660
+ self,
661
+ asset,
662
+ length,
663
+ timestep="",
664
+ timeshift=0,
665
+ quote=None,
666
+ exchange=None,
667
+ include_after_hours=True,
668
+ ):
669
+ timestep = timestep if timestep else self.MIN_TIMESTEP
670
+ if exchange is not None:
671
+ logger.warning(
672
+ f"the exchange parameter is not implemented for PandasData, but {exchange} was passed as the exchange"
673
+ )
674
+
675
+ if not timeshift:
676
+ timeshift = 0
677
+
678
+ asset_to_find = self.find_asset_in_data_store(asset, quote, timestep)
679
+
680
+ if asset_to_find in self._data_store:
681
+ # LRU tracking - mark this data as recently used
682
+ self._data_store.move_to_end(asset_to_find)
683
+ data = self._data_store[asset_to_find]
684
+ else:
685
+ if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.INDEX:
686
+ logger.warning(f"The index asset `{asset.symbol}` does not exist or does not have data. Index data may not be available from this data source. If using Polygon, note that some index data (like SPX) requires a paid subscription. Consider using Yahoo Finance for broader index data coverage.")
687
+ else:
688
+ logger.warning(f"The asset: `{asset}` does not exist or does not have data.")
689
+ return
690
+
691
+ desired_timestep = timestep
692
+
693
+ # Prefer a direct DataPolars match for the requested timestep (if available) to
694
+ # avoid aggregating from trimmed minute windows.
695
+ current_timestep = getattr(data, "timestep", None)
696
+ if desired_timestep and current_timestep != desired_timestep:
697
+ direct_match = self._get_polars_data_entry(asset, quote, desired_timestep)
698
+ if direct_match is not None:
699
+ data = direct_match
700
+ current_timestep = data.timestep
701
+
702
+ # OPTIMIZATION: Use aggregated bars cache for different timesteps
703
+ # This avoids re-aggregating 5m/15m/1h bars from minute data every iteration
704
+ source_timestep = current_timestep
705
+ can_aggregate = (
706
+ source_timestep == "minute"
707
+ and timestep != source_timestep
708
+ and hasattr(data, 'polars_df') # Only for DataPolars objects
709
+ and timestep in ["5 minutes", "15 minutes", "30 minutes", "hour", "2 hours", "4 hours", "day"]
710
+ )
711
+
712
+ if can_aggregate:
713
+ # Try to get aggregated bars from cache
714
+ aggregated_df = self._get_or_aggregate_bars(asset, quote, length, source_timestep, timestep)
715
+ if aggregated_df is not None:
716
+ # We have aggregated data - now filter and tail it like get_bars would
717
+ import polars as pl
718
+
719
+ now = self.get_datetime()
720
+ # Apply timeshift if specified
721
+ # CRITICAL: Integer timeshift represents BAR offsets, not minute deltas!
722
+ # Must calculate adjustment based on the actual timestep being requested.
723
+ if timeshift:
724
+ from datetime import timedelta
725
+ if isinstance(timeshift, int):
726
+ # Calculate timedelta for one bar of this timestep
727
+ timestep_delta, _ = self.convert_timestep_str_to_timedelta(timestep)
728
+ # Multiply by timeshift to get total adjustment
729
+ # Example: timestep="5 minutes", timeshift=-2 → adjustment = -10 minutes
730
+ now = now + (timestep_delta * timeshift)
731
+ else:
732
+ # Timeshift is already a timedelta - use it directly
733
+ now = now + timeshift
734
+
735
+ # Filter to current time and take last 'length' bars
736
+ # Convert now to match polars DataFrame timezone
737
+ import pytz
738
+ if now.tzinfo is None:
739
+ now_aware = pytz.utc.localize(now)
740
+ else:
741
+ now_aware = now
742
+
743
+ polars_tz = aggregated_df["datetime"].dtype.time_zone
744
+ if polars_tz:
745
+ import pandas as pd
746
+ now_compat = pd.Timestamp(now_aware).tz_convert(polars_tz)
747
+ else:
748
+ now_compat = now_aware
749
+
750
+ filtered = aggregated_df.filter(pl.col("datetime") <= now_compat)
751
+ result = filtered.tail(length)
752
+
753
+ if result.height >= length:
754
+ logger.debug(f"[AGG CACHE] {asset.symbol} {timestep}: returning {result.height} bars from cache")
755
+ return result
756
+
757
+ # Aggregated slice is insufficient—evict this cache entry and try to fall back
758
+ logger.warning(
759
+ "[AGG CACHE] %s %s: insufficient rows (requested=%s, filtered=%s, returning=%s); falling back",
760
+ asset.symbol,
761
+ timestep,
762
+ length,
763
+ filtered.height,
764
+ result.height,
765
+ )
766
+ cache_key = self._get_aggregation_cache_key(asset, quote, timestep)
767
+ self._aggregated_cache.pop(cache_key, None)
768
+
769
+ direct_match = self._get_polars_data_entry(asset, quote, timestep)
770
+ if direct_match is not None:
771
+ data = direct_match
772
+ source_timestep = data.timestep
773
+ # Fall through to regular get_bars
774
+
775
+ # Regular path - use data.get_bars() which handles timestep conversion internally
776
+ now = self.get_datetime()
777
+
778
+ try:
779
+ res = data.get_bars(now, length=length, timestep=timestep, timeshift=timeshift)
780
+ # Return None if data.get_bars returns a ValueError
781
+ except ValueError as e:
782
+ logger.debug(f"Error getting bars for {asset}: {e}")
783
+ return None
784
+
785
+ return res
786
+
787
+ def _pull_source_symbol_bars_between_dates(
788
+ self,
789
+ asset,
790
+ timestep="",
791
+ quote=None,
792
+ exchange=None,
793
+ include_after_hours=True,
794
+ start_date=None,
795
+ end_date=None,
796
+ ):
797
+ """Pull all bars for an asset"""
798
+ timestep = timestep if timestep else self.MIN_TIMESTEP
799
+ asset_to_find = self.find_asset_in_data_store(asset, quote)
800
+
801
+ if asset_to_find in self._data_store:
802
+ # LRU tracking - mark this data as recently used
803
+ self._data_store.move_to_end(asset_to_find)
804
+ data = self._data_store[asset_to_find]
805
+ else:
806
+ if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.INDEX:
807
+ logger.warning(f"The index asset `{asset.symbol}` does not exist or does not have data. Index data may not be available from this data source. If using Polygon, note that some index data (like SPX) requires a paid subscription. Consider using Yahoo Finance for broader index data coverage.")
808
+ else:
809
+ logger.warning(f"The asset: `{asset}` does not exist or does not have data.")
810
+ return
811
+
812
+ try:
813
+ res = data.get_bars_between_dates(start_date=start_date, end_date=end_date, timestep=timestep)
814
+ # Return None if data.get_bars returns a ValueError
815
+ except ValueError as e:
816
+ logger.debug(f"Error getting bars for {asset}: {e}")
817
+ res = None
818
+ return res
819
+
820
+ def _pull_source_bars(
821
+ self,
822
+ assets,
823
+ length,
824
+ timestep="",
825
+ timeshift=None,
826
+ quote=None,
827
+ include_after_hours=True,
828
+ ):
829
+ """pull broker bars for a list assets"""
830
+ timestep = timestep if timestep else self.MIN_TIMESTEP
831
+ self._parse_source_timestep(timestep, reverse=True)
832
+
833
+ result = {}
834
+ for asset in assets:
835
+ result[asset] = self._pull_source_symbol_bars(
836
+ asset, length, timestep=timestep, timeshift=timeshift, quote=quote
837
+ )
838
+ # remove assets that have no data from the result
839
+ if result[asset] is None:
840
+ result.pop(asset)
841
+
842
+ return result
843
+
844
+ def _parse_source_symbol_bars(self, response, asset, quote=None, length=None, return_polars=False):
845
+ """parse broker response for a single asset
846
+
847
+ CRITICAL: return_polars defaults to False for backwards compatibility.
848
+ Existing strategies expect pandas DataFrames!
849
+ """
850
+ asset1 = asset
851
+ asset2 = quote
852
+ if isinstance(asset, tuple):
853
+ asset1, asset2 = asset
854
+ bars = Bars(response, self.SOURCE, asset1, quote=asset2, raw=response, return_polars=return_polars)
855
+ return bars
856
+
857
+ def get_yesterday_dividend(self, asset, quote=None):
858
+ pass
859
+
860
+ def get_yesterday_dividends(self, assets, quote=None):
861
+ pass
862
+
863
+ # =======Options methods.=================
864
+ def get_chains(self, asset: Asset, quote: Asset = None, exchange: str = None):
865
+ """Returns option chains.
866
+
867
+ Obtains option chain information for the asset (stock) from each
868
+ of the exchanges the options trade on and returns a dictionary
869
+ for each exchange.
870
+
871
+ Parameters
872
+ ----------
873
+ asset : Asset object
874
+ The stock whose option chain is being fetched. Represented
875
+ as an asset object.
876
+ quote : Asset object, optional
877
+ The quote asset. Default is None.
878
+ exchange : str, optional
879
+ The exchange to fetch the option chains from. For PandasData, will only use "SMART".
880
+
881
+ Returns
882
+ -------
883
+ dict
884
+ Mapping with keys such as ``Multiplier`` (e.g. ``"100"``) and ``Chains``.
885
+ ``Chains`` is a nested dictionary where expiration dates map to strike lists,
886
+ e.g. ``chains['Chains']['CALL']['2023-07-31'] = [strike1, strike2, ...]``.
887
+ """
888
+ chains = dict(
889
+ Multiplier=100,
890
+ Exchange="SMART",
891
+ Chains={"CALL": defaultdict(list), "PUT": defaultdict(list)},
892
+ )
893
+
894
+ for store_item, data in self._data_store.items():
895
+ store_asset = store_item[0]
896
+ if store_asset.asset_type != "option":
897
+ continue
898
+ if store_asset.symbol != asset.symbol:
899
+ continue
900
+ chains["Chains"][store_asset.right][store_asset.expiration].append(store_asset.strike)
901
+
902
+ return chains
903
+
904
+ def get_start_datetime_and_ts_unit(self, length, timestep, start_dt=None, start_buffer=timedelta(days=5)):
905
+ """
906
+ Get the start datetime for the data.
907
+
908
+ Parameters
909
+ ----------
910
+ length : int
911
+ The number of data points to get.
912
+ timestep : str
913
+ The timestep to use. For example, "1minute" or "1hour" or "1day".
914
+
915
+
916
+ Returns
917
+ -------
918
+ datetime
919
+ The start datetime.
920
+ str
921
+ The timestep unit.
922
+ """
923
+ # Convert timestep string to timedelta and get start datetime
924
+ td, ts_unit = self.convert_timestep_str_to_timedelta(timestep)
925
+
926
+ if ts_unit == "day":
927
+ weeks_requested = length // 5 # Full trading week is 5 days
928
+ extra_padding_days = weeks_requested * 3 # to account for 3day weekends
929
+ td = timedelta(days=length + extra_padding_days)
930
+ else:
931
+ td *= length
932
+
933
+ if start_dt is not None:
934
+ start_datetime = start_dt - td
935
+ else:
936
+ start_datetime = self.datetime_start - td
937
+
938
+ # Subtract an extra 5 days to the start datetime to make sure we have enough
939
+ # data when it's a sparsely traded asset, especially over weekends
940
+ start_datetime = start_datetime - start_buffer
941
+
942
+ return start_datetime, ts_unit
943
+
944
+ def get_historical_prices(
945
+ self,
946
+ asset: Asset,
947
+ length: int,
948
+ timestep: str = None,
949
+ timeshift: int = None,
950
+ quote: Asset = None,
951
+ exchange: str = None,
952
+ include_after_hours: bool = True,
953
+ # PolarsData supports return_polars to enable polars-backed Bars for performance.
954
+ # When True, returns Bars with polars DataFrame internally (lazy conversion to pandas).
955
+ # CRITICAL: Default MUST be False for backwards compatibility with existing strategies!
956
+ return_polars: bool = False,
957
+ ):
958
+ """Get bars for a given asset"""
959
+ # Periodically trim cached data to maintain sliding window
960
+ self._trim_cached_data()
961
+
962
+ # Enforce memory limits after trimming (same periodic frequency)
963
+ # This ensures total memory usage stays under 1GB cap
964
+ self._enforce_memory_limits()
965
+
966
+ if isinstance(asset, str):
967
+ asset = Asset(symbol=asset)
968
+
969
+ if not timestep:
970
+ timestep = self.get_timestep()
971
+ response = self._pull_source_symbol_bars(
972
+ asset,
973
+ length,
974
+ timestep=timestep,
975
+ timeshift=timeshift,
976
+ quote=quote,
977
+ exchange=exchange,
978
+ include_after_hours=include_after_hours,
979
+ )
980
+ if isinstance(response, float):
981
+ return response
982
+ elif response is None:
983
+ return None
984
+
985
+ bars = self._parse_source_symbol_bars(response, asset, quote=quote, length=length, return_polars=return_polars)
986
+ return bars
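
For reference, the aggregation path in _aggregate_polars_bars above relies on polars group_by_dynamic to roll minute bars up to coarser timeframes. The standalone sketch below reproduces that pattern on toy data; the frame contents, column names, and the 5-minute interval are illustrative assumptions, not lumibot API.

# Standalone sketch of the group_by_dynamic OHLCV aggregation pattern
# described above (toy data; not part of the lumibot package).
from datetime import datetime, timedelta

import polars as pl

# Build 30 one-minute bars of fake OHLCV data.
start = datetime(2024, 1, 2, 9, 30)
minutes = [start + timedelta(minutes=i) for i in range(30)]
df = pl.DataFrame(
    {
        "datetime": minutes,
        "open": [100.0 + i for i in range(30)],
        "high": [100.5 + i for i in range(30)],
        "low": [99.5 + i for i in range(30)],
        "close": [100.2 + i for i in range(30)],
        "volume": [1_000 + 10 * i for i in range(30)],
    }
).sort("datetime")  # group_by_dynamic expects the index column to be sorted

# Roll the 1-minute bars up to 5-minute bars, labeling each bucket by its
# left (opening) edge, mirroring closed="left", label="left" in the source.
five_minute = df.group_by_dynamic(
    "datetime", every="5m", closed="left", label="left"
).agg(
    [
        pl.col("open").first(),
        pl.col("high").max(),
        pl.col("low").min(),
        pl.col("close").last(),
        pl.col("volume").sum(),
    ]
)

print(five_minute)  # 6 rows: one aggregated bar per 5-minute bucket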
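
The memory cap in _enforce_memory_limits likewise relies on OrderedDict's LRU behavior: move_to_end marks an entry as recently used, and popitem(last=False) evicts the oldest. A minimal sketch of that pattern follows, with plain integers standing in for DataFrame.estimated_size() values and a deliberately tiny cap; the keys and sizes are illustrative assumptions.

# Minimal sketch of the OrderedDict LRU-eviction pattern described above
# (fake byte sizes stand in for polars' DataFrame.estimated_size()).
from collections import OrderedDict

MAX_STORAGE_BYTES = 1_000  # tiny cap so eviction triggers in this demo

cache = OrderedDict()
cache[("ES", "5 minutes")] = 600   # pretend byte sizes
cache[("NQ", "5 minutes")] = 500
cache[("CL", "hour")] = 300

# Accessing an entry marks it most-recently-used.
_ = cache[("ES", "5 minutes")]
cache.move_to_end(("ES", "5 minutes"))

# Evict least-recently-used entries until usage is back under the cap.
storage_used = sum(cache.values())
while storage_used > MAX_STORAGE_BYTES and cache:
    key, size = cache.popitem(last=False)  # last=False pops the oldest item
    storage_used -= size
    print(f"evicted {key}, freed {size} bytes")

print(f"remaining: {list(cache)} using {storage_used} bytes")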