lumibot 4.1.3__py3-none-any.whl → 4.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lumibot might be problematic.

Files changed (163)
  1. lumibot/backtesting/__init__.py +19 -5
  2. lumibot/backtesting/backtesting_broker.py +98 -18
  3. lumibot/backtesting/databento_backtesting.py +5 -686
  4. lumibot/backtesting/databento_backtesting_pandas.py +738 -0
  5. lumibot/backtesting/databento_backtesting_polars.py +860 -546
  6. lumibot/backtesting/fix_debug.py +37 -0
  7. lumibot/backtesting/thetadata_backtesting.py +9 -355
  8. lumibot/backtesting/thetadata_backtesting_pandas.py +1167 -0
  9. lumibot/brokers/alpaca.py +8 -1
  10. lumibot/brokers/schwab.py +12 -2
  11. lumibot/credentials.py +13 -0
  12. lumibot/data_sources/__init__.py +5 -8
  13. lumibot/data_sources/data_source.py +6 -2
  14. lumibot/data_sources/data_source_backtesting.py +30 -0
  15. lumibot/data_sources/databento_data.py +5 -390
  16. lumibot/data_sources/databento_data_pandas.py +440 -0
  17. lumibot/data_sources/databento_data_polars.py +15 -9
  18. lumibot/data_sources/pandas_data.py +30 -17
  19. lumibot/data_sources/polars_data.py +986 -0
  20. lumibot/data_sources/polars_mixin.py +472 -96
  21. lumibot/data_sources/polygon_data_polars.py +5 -0
  22. lumibot/data_sources/yahoo_data.py +9 -2
  23. lumibot/data_sources/yahoo_data_polars.py +5 -0
  24. lumibot/entities/__init__.py +15 -0
  25. lumibot/entities/asset.py +5 -28
  26. lumibot/entities/bars.py +89 -20
  27. lumibot/entities/data.py +29 -6
  28. lumibot/entities/data_polars.py +668 -0
  29. lumibot/entities/position.py +38 -4
  30. lumibot/strategies/_strategy.py +2 -1
  31. lumibot/strategies/strategy.py +61 -49
  32. lumibot/tools/backtest_cache.py +284 -0
  33. lumibot/tools/databento_helper.py +35 -35
  34. lumibot/tools/databento_helper_polars.py +738 -775
  35. lumibot/tools/futures_roll.py +251 -0
  36. lumibot/tools/indicators.py +135 -104
  37. lumibot/tools/polars_utils.py +142 -0
  38. lumibot/tools/thetadata_helper.py +1068 -134
  39. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/METADATA +9 -1
  40. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/RECORD +71 -147
  41. tests/backtest/test_databento.py +37 -6
  42. tests/backtest/test_databento_comprehensive_trading.py +8 -4
  43. tests/backtest/test_databento_parity.py +4 -2
  44. tests/backtest/test_debug_avg_fill_price.py +1 -1
  45. tests/backtest/test_example_strategies.py +11 -1
  46. tests/backtest/test_futures_edge_cases.py +3 -3
  47. tests/backtest/test_futures_single_trade.py +2 -2
  48. tests/backtest/test_futures_ultra_simple.py +2 -2
  49. tests/backtest/test_polars_lru_eviction.py +470 -0
  50. tests/backtest/test_yahoo.py +42 -0
  51. tests/test_asset.py +4 -4
  52. tests/test_backtest_cache_manager.py +149 -0
  53. tests/test_backtesting_data_source_env.py +6 -0
  54. tests/test_continuous_futures_resolution.py +60 -48
  55. tests/test_data_polars_parity.py +160 -0
  56. tests/test_databento_asset_validation.py +23 -5
  57. tests/test_databento_backtesting.py +1 -1
  58. tests/test_databento_backtesting_polars.py +312 -192
  59. tests/test_databento_data.py +220 -463
  60. tests/test_databento_live.py +10 -10
  61. tests/test_futures_roll.py +38 -0
  62. tests/test_indicator_subplots.py +101 -0
  63. tests/test_market_infinite_loop_bug.py +77 -3
  64. tests/test_polars_resample.py +67 -0
  65. tests/test_polygon_helper.py +46 -0
  66. tests/test_thetadata_backwards_compat.py +97 -0
  67. tests/test_thetadata_helper.py +222 -23
  68. tests/test_thetadata_pandas_verification.py +186 -0
  69. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  70. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  71. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  72. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  73. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  74. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  75. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  76. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  77. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  78. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  79. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  80. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  81. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  82. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  83. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  84. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  85. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  86. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  87. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  88. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  89. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  90. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  91. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  92. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  93. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  94. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  95. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  96. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  97. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  98. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  99. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  100. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  101. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  102. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  103. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  104. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  105. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  106. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  107. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  108. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  109. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  110. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  111. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  112. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  113. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  114. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  115. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  116. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  117. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  118. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  119. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  120. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  121. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  122. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  123. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  124. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  125. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  126. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  127. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  128. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  129. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  130. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  131. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  132. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  133. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  134. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  135. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  136. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  137. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  138. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  139. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  140. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  141. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  142. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  143. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  144. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  145. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  146. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  147. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  148. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  149. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  150. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  151. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  152. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  153. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  154. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  155. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  156. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  157. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  158. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  159. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  160. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  161. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/WHEEL +0 -0
  162. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/licenses/LICENSE +0 -0
  163. {lumibot-4.1.3.dist-info → lumibot-4.2.1.dist-info}/top_level.txt +0 -0
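
Before the file-by-file diff, a quick orientation: the headline change in 4.2.1 is the split of the ThetaData and DataBento backtesting sources into pandas and polars variants, with the old modules slimmed down (note the -686 and -355 deltas above), apparently into delegating shims. Below is a minimal usage sketch of the new pandas ThetaData source, assuming ThetaDataBacktestingPandas is re-exported from lumibot.backtesting (the __init__.py change suggests it is) and using lumibot's usual Trader/BacktestingBroker flow; the strategy, dates, and credential strings are placeholders:

    from datetime import datetime

    from lumibot.backtesting import BacktestingBroker, ThetaDataBacktestingPandas
    from lumibot.strategies import Strategy
    from lumibot.traders import Trader

    class BuyAndHold(Strategy):
        def on_trading_iteration(self):
            if self.first_iteration:
                # Buy 10 shares of SPY on the first bar and hold.
                self.submit_order(self.create_order("SPY", 10, "buy"))

    # The data source doubles as the data feed for the backtesting broker;
    # username/password fall back to THETADATA_CONFIG when omitted.
    data_source = ThetaDataBacktestingPandas(
        datetime_start=datetime(2023, 1, 2),
        datetime_end=datetime(2023, 3, 31),
        username="<thetadata-username>",  # placeholder
        password="<thetadata-password>",  # placeholder
    )
    broker = BacktestingBroker(data_source)
    trader = Trader(backtest=True)
    trader.add_strategy(BuyAndHold(broker=broker))
    trader.run_all()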
lumibot/backtesting/thetadata_backtesting_pandas.py (new file)
@@ -0,0 +1,1167 @@
+ from decimal import Decimal
+ from typing import Dict, Optional, Union
+
+ import logging
+ import pandas as pd
+ import pytz
+ import subprocess
+ from datetime import date, datetime, timedelta
+
+ from lumibot.data_sources import PandasData
+ from lumibot.entities import Asset, Data
+ from lumibot.credentials import THETADATA_CONFIG
+ from lumibot.tools import thetadata_helper
+
+ logger = logging.getLogger(__name__)
+
+
+ def _parity_log(message: str, *args) -> None:
+     """Emit parity diagnostics only when debug logging is enabled."""
+     if logger.isEnabledFor(logging.DEBUG):
+         logger.debug(message, *args)
+
+
+ START_BUFFER = timedelta(days=5)
+
+
+ class ThetaDataBacktestingPandas(PandasData):
+     """
+     Backtesting implementation of ThetaData
+     """
+
+     IS_BACKTESTING_BROKER = True
+
+     # Enable fallback to last_price when bid/ask quotes are unavailable for options
+     option_quote_fallback_allowed = True
+
+     def __init__(
+         self,
+         datetime_start,
+         datetime_end,
+         pandas_data=None,
+         username=None,
+         password=None,
+         use_quote_data=True,
+         **kwargs,
+     ):
+         # Pass allow_option_quote_fallback to parent to enable fallback mechanism
+         super().__init__(datetime_start=datetime_start, datetime_end=datetime_end, pandas_data=pandas_data,
+                          allow_option_quote_fallback=True, **kwargs)
+
+         if username is None:
+             username = THETADATA_CONFIG.get("THETADATA_USERNAME")
+         if password is None:
+             password = THETADATA_CONFIG.get("THETADATA_PASSWORD")
+         if username is None or password is None:
+             logger.warning("ThetaData credentials are not configured; ThetaTerminal may fail to authenticate.")
+
+         self._username = username
+         self._password = password
+         self._use_quote_data = use_quote_data
+
+         self._dataset_metadata: Dict[tuple, Dict[str, object]] = {}
+
+         # Set data_source to self since this class acts as both broker and data source
+         self.data_source = self
+
+         self.kill_processes_by_name("ThetaTerminal.jar")
+         thetadata_helper.reset_theta_terminal_tracking()
+
+     def is_weekend(self, date):
+         """
+         Check if the given date is a weekend.
+
+         :param date: datetime.date object
+         :return: Boolean, True if weekend, False otherwise
+         """
+         return date.weekday() >= 5  # 5 = Saturday, 6 = Sunday
+
+     def kill_processes_by_name(self, keyword):
+         try:
+             # Find all processes related to the keyword
+             result = subprocess.run(['pgrep', '-f', keyword], capture_output=True, text=True)
+             pids = result.stdout.strip().split('\n')
+
+             if pids:
+                 for pid in pids:
+                     if pid:  # Ensure the PID is not empty
+                         logger.info(f"Killing process with PID: {pid}")
+                         subprocess.run(['kill', '-9', pid])
+                 logger.info(f"All processes related to '{keyword}' have been killed.")
+             else:
+                 logger.info(f"No processes found related to '{keyword}'.")
+
+         except Exception as e:
+             logger.error(f"An error occurred during kill process: {e}")
+
+     def _normalize_default_timezone(self, dt_value: Optional[datetime]) -> Optional[datetime]:
+         """Normalize datetimes to the strategy timezone for consistent comparisons."""
+         if dt_value is None:
+             return None
+         if isinstance(dt_value, pd.Timestamp):
+             dt_value = dt_value.to_pydatetime()
+         if dt_value.tzinfo is None:
+             try:
+                 dt_value = self.tzinfo.localize(dt_value)
+             except AttributeError:
+                 dt_value = dt_value.replace(tzinfo=self.tzinfo)
+         return self.to_default_timezone(dt_value)
+
+     def _option_expiration_end(self, asset: Asset) -> Optional[datetime]:
+         """Return expiration datetime localized to default timezone, if applicable."""
+         if getattr(asset, "asset_type", None) != Asset.AssetType.OPTION or asset.expiration is None:
+             return None
+         expiration_dt = datetime.combine(asset.expiration, datetime.max.time())
+         try:
+             expiration_dt = self.tzinfo.localize(expiration_dt)
+         except AttributeError:
+             expiration_dt = expiration_dt.replace(tzinfo=self.tzinfo)
+         return self.to_default_timezone(expiration_dt)
+
+     def _record_metadata(self, key, frame: pd.DataFrame, ts_unit: str, asset: Asset) -> None:
+         """Persist dataset coverage details for reuse checks."""
+         previous_meta = self._dataset_metadata.get(key, {})
+
+         if frame is None or frame.empty:
+             start = end = None
+             rows = 0
+         else:
+             if isinstance(frame.index, pd.DatetimeIndex):
+                 dt_source = frame.index
+             elif "datetime" in frame.columns:
+                 dt_source = frame["datetime"]
+             elif "index" in frame.columns:
+                 dt_source = frame["index"]
+             else:
+                 dt_source = frame.index
+             dt_index = pd.to_datetime(dt_source)
+             if len(dt_index):
+                 start = dt_index.min().to_pydatetime()
+                 end = dt_index.max().to_pydatetime()
+             else:
+                 start = end = None
+             rows = len(frame)
+
+         normalized_start = self._normalize_default_timezone(start)
+         normalized_end = self._normalize_default_timezone(end)
+
+         metadata: Dict[str, object] = {
+             "timestep": ts_unit,
+             "start": normalized_start,
+             "end": normalized_end,
+             "rows": rows,
+         }
+         metadata["empty_fetch"] = frame is None or frame.empty
+
+         if frame is not None and not frame.empty and "missing" in frame.columns:
+             placeholder_flags = frame["missing"].fillna(False).astype(bool)
+             metadata["placeholders"] = int(placeholder_flags.sum())
+             metadata["tail_placeholder"] = bool(placeholder_flags.iloc[-1])
+             if placeholder_flags.shape[0] and bool(placeholder_flags.all()):
+                 metadata["empty_fetch"] = True
+         else:
+             metadata["placeholders"] = 0
+             metadata["tail_placeholder"] = False
+             if not metadata["empty_fetch"]:
+                 metadata["empty_fetch"] = False
+
+         if getattr(asset, "asset_type", None) == Asset.AssetType.OPTION:
+             metadata["expiration"] = asset.expiration
+
+         if metadata.get("expiration") != previous_meta.get("expiration"):
+             metadata["expiration_notice"] = False
+         else:
+             metadata["expiration_notice"] = previous_meta.get("expiration_notice", False)
+
+         self._dataset_metadata[key] = metadata
+
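+     # Illustration only -- a representative _dataset_metadata entry for one
+     # (asset, quote) key, with hypothetical values:
+     #     {
+     #         "timestep": "day",
+     #         "start": <tz-aware 2023-01-03>,
+     #         "end": <tz-aware 2023-03-31>,
+     #         "rows": 62,
+     #         "empty_fetch": False,
+     #         "placeholders": 2,
+     #         "tail_placeholder": False,
+     #         "expiration_notice": False,
+     #     }
+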
+     def _finalize_day_frame(
+         self,
+         pandas_df: Optional[pd.DataFrame],
+         current_dt: datetime,
+         requested_length: int,
+         timeshift: Optional[timedelta],
+         asset: Optional[Asset] = None,  # DEBUG-LOG: Added for logging
+     ) -> Optional[pd.DataFrame]:
+         # DEBUG-LOG: Method entry with full parameter context
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][ENTRY] asset=%s current_dt=%s requested_length=%s timeshift=%s input_shape=%s input_columns=%s input_index_type=%s input_has_tz=%s input_index_sample=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             current_dt.isoformat() if hasattr(current_dt, 'isoformat') else current_dt,
+             requested_length,
+             timeshift,
+             pandas_df.shape if pandas_df is not None else 'NONE',
+             list(pandas_df.columns) if pandas_df is not None else 'NONE',
+             type(pandas_df.index).__name__ if pandas_df is not None else 'NONE',
+             getattr(pandas_df.index, 'tz', None) if pandas_df is not None else 'NONE',
+             list(pandas_df.index[:5]) if pandas_df is not None and len(pandas_df) > 0 else 'EMPTY'
+         )
+
+         if pandas_df is None or pandas_df.empty:
+             # DEBUG-LOG: Early return for empty input
+             logger.debug(
+                 "[THETA][DEBUG][PANDAS][FINALIZE][EMPTY_INPUT] asset=%s returning_none_or_empty=True",
+                 getattr(asset, 'symbol', asset) if asset else 'UNKNOWN'
+             )
+             return pandas_df
+
+         frame = pandas_df.copy()
+         if "datetime" in frame.columns:
+             frame = frame.set_index("datetime")
+
+         frame.index = pd.to_datetime(frame.index)
+
+         # DEBUG-LOG: Timezone state before localization
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][TZ_CHECK] asset=%s frame_index_tz=%s target_tz=%s needs_localization=%s frame_shape=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             frame.index.tz,
+             self.tzinfo,
+             frame.index.tz is None,
+             frame.shape
+         )
+
+         if frame.index.tz is None:
+             frame.index = frame.index.tz_localize(pytz.UTC)
+         localized_index = frame.index.tz_convert(self.tzinfo)
+         normalized_for_cutoff = localized_index.normalize()
+
+         # DEBUG-LOG: After localization
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][LOCALIZED] asset=%s localized_index_tz=%s localized_sample=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             localized_index.tz,
+             list(localized_index[:3]) if len(localized_index) > 0 else 'EMPTY'
+         )
+
+         cutoff = self.to_default_timezone(current_dt).replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=1)
+         cutoff_mask = normalized_for_cutoff <= cutoff
+
+         # DEBUG-LOG: Cutoff filtering state
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][CUTOFF] asset=%s cutoff=%s cutoff_mask_true=%s cutoff_mask_false=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             cutoff,
+             int(cutoff_mask.sum()) if hasattr(cutoff_mask, 'sum') else 'N/A',
+             int((~cutoff_mask).sum()) if hasattr(cutoff_mask, 'sum') else 'N/A'
+         )
+
+         if timeshift and not isinstance(timeshift, int):
+             cutoff_mask &= normalized_for_cutoff <= (cutoff - timeshift)
+             # DEBUG-LOG: After timeshift adjustment
+             logger.debug(
+                 "[THETA][DEBUG][PANDAS][FINALIZE][TIMESHIFT_ADJUSTED] asset=%s timeshift=%s new_cutoff=%s cutoff_mask_true=%s",
+                 getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+                 timeshift,
+                 cutoff - timeshift,
+                 int(cutoff_mask.sum()) if hasattr(cutoff_mask, 'sum') else 'N/A'
+             )
+
+         frame = frame.loc[cutoff_mask]
+         localized_index = localized_index[cutoff_mask]
+         normalized_for_cutoff = normalized_for_cutoff[cutoff_mask]
+
+         # DEBUG-LOG: After cutoff filtering
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][AFTER_CUTOFF] asset=%s shape=%s index_range=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             frame.shape,
+             (localized_index[0], localized_index[-1]) if len(localized_index) > 0 else ('EMPTY', 'EMPTY')
+         )
+
+         if timeshift and isinstance(timeshift, int):
+             if timeshift > 0:
+                 frame = frame.iloc[:-timeshift] if len(frame) > timeshift else frame.iloc[0:0]
+                 localized_index = localized_index[: len(frame)]
+
+         normalized_index = localized_index.normalize()
+         frame = frame.copy()
+         frame.index = normalized_index
+         raw_frame = frame.copy()
+
+         # DEBUG-LOG: After normalization
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][NORMALIZED_INDEX] asset=%s shape=%s index_sample=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             frame.shape,
+             list(normalized_index[:3]) if len(normalized_index) > 0 else 'EMPTY'
+         )
+
+         expected_last_dt = self.to_default_timezone(current_dt).replace(hour=0, minute=0, second=0, microsecond=0) - timedelta(days=1)
+         target_index = pd.date_range(end=expected_last_dt, periods=requested_length, freq="D", tz=self.tzinfo)
+
+         # DEBUG-LOG: Target index details
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][TARGET_INDEX] asset=%s target_length=%s target_range=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             len(target_index),
+             (target_index[0], target_index[-1]) if len(target_index) > 0 else ('EMPTY', 'EMPTY')
+         )
+
+         if "missing" not in frame.columns:
+             frame["missing"] = False
+
+         frame = frame.reindex(target_index)
+
+         # DEBUG-LOG: After reindex
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][AFTER_REINDEX] asset=%s shape=%s columns=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             frame.shape,
+             list(frame.columns)
+         )
+
+         value_columns = [col for col in ["open", "high", "low", "close", "volume"] if col in frame.columns]
+         if value_columns:
+             placeholder_mask = frame[value_columns].isna().all(axis=1)
+         else:
+             placeholder_mask = frame.isna().all(axis=1)
+
+         # DEBUG-LOG: Placeholder mask computation
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][PLACEHOLDER_MASK] asset=%s placeholder_true=%s placeholder_false=%s value_columns=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             int(placeholder_mask.sum()) if hasattr(placeholder_mask, 'sum') else 'N/A',
+             int((~placeholder_mask).sum()) if hasattr(placeholder_mask, 'sum') else 'N/A',
+             value_columns
+         )
+
+         frame.loc[placeholder_mask, "missing"] = True
+         frame["missing"] = frame["missing"].fillna(False)
+         frame = frame.sort_index()
+         frame.index.name = "datetime"
+
+         if "missing" in frame.columns:
+             # Drop placeholder rows (weekends/holidays) to avoid NaNs in returned results.
+             missing_flags = frame["missing"].astype(bool)
+             real_rows = frame.loc[~missing_flags]
+             if len(real_rows) < requested_length:
+                 deficit = requested_length - len(real_rows)
+                 raw_missing_flags = raw_frame.get("missing")
+                 if raw_missing_flags is not None:
+                     raw_real_rows = raw_frame.loc[~raw_missing_flags.astype(bool)]
+                 else:
+                     raw_real_rows = raw_frame
+                 supplemental = raw_real_rows.tail(requested_length + deficit)
+                 combined = pd.concat([supplemental, real_rows]).sort_index()
+                 combined = combined[~combined.index.duplicated(keep="last")]
+                 frame = combined.tail(requested_length).copy()
+             else:
+                 frame = real_rows.tail(requested_length).copy()
+         else:
+             frame = frame.tail(requested_length).copy()
+
+         if value_columns:
+             frame["missing"] = frame[value_columns].isna().all(axis=1)
+         else:
+             frame["missing"] = False
+
+         # DEBUG-LOG: Final missing flag state
+         try:
+             missing_count = int(frame["missing"].sum())
+             logger.debug(
+                 "[THETA][DEBUG][PANDAS][FINALIZE][MISSING_FINAL] asset=%s missing_true=%s missing_false=%s total_rows=%s",
+                 getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+                 missing_count,
+                 len(frame) - missing_count,
+                 len(frame)
+             )
+         except Exception as e:
+             logger.debug(
+                 "[THETA][DEBUG][PANDAS][FINALIZE][MISSING_FINAL] asset=%s error=%s",
+                 getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+                 str(e)
+             )
+
+         # DEBUG-LOG: Return value
+         logger.debug(
+             "[THETA][DEBUG][PANDAS][FINALIZE][RETURN] asset=%s shape=%s columns=%s index_range=%s",
+             getattr(asset, 'symbol', asset) if asset else 'UNKNOWN',
+             frame.shape,
+             list(frame.columns),
+             (frame.index[0], frame.index[-1]) if len(frame) > 0 else ('EMPTY', 'EMPTY')
+         )
+
+         return frame
+
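+     # Worked example (hypothetical): with current_dt = 2023-03-15 09:35 ET and
+     # requested_length = 3, target_index spans the three calendar days ending
+     # 2023-03-14; weekend/holiday rows reindexed in as NaN are flagged via the
+     # "missing" column and then replaced with the most recent real rows, so the
+     # caller still receives three tradeable daily bars.
+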
+     def _update_pandas_data(self, asset, quote, length, timestep, start_dt=None):
+         """
+         Get asset data and update the self.pandas_data dictionary.
+
+         Parameters
+         ----------
+         asset : Asset
+             The asset to get data for.
+         quote : Asset
+             The quote asset to use. For example, if asset is "SPY" and quote is "USD", the data will be for "SPY/USD".
+         length : int
+             The number of data points to get.
+         timestep : str
+             The timestep to use. For example, "minute", "hour", or "day".
+
+         Returns
+         -------
+         None
+             Updates the self.pandas_data dictionary in place; always returns None.
+         """
+         # DEBUG: Log when strike 157 is requested
+         if hasattr(asset, 'strike') and asset.strike == 157:
+             import traceback
+             logger.info(f"\n[DEBUG STRIKE 157] _update_pandas_data called for asset: {asset}")
+             logger.info(f"[DEBUG STRIKE 157] Traceback:\n{''.join(traceback.format_stack())}")
+
+         search_asset = asset
+         asset_separated = asset
+         quote_asset = quote if quote is not None else Asset("USD", "forex")
+
+         if isinstance(search_asset, tuple):
+             asset_separated, quote_asset = search_asset
+         else:
+             search_asset = (search_asset, quote_asset)
+
+         if asset_separated.asset_type == "option":
+             expiry = asset_separated.expiration
+             if self.is_weekend(expiry):
+                 logger.info(f"\nSKIP: Expiry {expiry} date is a weekend, no contract exists: {asset_separated}")
+                 return None
+
+         # Get the start datetime and timestep unit
+         start_datetime, ts_unit = self.get_start_datetime_and_ts_unit(
+             length, timestep, start_dt, start_buffer=START_BUFFER
+         )
+
+         requested_length = length
+         requested_start = self._normalize_default_timezone(start_datetime)
+         start_threshold = requested_start + START_BUFFER if requested_start is not None else None
+         current_dt = self.get_datetime()
+         end_requirement = self.datetime_end if ts_unit == "day" else current_dt
+         end_requirement = self._normalize_default_timezone(end_requirement)
+         expiration_dt = self._option_expiration_end(asset_separated)
+         if expiration_dt is not None and end_requirement is not None and expiration_dt < end_requirement:
+             end_requirement = expiration_dt
+
+         existing_data = self.pandas_data.get(search_asset)
+         if existing_data is not None and search_asset not in self._dataset_metadata:
+             self._record_metadata(search_asset, existing_data.df, existing_data.timestep, asset_separated)
+         existing_meta = self._dataset_metadata.get(search_asset)
+
+         if existing_data is not None and existing_meta and existing_meta.get("timestep") == ts_unit:
+             existing_start = existing_meta.get("start")
+             existing_rows = existing_meta.get("rows", 0)
+             existing_end = existing_meta.get("end")
+
+             # DEBUG-LOG: Cache validation entry
+             logger.debug(
+                 "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][CACHE_VALIDATION][ENTRY] asset=%s timestep=%s | "
+                 "REQUESTED: start=%s start_threshold=%s end_requirement=%s length=%d | "
+                 "EXISTING: start=%s end=%s rows=%d",
+                 asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                 ts_unit,
+                 requested_start.isoformat() if requested_start else None,
+                 start_threshold.isoformat() if start_threshold else None,
+                 end_requirement.isoformat() if end_requirement else None,
+                 requested_length,
+                 existing_start.isoformat() if existing_start else None,
+                 existing_end.isoformat() if existing_end else None,
+                 existing_rows
+             )
+
+             start_ok = (
+                 existing_start is not None
+                 and (start_threshold is None or existing_start <= start_threshold)
+             )
+
+             # DEBUG-LOG: Start validation result
+             logger.debug(
+                 "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][START_VALIDATION] asset=%s | "
+                 "start_ok=%s | "
+                 "existing_start=%s start_threshold=%s | "
+                 "reasoning=%s",
+                 asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                 start_ok,
+                 existing_start.isoformat() if existing_start else None,
+                 start_threshold.isoformat() if start_threshold else None,
+                 "existing_start <= start_threshold" if start_ok else
+                 ("start_threshold is None" if start_threshold is None else "existing_start > start_threshold")
+             )
+
+             tail_placeholder = existing_meta.get("tail_placeholder", False)
+             end_ok = True
+
+             # DEBUG-LOG: End validation entry
+             logger.debug(
+                 "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][END_VALIDATION][ENTRY] asset=%s | "
+                 "end_requirement=%s existing_end=%s tail_placeholder=%s",
+                 asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                 end_requirement.isoformat() if end_requirement else None,
+                 existing_end.isoformat() if existing_end else None,
+                 tail_placeholder
+             )
+
+             if end_requirement is not None:
+                 if existing_end is None:
+                     end_ok = False
+                     logger.debug(
+                         "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][END_VALIDATION][RESULT] asset=%s | "
+                         "end_ok=FALSE | reason=existing_end_is_None",
+                         asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated)
+                     )
+                 else:
+                     # FIX: For daily data, use date-only comparison instead of datetime comparison.
+                     # This prevents false negatives when existing_end is midnight and end_requirement is later the same day.
+                     if ts_unit == "day":
+                         existing_end_date = existing_end.date() if hasattr(existing_end, 'date') else existing_end
+                         end_requirement_date = end_requirement.date() if hasattr(end_requirement, 'date') else end_requirement
+                         existing_end_cmp = existing_end_date
+                         end_requirement_cmp = end_requirement_date
+                     else:
+                         existing_end_cmp = existing_end
+                         end_requirement_cmp = end_requirement
+
+                     if existing_end_cmp > end_requirement_cmp:
+                         end_ok = True
+                         logger.debug(
+                             "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][END_VALIDATION][RESULT] asset=%s | "
+                             "end_ok=TRUE | reason=existing_end_exceeds_requirement | "
+                             "existing_end=%s end_requirement=%s ts_unit=%s",
+                             asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                             existing_end.isoformat(),
+                             end_requirement.isoformat(),
+                             ts_unit
+                         )
+                     elif existing_end_cmp == end_requirement_cmp:
+                         weekday = existing_end.weekday() if hasattr(existing_end, "weekday") else None
+                         placeholder_on_weekend = tail_placeholder and weekday is not None and weekday >= 5
+                         placeholder_empty_fetch = tail_placeholder and existing_meta.get("empty_fetch")
+                         end_ok = (not tail_placeholder) or placeholder_on_weekend or placeholder_empty_fetch
+
+                         logger.debug(
+                             "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][END_VALIDATION][EXACT_MATCH] asset=%s | "
+                             "existing_end == end_requirement | "
+                             "weekday=%s placeholder_on_weekend=%s placeholder_empty_fetch=%s | "
+                             "end_ok=%s ts_unit=%s",
+                             asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                             weekday,
+                             placeholder_on_weekend,
+                             placeholder_empty_fetch,
+                             end_ok,
+                             ts_unit
+                         )
+                     else:
+                         end_ok = False
+                         logger.debug(
+                             "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][END_VALIDATION][RESULT] asset=%s | "
+                             "end_ok=FALSE | reason=existing_end_less_than_requirement | "
+                             "existing_end=%s end_requirement=%s ts_unit=%s",
+                             asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                             existing_end.isoformat(),
+                             end_requirement.isoformat(),
+                             ts_unit
+                         )
+
+             cache_covers = (
+                 start_ok
+                 and existing_rows >= requested_length
+                 and end_ok
+             )
+
+             # DEBUG-LOG: Final cache decision
+             logger.debug(
+                 "[DEBUG][BACKTEST][THETA][DEBUG][PANDAS][CACHE_DECISION] asset=%s | "
+                 "cache_covers=%s | "
+                 "start_ok=%s rows_ok=%s (existing=%d >= requested=%d) end_ok=%s",
+                 asset_separated.symbol if hasattr(asset_separated, 'symbol') else str(asset_separated),
+                 cache_covers,
+                 start_ok,
+                 existing_rows >= requested_length,
+                 existing_rows,
+                 requested_length,
+                 end_ok
+             )
+
+             if cache_covers:
+                 if (
+                     expiration_dt is not None
+                     and end_requirement is not None
+                     and expiration_dt == end_requirement
+                     and not existing_meta.get("expiration_notice")
+                 ):
+                     logger.debug(
+                         "[THETA][DEBUG][THETADATA-PANDAS] Reusing cached data for %s/%s through option expiry %s.",
+                         asset_separated,
+                         quote_asset,
+                         asset_separated.expiration,
+                     )
+                     existing_meta["expiration_notice"] = True
+                 else:
+                     logger.debug(
+                         "[THETA][DEBUG][THETADATA-PANDAS] cache covers %s/%s (%s) from %s to %s; length=%s rows=%s -> reuse",
+                         asset_separated,
+                         quote_asset,
+                         ts_unit,
+                         existing_start,
+                         existing_end,
+                         requested_length,
+                         existing_rows,
+                     )
+                 return None
+
+             reasons = []
+             if existing_start is None or (start_threshold is not None and existing_start > start_threshold):
+                 reasons.append("start")
+             if existing_rows < requested_length:
+                 reasons.append("rows")
+             if not end_ok:
+                 reasons.append("end")
+             logger.debug(
+                 "[THETA][DEBUG][THETADATA-PANDAS] refreshing cache for %s/%s (%s); reasons=%s "
+                 "(existing_start=%s requested_start=%s existing_end=%s end_requirement=%s existing_rows=%s needed_rows=%s)",
+                 asset_separated,
+                 quote_asset,
+                 ts_unit,
+                 ",".join(reasons) or "unknown",
+                 existing_start,
+                 requested_start,
+                 existing_end,
+                 end_requirement,
+                 existing_rows,
+                 requested_length,
+             )
+
+         # Check if we have data for this asset
+         if search_asset in self.pandas_data:
+             asset_data = self.pandas_data[search_asset]
+             asset_data_df = asset_data.df
+             data_start_datetime = asset_data_df.index[0]
+
+             # Get the timestep of the data
+             data_timestep = asset_data.timestep
+
+             # If the timestep is the same, we don't need to update the data
+             if data_timestep == ts_unit:
+                 # Check if we have enough data (5 days is the buffer we subtracted from the start datetime)
+                 if (data_start_datetime - start_datetime) < START_BUFFER:
+                     return None
+
+             # Always try to get the lowest timestep possible because we can always resample
+             # If day is requested then make sure we at least have data that's less than a day
+             if ts_unit == "day":
+                 if data_timestep == "minute":
+                     # Check if we have enough data (5 days is the buffer we subtracted from the start datetime)
+                     if (data_start_datetime - start_datetime) < START_BUFFER:
+                         return None
+                     else:
+                         # We don't have enough data, so we need to get more (but in minutes)
+                         ts_unit = "minute"
+                 elif data_timestep == "hour":
+                     # Check if we have enough data (5 days is the buffer we subtracted from the start datetime)
+                     if (data_start_datetime - start_datetime) < START_BUFFER:
+                         return None
+                     else:
+                         # We don't have enough data, so we need to get more (but in hours)
+                         ts_unit = "hour"
+
+             # If hour is requested then make sure we at least have data that's less than an hour
+             if ts_unit == "hour":
+                 if data_timestep == "minute":
+                     # Check if we have enough data (5 days is the buffer we subtracted from the start datetime)
+                     if (data_start_datetime - start_datetime) < START_BUFFER:
+                         return None
+                     else:
+                         # We don't have enough data, so we need to get more (but in minutes)
+                         ts_unit = "minute"
+
+         # Download data from ThetaData
+         # Get ohlc data from ThetaData
+         date_time_now = self.get_datetime()
+         logger.debug(
+             "[THETA][DEBUG][THETADATA-PANDAS] fetch asset=%s quote=%s length=%s timestep=%s start=%s end=%s",
+             asset_separated,
+             quote_asset,
+             length,
+             timestep,
+             start_datetime,
+             self.datetime_end,
+         )
+         df_ohlc = thetadata_helper.get_price_data(
+             self._username,
+             self._password,
+             asset_separated,
+             start_datetime,
+             self.datetime_end,
+             timespan=ts_unit,
+             quote_asset=quote_asset,
+             dt=date_time_now,
+             datastyle="ohlc",
+             include_after_hours=True  # Default to True for extended hours data
+         )
+         if df_ohlc is None or df_ohlc.empty:
+             expired_reason = (
+                 expiration_dt is not None
+                 and end_requirement is not None
+                 and expiration_dt == end_requirement
+             )
+             if expired_reason:
+                 logger.debug(
+                     "[THETA][DEBUG][THETADATA-PANDAS] No new OHLC rows for %s/%s (%s); option expired on %s. Keeping cached data.",
+                     asset_separated,
+                     quote_asset,
+                     ts_unit,
+                     asset_separated.expiration,
+                 )
+                 if existing_meta is not None:
+                     existing_meta["expiration_notice"] = True
+             else:
+                 logger.warning(f"No OHLC data returned for {asset_separated} / {quote_asset} ({ts_unit}); skipping cache update.")
+             cache_df = thetadata_helper.load_cache(
+                 thetadata_helper.build_cache_filename(asset_separated, ts_unit, "ohlc")
+             )
+             if cache_df is not None and len(cache_df) > 0:
+                 placeholder_data = Data(asset_separated, cache_df, timestep=ts_unit, quote=quote_asset)
+                 placeholder_update = self._set_pandas_data_keys([placeholder_data])
+                 if placeholder_update:
+                     self.pandas_data.update(placeholder_update)
+                     self._data_store.update(placeholder_update)
+                 self._record_metadata(search_asset, placeholder_data.df, ts_unit, asset_separated)
+                 logger.debug(
+                     "[THETA][DEBUG][THETADATA-PANDAS] refreshed metadata from cache for %s/%s (%s) after empty fetch.",
+                     asset_separated,
+                     quote_asset,
+                     ts_unit,
+                 )
+             return None
+
+         df = df_ohlc
+
+         # Quote data (bid/ask) is only available for intraday data (minute, hour, second)
+         # For daily+ data, only use OHLC
+         if self._use_quote_data and ts_unit in ["minute", "hour", "second"]:
+             try:
+                 df_quote = thetadata_helper.get_price_data(
+                     self._username,
+                     self._password,
+                     asset_separated,
+                     start_datetime,
+                     self.datetime_end,
+                     timespan=ts_unit,
+                     quote_asset=quote_asset,
+                     dt=date_time_now,
+                     datastyle="quote",
+                     include_after_hours=True  # Default to True for extended hours data
+                 )
+             except Exception as exc:
+                 logger.exception(
+                     "ThetaData quote download failed for %s / %s (%s)",
+                     asset_separated,
+                     quote_asset,
+                     ts_unit,
+                 )
+                 raise
+
+             # If the quote dataframe is empty, continue with OHLC but log
+             if df_quote is None or df_quote.empty:
+                 logger.warning(f"No QUOTE data returned for {asset_separated} / {quote_asset} ({ts_unit}); continuing without quotes.")
+             else:
+                 # Combine the ohlc and quote data using outer join to preserve all data
+                 # Use forward fill for missing quote values (ThetaData's recommended approach)
+                 df = pd.concat([df_ohlc, df_quote], axis=1, join='outer')
+
+                 # Forward fill missing quote values
+                 quote_columns = ['bid', 'ask', 'bid_size', 'ask_size', 'bid_condition', 'ask_condition', 'bid_exchange', 'ask_exchange']
+                 existing_quote_cols = [col for col in quote_columns if col in df.columns]
+                 if existing_quote_cols:
+                     df[existing_quote_cols] = df[existing_quote_cols].fillna(method='ffill')
+
+                 # Log how much forward filling occurred
+                 if 'bid' in df.columns and 'ask' in df.columns:
+                     remaining_nulls = df[['bid', 'ask']].isna().sum().sum()
+                     if remaining_nulls > 0:
+                         logger.info(f"Forward-filled missing quote values for {asset_separated}. {remaining_nulls} nulls remain at start of data.")
+
+         if df is None or df.empty:
+             return None
+
+         data = Data(asset_separated, df, timestep=ts_unit, quote=quote_asset)
+         pandas_data_update = self._set_pandas_data_keys([data])
+         if pandas_data_update is not None:
+             # Add the keys to the self.pandas_data dictionary
+             self.pandas_data.update(pandas_data_update)
+             self._data_store.update(pandas_data_update)
+             self._record_metadata(search_asset, data.df, ts_unit, asset_separated)
+
+
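+     # Cache-reuse rule of thumb (hypothetical request): 30 daily bars are
+     # served from the in-memory store when the cached frame starts on or
+     # before requested_start + START_BUFFER, holds at least 30 rows, and ends
+     # on or after end_requirement (the backtest end for "day" data, the
+     # current backtest time otherwise; compared date-only for daily bars).
+     # Any failed check lands in `reasons` ("start"/"rows"/"end") and triggers
+     # a re-fetch from ThetaData.
+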
+     def _pull_source_symbol_bars(
+         self,
+         asset,
+         length,
+         timestep=None,
+         timeshift=None,
+         quote=None,
+         exchange=None,
+         include_after_hours=True,
+     ):
+         dt = self.get_datetime()
+         requested_length = self.estimate_requested_length(length, timestep=timestep)
+         logger.debug(
+             "[THETA][DEBUG][THETADATA-PANDAS] request asset=%s quote=%s timestep=%s length=%s inferred_length=%s at %s",
+             asset,
+             quote,
+             timestep,
+             length,
+             requested_length,
+             dt,
+         )
+         self._update_pandas_data(asset, quote, requested_length, timestep, dt)
+         response = super()._pull_source_symbol_bars(
+             asset, length, timestep, timeshift, quote, exchange, include_after_hours
+         )
+         if response is None:
+             return None
+         effective_timestep = timestep or "minute"
+         if isinstance(response, pd.DataFrame) and effective_timestep == "day":
+             finalized = self._finalize_day_frame(response, dt, requested_length, timeshift, asset=asset)
+             if finalized is None or finalized.empty:
+                 return None
+             return finalized
+         return response
+
+     # Get pricing data for an asset for the entire backtesting period
+     def get_historical_prices_between_dates(
+         self,
+         asset,
+         timestep="minute",
+         quote=None,
+         exchange=None,
+         include_after_hours=True,
+         start_date=None,
+         end_date=None,
+     ):
+         inferred_length = self.estimate_requested_length(
+             None, start_date=start_date, end_date=end_date, timestep=timestep
+         )
+         self._update_pandas_data(asset, quote, inferred_length, timestep, end_date)
+
+         response = super()._pull_source_symbol_bars_between_dates(
+             asset, timestep, quote, exchange, include_after_hours, start_date, end_date
+         )
+
+         if response is None:
+             return None
+
+         bars = self._parse_source_symbol_bars(response, asset, quote=quote)
+         final_df = getattr(bars, "df", None)
+         final_rows = len(final_df) if final_df is not None else 0
+         logger.debug(
+             "[THETA][DEBUG][FETCH][THETA][DEBUG][PANDAS][FINAL] asset=%s quote=%s inferred_length=%s timestep=%s start_date=%s end_date=%s rows=%s",
+             getattr(asset, "symbol", asset) if not isinstance(asset, str) else asset,
+             getattr(quote, "symbol", quote),
+             inferred_length,
+             timestep,
+             start_date,
+             end_date,
+             final_rows,
+         )
+         return bars
+
+     def get_last_price(self, asset, timestep="minute", quote=None, exchange=None, **kwargs) -> Union[float, Decimal, None]:
+         sample_length = 5
+         dt = self.get_datetime()
+         self._update_pandas_data(asset, quote, sample_length, timestep, dt)
+         _, ts_unit = self.get_start_datetime_and_ts_unit(
+             sample_length, timestep, dt, start_buffer=START_BUFFER
+         )
+         source = None
+         tuple_key = self.find_asset_in_data_store(asset, quote, ts_unit)
+         legacy_hit = False
+         frame_last_dt = None
+         frame_last_close = None
+         if tuple_key is not None:
+             data = self.pandas_data.get(tuple_key)
+             if data is None and isinstance(tuple_key, tuple) and len(tuple_key) == 3:
+                 legacy_tuple_key = (tuple_key[0], tuple_key[1])
+                 data = self.pandas_data.get(legacy_tuple_key)
+                 if data is not None:
+                     legacy_hit = True
+             elif isinstance(tuple_key, tuple) and len(tuple_key) != 3:
+                 legacy_hit = True
+             if data is not None and hasattr(data, "df"):
+                 close_series = data.df.get("close")
+                 if close_series is None:
+                     return super().get_last_price(asset=asset, quote=quote, exchange=exchange)
+                 closes = close_series.dropna()
+                 if closes.empty:
+                     logger.debug(
+                         "[THETA][DEBUG][THETADATA-PANDAS] get_last_price found no valid closes for %s/%s; returning None (likely expired).",
+                         asset,
+                         quote or Asset("USD", "forex"),
+                     )
+                     return None
+                 closes = closes.tail(sample_length)
+                 source = "pandas_dataset"
+                 if len(closes):
+                     frame_last_dt = closes.index[-1]
+                     frame_last_close = closes.iloc[-1]
+                     try:
+                         frame_last_dt = frame_last_dt.isoformat()
+                     except AttributeError:
+                         frame_last_dt = str(frame_last_dt)
+         value = super().get_last_price(asset=asset, quote=quote, exchange=exchange)
+         logger.debug(
+             "[THETA][DEBUG][THETADATA-PANDAS] get_last_price resolved via %s for %s/%s (close=%s)",
+             source or "super",
+             asset,
+             quote or Asset("USD", "forex"),
+             value,
+         )
+         _parity_log(
+             "[THETA][DEBUG][PARITY][LAST_PRICE][THETA][DEBUG][PANDAS] asset=%s quote=%s dt=%s value=%s source=%s tuple_key=%s legacy_key_used=%s ts_unit=%s frame_last_dt=%s frame_last_close=%s",
+             getattr(asset, "symbol", asset),
+             getattr(quote, "symbol", quote) if quote else "USD",
+             dt.isoformat() if hasattr(dt, "isoformat") else str(dt),
+             value,
+             source or "super",
+             tuple_key,
+             legacy_hit,
+             ts_unit,
+             frame_last_dt,
+             float(frame_last_close) if frame_last_close is not None else None,
+         )
+         return value
+
+     def get_historical_prices(
+         self,
+         asset: Asset | str,
+         length: int,
+         timestep: str = "minute",
+         timeshift: int | timedelta | None = None,
+         quote: Optional[Asset] = None,
+         exchange: Optional[str] = None,
+         include_after_hours: bool = True,
+         return_polars: bool = False,
+     ):
+         if return_polars:
+             raise ValueError("ThetaData backtesting currently supports pandas output only.")
+
+         current_dt = self.get_datetime()
+         bars = super().get_historical_prices(
+             asset=asset,
+             length=length,
+             timestep=timestep,
+             timeshift=timeshift,
+             quote=quote,
+             exchange=exchange,
+             include_after_hours=include_after_hours,
+             return_polars=False,
+         )
+         if bars is None or getattr(bars, "df", None) is None or bars.df.empty:
+             logger.debug(
+                 "[THETA][DEBUG][FETCH][THETA][DEBUG][PANDAS] asset=%s quote=%s length=%s timestep=%s timeshift=%s current_dt=%s "
+                 "rows=0 first_ts=None last_ts=None columns=None",
+                 getattr(asset, "symbol", asset) if not isinstance(asset, str) else asset,
+                 getattr(quote, "symbol", quote),
+                 length,
+                 timestep,
+                 timeshift,
+                 current_dt,
+             )
+             return bars
+
+         df = bars.df
+         rows = len(df)
+         columns = list(df.columns)
+         if "datetime" in df.columns:
+             first_ts = df["datetime"].iloc[0]
+             last_ts = df["datetime"].iloc[-1]
+         else:
+             first_ts = df.index[0]
+             last_ts = df.index[-1]
+
+         logger.debug(
+             "[THETA][DEBUG][FETCH][THETA][DEBUG][PANDAS] asset=%s quote=%s length=%s timestep=%s timeshift=%s current_dt=%s rows=%s "
+             "first_ts=%s last_ts=%s columns=%s",
+             getattr(asset, "symbol", asset) if not isinstance(asset, str) else asset,
+             getattr(quote, "symbol", quote),
+             length,
+             timestep,
+             timeshift,
+             current_dt,
+             rows,
+             first_ts,
+             last_ts,
+             columns,
+         )
+         return bars
+
+     def get_quote(self, asset, timestep="minute", quote=None, exchange=None, **kwargs):
+         """
+         Get quote data for an asset during backtesting.
+
+         Parameters
+         ----------
+         asset : Asset object
+             The asset for which the quote is needed.
+         timestep : str, optional
+             The timestep to use for the data.
+         quote : Asset object, optional
+             The quote asset for cryptocurrency pairs.
+         exchange : str, optional
+             The exchange to get the quote from.
+         **kwargs : dict
+             Additional keyword arguments.
+
+         Returns
+         -------
+         Quote
+             A Quote object with the quote information.
+         """
+         dt = self.get_datetime()
+
+         # [INSTRUMENTATION] Log full asset details for options
+         if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.OPTION:
+             logger.debug(
+                 "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][OPTION_REQUEST] symbol=%s expiration=%s strike=%s right=%s current_dt=%s timestep=%s",
+                 asset.symbol,
+                 asset.expiration,
+                 asset.strike,
+                 asset.right,
+                 dt.isoformat() if hasattr(dt, 'isoformat') else dt,
+                 timestep
+             )
+         else:
+             logger.debug(
+                 "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][REQUEST] asset=%s current_dt=%s timestep=%s",
+                 getattr(asset, "symbol", asset) if not isinstance(asset, str) else asset,
+                 dt.isoformat() if hasattr(dt, 'isoformat') else dt,
+                 timestep
+             )
+
+         self._update_pandas_data(asset, quote, 1, timestep, dt)
+
+         # [INSTRUMENTATION] Capture in-memory dataframe state after _update_pandas_data
+         debug_enabled = True
+
+         search_asset = (asset, quote if quote else Asset("USD", "forex"))
+         data_obj = self.pandas_data.get(search_asset)
+         if data_obj is not None and hasattr(data_obj, 'df'):
+             df = data_obj.df
+             if df is not None and len(df) > 0:
+                 # Get first and last 5 rows
+                 head_df = df.head(5)
+                 tail_df = df.tail(5)
+
+                 # Format columns to show
+                 cols_to_show = ['bid', 'ask', 'mid_price', 'close'] if hasattr(asset, 'asset_type') and asset.asset_type == Asset.AssetType.OPTION else ['close']
+                 available_cols = [col for col in cols_to_show if col in df.columns]
+
+                 # Get timezone info
+                 tz_info = "NO_TZ"
+                 if isinstance(df.index, pd.DatetimeIndex) and df.index.tz is not None:
+                     tz_info = str(df.index.tz)
+
+                 logger.debug(
+                     "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATAFRAME_STATE] asset=%s | total_rows=%d | timestep=%s | index_type=%s | timezone=%s",
+                     getattr(asset, "symbol", asset),
+                     len(df),
+                     data_obj.timestep,
+                     type(df.index).__name__,
+                     tz_info
+                 )
+
+                 # Log datetime range with timezone
+                 if isinstance(df.index, pd.DatetimeIndex):
+                     first_dt_str = df.index[0].isoformat() if hasattr(df.index[0], 'isoformat') else str(df.index[0])
+                     last_dt_str = df.index[-1].isoformat() if hasattr(df.index[-1], 'isoformat') else str(df.index[-1])
+                     logger.debug(
+                         "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATETIME_RANGE] asset=%s | first_dt=%s | last_dt=%s | tz=%s",
+                         getattr(asset, "symbol", asset),
+                         first_dt_str,
+                         last_dt_str,
+                         tz_info
+                     )
+
+                 # CRITICAL: Show tail with explicit datetime index to catch time-travel bug
+                 if debug_enabled and len(available_cols) > 0:
+                     logger.debug(
+                         "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATAFRAME_HEAD] asset=%s | first_5_rows (with datetime index):\n%s",
+                         getattr(asset, "symbol", asset),
+                         head_df[available_cols].to_string()
+                     )
+                     logger.debug(
+                         "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATAFRAME_TAIL] asset=%s | last_5_rows (with datetime index):\n%s",
+                         getattr(asset, "symbol", asset),
+                         tail_df[available_cols].to_string()
+                     )
+
+                     # Show tail datetime values explicitly
+                     tail_datetimes = [dt.isoformat() if hasattr(dt, 'isoformat') else str(dt) for dt in tail_df.index]
+                     logger.debug(
+                         "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][TAIL_DATETIMES] asset=%s | tail_index=%s",
+                         getattr(asset, "symbol", asset),
+                         tail_datetimes
+                     )
+             else:
+                 logger.debug(
+                     "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATAFRAME_STATE] asset=%s | EMPTY_DATAFRAME",
+                     getattr(asset, "symbol", asset)
+                 )
+         else:
+             logger.debug(
+                 "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][DATAFRAME_STATE] asset=%s | NO_DATA_FOUND_IN_STORE",
+                 getattr(asset, "symbol", asset)
+             )
+
+         quote_obj = super().get_quote(asset=asset, quote=quote, exchange=exchange)
+
+         # [INSTRUMENTATION] Final quote result with all details
+         logger.debug(
+             "[THETA][DEBUG][QUOTE][THETA][DEBUG][PANDAS][RESULT] asset=%s quote=%s current_dt=%s bid=%s ask=%s mid=%s last=%s source=%s",
+             getattr(asset, "symbol", asset) if not isinstance(asset, str) else asset,
+             getattr(quote, "symbol", quote),
+             dt,
+             getattr(quote_obj, "bid", None) if quote_obj else None,
+             getattr(quote_obj, "ask", None) if quote_obj else None,
+             getattr(quote_obj, "mid_price", None) if quote_obj else None,
+             getattr(quote_obj, "last_price", None) if quote_obj else None,
+             getattr(quote_obj, "source", None) if quote_obj else None,
+         )
+         return quote_obj
+
+     def get_chains(self, asset):
+         """
+         Get option chains using cached implementation (matches Polygon pattern).
+
+         Parameters
+         ----------
+         asset : Asset
+             The asset to get data for.
+
+         Returns
+         -------
+         Chains
+             A Chains entity object (dict subclass) with the structure:
+             {
+                 "Multiplier": 100,
+                 "Exchange": "SMART",
+                 "Chains": {
+                     "CALL": {
+                         "2023-07-31": [100.0, 101.0, ...],
+                         ...
+                     },
+                     "PUT": {
+                         "2023-07-31": [100.0, 101.0, ...],
+                         ...
+                     }
+                 }
+             }
+         """
+         from lumibot.entities import Chains
+
+         chains_dict = thetadata_helper.get_chains_cached(
+             username=self._username,
+             password=self._password,
+             asset=asset,
+             current_date=self.get_datetime().date()
+         )
+
+         # Wrap in Chains entity for modern API
+         return Chains(chains_dict)
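
For reference, the Chains entity documented in get_chains above is a dict subclass, so it can be consumed directly; a small sketch, assuming a Strategy context where self.get_chains and self.get_last_price are available:

    from lumibot.entities import Asset

    chains = self.get_chains(Asset("SPY"))   # e.g. inside on_trading_iteration
    calls = chains["Chains"]["CALL"]         # maps expiration -> list of strikes
    nearest_expiry = sorted(calls.keys())[0]
    strikes = calls[nearest_expiry]
    # Pick the strike closest to the money (illustrative only).
    atm_strike = min(strikes, key=lambda s: abs(s - self.get_last_price("SPY")))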