lumibot 4.0.23__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic.

Files changed (160)
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  37. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  58. lumibot/data_sources/data_source_backtesting.py +3 -5
  59. lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
  60. lumibot/data_sources/pandas_data.py +6 -3
  61. lumibot/data_sources/polars_mixin.py +126 -21
  62. lumibot/data_sources/tradeovate_data.py +80 -0
  63. lumibot/data_sources/tradier_data.py +2 -1
  64. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  65. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  66. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  67. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  75. lumibot/entities/asset.py +8 -0
  76. lumibot/entities/order.py +1 -1
  77. lumibot/entities/quote.py +14 -0
  78. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  79. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  80. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  81. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  82. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  83. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  84. lumibot/strategies/_strategy.py +95 -27
  85. lumibot/strategies/strategy.py +5 -6
  86. lumibot/strategies/strategy_executor.py +2 -2
  87. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  88. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  89. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  90. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  109. lumibot/tools/databento_helper.py +384 -133
  110. lumibot/tools/databento_helper_polars.py +218 -156
  111. lumibot/tools/databento_roll.py +216 -0
  112. lumibot/tools/lumibot_logger.py +32 -17
  113. lumibot/tools/polygon_helper.py +65 -0
  114. lumibot/tools/thetadata_helper.py +588 -70
  115. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  116. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  117. lumibot/traders/trader.py +1 -1
  118. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  120. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  121. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  122. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/RECORD +160 -44
  123. tests/backtest/check_timing_offset.py +198 -0
  124. tests/backtest/check_volume_spike.py +112 -0
  125. tests/backtest/comprehensive_comparison.py +166 -0
  126. tests/backtest/debug_comparison.py +91 -0
  127. tests/backtest/diagnose_price_difference.py +97 -0
  128. tests/backtest/direct_api_comparison.py +203 -0
  129. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  130. tests/backtest/root_cause_analysis.py +109 -0
  131. tests/backtest/test_accuracy_verification.py +244 -0
  132. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  133. tests/backtest/test_databento.py +4 -0
  134. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  135. tests/backtest/test_debug_avg_fill_price.py +112 -0
  136. tests/backtest/test_dividends.py +8 -3
  137. tests/backtest/test_example_strategies.py +54 -47
  138. tests/backtest/test_futures_edge_cases.py +451 -0
  139. tests/backtest/test_futures_single_trade.py +270 -0
  140. tests/backtest/test_futures_ultra_simple.py +191 -0
  141. tests/backtest/test_index_data_verification.py +348 -0
  142. tests/backtest/test_polygon.py +45 -24
  143. tests/backtest/test_thetadata.py +246 -60
  144. tests/backtest/test_thetadata_comprehensive.py +729 -0
  145. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  146. tests/backtest/test_yahoo.py +1 -2
  147. tests/conftest.py +20 -0
  148. tests/test_backtesting_data_source_env.py +249 -0
  149. tests/test_backtesting_quiet_logs_complete.py +10 -11
  150. tests/test_databento_helper.py +73 -86
  151. tests/test_databento_timezone_fixes.py +21 -4
  152. tests/test_get_historical_prices.py +6 -6
  153. tests/test_options_helper.py +162 -40
  154. tests/test_polygon_helper.py +21 -13
  155. tests/test_quiet_logs_requirements.py +5 -5
  156. tests/test_thetadata_helper.py +487 -171
  157. tests/test_yahoo_data.py +125 -0
  158. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  159. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  160. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
lumibot/tools/thetadata_helper.py
@@ -1,5 +1,6 @@
  # This file contains helper functions for getting data from Polygon.io
  import time
+ import os
  from datetime import date, datetime, timedelta
  from pathlib import Path
  import pytz
@@ -9,7 +10,6 @@ import requests
  from lumibot import LUMIBOT_CACHE_FOLDER, LUMIBOT_DEFAULT_PYTZ
  from lumibot.tools.lumibot_logger import get_logger
  from lumibot.entities import Asset
- from thetadata import ThetaClient
  from tqdm import tqdm

  logger = get_logger(__name__)
@@ -19,6 +19,10 @@ MAX_DAYS = 30
  CACHE_SUBFOLDER = "thetadata"
  BASE_URL = "http://127.0.0.1:25510"

+ # Global process tracking for ThetaTerminal
+ THETA_DATA_PROCESS = None
+ THETA_DATA_PID = None
+

  def get_price_data(
  username: str,
@@ -29,7 +33,8 @@ def get_price_data(
  timespan: str = "minute",
  quote_asset: Asset = None,
  dt=None,
- datastyle: str = "ohlc"
+ datastyle: str = "ohlc",
+ include_after_hours: bool = True
  ):
  """
  Queries ThetaData for pricing data for the given asset and returns a DataFrame with the data. Data will be
@@ -53,6 +58,10 @@ def get_price_data(
  "month", "quarter"
  quote_asset : Asset
  The quote asset for the asset we are getting data for. This is only needed for Forex assets.
+ datastyle : str
+ The style of data to retrieve ("ohlc" or "quote")
+ include_after_hours : bool
+ Whether to include after-hours trading data (default True)

  Returns
  -------
@@ -60,6 +69,7 @@ def get_price_data(
  A DataFrame with the pricing data for the asset

  """
+ import pytz # Import at function level to avoid scope issues in nested calls

  # Check if we already have data for this asset in the cache file
  df_all = None
@@ -74,6 +84,37 @@ def get_price_data(
  # Check if we need to get more data
  missing_dates = get_missing_dates(df_all, asset, start, end)
  if not missing_dates:
+ # Filter cached data to requested date range before returning
+ if df_all is not None and not df_all.empty:
+ # For daily data, use date-based filtering (timestamps vary by provider)
+ # For intraday data, use precise datetime filtering
+ if timespan == "day":
+ # Convert index to dates for comparison
+ import pandas as pd
+ df_dates = pd.to_datetime(df_all.index).date
+ start_date = start.date() if hasattr(start, 'date') else start
+ end_date = end.date() if hasattr(end, 'date') else end
+ mask = (df_dates >= start_date) & (df_dates <= end_date)
+ df_all = df_all[mask]
+ else:
+ # Intraday: use precise datetime filtering
+ import datetime as dt
+ # Convert date to datetime if needed
+ if isinstance(start, dt.date) and not isinstance(start, dt.datetime):
+ start = dt.datetime.combine(start, dt.time.min)
+ if isinstance(end, dt.date) and not isinstance(end, dt.datetime):
+ end = dt.datetime.combine(end, dt.time.max)
+
+ # Handle datetime objects with midnight time (users often pass datetime(YYYY, MM, DD))
+ if isinstance(end, dt.datetime) and end.time() == dt.time.min:
+ # Convert end-of-period midnight to end-of-day
+ end = dt.datetime.combine(end.date(), dt.time.max)
+
+ if start.tzinfo is None:
+ start = LUMIBOT_DEFAULT_PYTZ.localize(start).astimezone(pytz.UTC)
+ if end.tzinfo is None:
+ end = LUMIBOT_DEFAULT_PYTZ.localize(end).astimezone(pytz.UTC)
+ df_all = df_all[(df_all.index >= start) & (df_all.index <= end)]
  return df_all

  start = missing_dates[0] # Data will start at 8am UTC (4am EST)
@@ -88,19 +129,43 @@ def get_price_data(

  delta = timedelta(days=MAX_DAYS)

- interval_ms = None
- # Calculate the interval in milliseconds
- if timespan == "second":
- interval_ms = 1000
- elif timespan == "minute":
- interval_ms = 60000
- elif timespan == "hour":
- interval_ms = 3600000
- elif timespan == "day":
- interval_ms = 86400000
- else:
- interval_ms = 60000
- logger.warning(f"Unsupported timespan: {timespan}, using default of 1 minute")
+ # For daily bars, use ThetaData's EOD endpoint for official daily OHLC
+ # The EOD endpoint includes the 16:00 closing auction and follows SIP sale-condition rules
+ # This matches Polygon and Yahoo Finance EXACTLY (zero tolerance)
+ if timespan == "day":
+ logger.info(f"Daily bars: using EOD endpoint for official close prices")
+
+ # Use EOD endpoint for official daily OHLC
+ result_df = get_historical_eod_data(
+ asset=asset,
+ start_dt=start,
+ end_dt=end,
+ username=username,
+ password=password,
+ datastyle=datastyle
+ )
+
+ return result_df
+
+ # Map timespan to milliseconds for intraday intervals
+ TIMESPAN_TO_MS = {
+ "second": 1000,
+ "minute": 60000,
+ "5minute": 300000,
+ "10minute": 600000,
+ "15minute": 900000,
+ "30minute": 1800000,
+ "hour": 3600000,
+ "2hour": 7200000,
+ "4hour": 14400000,
+ }
+
+ interval_ms = TIMESPAN_TO_MS.get(timespan)
+ if interval_ms is None:
+ raise ValueError(
+ f"Unsupported timespan '{timespan}'. "
+ f"Supported values: {list(TIMESPAN_TO_MS.keys())} or 'day'"
+ )

  while start <= missing_dates[-1]:
  # If we don't have a paid subscription, we need to wait 1 minute between requests because of
@@ -109,7 +174,7 @@ def get_price_data(
  if end > start + delta:
  end = start + delta

- result_df = get_historical_data(asset, start, end, interval_ms, username, password, datastyle=datastyle)
+ result_df = get_historical_data(asset, start, end, interval_ms, username, password, datastyle=datastyle, include_after_hours=include_after_hours)

  if result_df is None or len(result_df) == 0:
  logger.warning(
@@ -155,8 +220,8 @@ def get_trading_dates(asset: Asset, start: datetime, end: datetime):
  # Crypto trades every day, 24/7 so we don't need to check the calendar
  return [start.date() + timedelta(days=x) for x in range((end.date() - start.date()).days + 1)]

- # Stock/Option Asset for Backtesting - Assuming NYSE trading days
- elif asset.asset_type == "stock" or asset.asset_type == "option":
+ # Stock/Option/Index Asset for Backtesting - Assuming NYSE trading days
+ elif asset.asset_type == "stock" or asset.asset_type == "option" or asset.asset_type == "index":
  cal = mcal.get_calendar("NYSE")

  # Forex Asset for Backtesting - Forex trades weekdays, 24hrs starting Sunday 5pm EST
@@ -168,7 +233,9 @@ def get_trading_dates(asset: Asset, start: datetime, end: datetime):
  raise ValueError(f"Unsupported asset type for thetadata: {asset.asset_type}")

  # Get the trading days between the start and end dates
- df = cal.schedule(start_date=start.date(), end_date=end.date())
+ start_date = start.date() if hasattr(start, 'date') else start
+ end_date = end.date() if hasattr(end, 'date') else end
+ df = cal.schedule(start_date=start_date, end_date=end_date)
  trading_days = df.index.date.tolist()
  return trading_days

@@ -333,23 +400,143 @@ def update_df(df_all, result):
  df_all = pd.concat([df_all, df]).sort_index()
  df_all = df_all[~df_all.index.duplicated(keep="first")] # Remove any duplicate rows

- # df_all index - 1 min to match with polygon data index
- df_all.index = df_all.index - pd.Timedelta(minutes=1)
+ # NOTE: Timestamp correction is now done in get_historical_data() at line 569
+ # Do NOT subtract 1 minute here as it would double-correct
+ # df_all.index = df_all.index - pd.Timedelta(minutes=1)
  return df_all


+ def is_process_alive():
+ """Check if ThetaTerminal Java process is still running"""
+ import subprocess
+ global THETA_DATA_PROCESS
+
+ # First check if we have a process handle and it's still alive
+ if THETA_DATA_PROCESS is not None:
+ # poll() returns None if process is still running, otherwise returns exit code
+ if THETA_DATA_PROCESS.poll() is None:
+ return True
+
+ # If we don't have a process handle or it died, check if any ThetaTerminal process is running
+ # This handles cases where the process was started by a previous Python session
+ try:
+ result = subprocess.run(
+ ["pgrep", "-f", "ThetaTerminal.jar"],
+ capture_output=True,
+ text=True,
+ timeout=2
+ )
+ # pgrep returns 0 if processes found, 1 if none found
+ return result.returncode == 0
+ except Exception:
+ return False
+
+
  def start_theta_data_client(username: str, password: str):
+ import subprocess
+ import shutil
+ global THETA_DATA_PROCESS, THETA_DATA_PID
+
  # First try shutting down any existing connection
  try:
  requests.get(f"{BASE_URL}/v2/system/terminal/shutdown")
  except Exception:
  pass

- client = ThetaClient(username=username, passwd=password)
+ # Create creds.txt file to avoid passing password with special characters on command line
+ # This is the official ThetaData method and avoids shell escaping issues
+ # Security note: creds.txt with 0o600 permissions is MORE secure than command-line args
+ # which can be seen in process lists. Similar security profile to .env files.
+ theta_dir = Path.home() / "ThetaData" / "ThetaTerminal"
+ theta_dir.mkdir(parents=True, exist_ok=True)
+ creds_file = theta_dir / "creds.txt"
+
+ # IDEMPOTENT WRITE: Only write credentials if file doesn't exist or username changed
+ # This prevents overwriting production credentials with test credentials
+ should_write = False
+ if not creds_file.exists():
+ logger.info(f"Creating new creds.txt file at {creds_file}")
+ should_write = True
+ else:
+ # Check if username changed
+ try:
+ with open(creds_file, 'r') as f:
+ existing_username = f.readline().strip()
+ if existing_username != username:
+ logger.info(f"Username changed from {existing_username} to {username}, updating creds.txt")
+ should_write = True
+ else:
+ logger.debug(f"Using existing creds.txt for {username}")
+ except Exception as e:
+ logger.warning(f"Could not read existing creds.txt: {e}, will recreate")
+ should_write = True
+
+ if should_write:
+ # Write credentials to creds.txt (format: email on first line, password on second line)
+ with open(creds_file, 'w') as f:
+ f.write(f"{username}\n")
+ f.write(f"{password}\n")
+
+ # Set restrictive permissions on creds file (owner read/write only)
+ # This prevents other users on the system from reading the credentials
+ os.chmod(creds_file, 0o600)
+
+ logger.info(f"Updated creds.txt file for user: {username}")
+
+ # Launch ThetaTerminal directly with --creds-file to avoid shell escaping issues
+ # We bypass the thetadata library's launcher which doesn't support this option
+ # and has shell escaping bugs with special characters in passwords
+
+ # Verify Java is available
+ if not shutil.which("java"):
+ raise RuntimeError("Java is not installed. Please install Java 11+ to use ThetaData.")
+
+ # Find ThetaTerminal.jar
+ jar_file = theta_dir / "ThetaTerminal.jar"
+ if not jar_file.exists():
+ # Copy ThetaTerminal.jar from lumibot package to user's ThetaData directory
+ logger.info("ThetaTerminal.jar not found, copying from lumibot package...")
+ import shutil as shutil_copy
+
+ # Find the bundled jar file in the lumibot package
+ lumibot_jar = Path(__file__).parent.parent.parent / "ThetaTerminal.jar"
+
+ if lumibot_jar.exists():
+ logger.info(f"Copying ThetaTerminal.jar from {lumibot_jar} to {jar_file}")
+ shutil_copy.copy2(lumibot_jar, jar_file)
+ logger.info(f"Successfully copied ThetaTerminal.jar to {jar_file}")
+ else:
+ raise FileNotFoundError(
+ f"ThetaTerminal.jar not found at {lumibot_jar}. "
+ f"Please ensure ThetaTerminal.jar is included in the lumibot package, "
+ f"or manually place it at {jar_file}"
+ )
+
+ if not jar_file.exists():
+ raise FileNotFoundError(f"ThetaTerminal.jar not found at {jar_file}")
+
+ # Launch ThetaTerminal with --creds-file argument (no credentials on command line)
+ # This avoids all shell escaping issues and is the recommended approach
+ cmd = ["java", "-jar", str(jar_file), "--creds-file", str(creds_file)]

- time.sleep(1)
+ logger.info(f"Launching ThetaTerminal with creds file: {cmd}")

- return client
+ # Launch in background and store process handle
+ THETA_DATA_PROCESS = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ cwd=str(theta_dir)
+ )
+ THETA_DATA_PID = THETA_DATA_PROCESS.pid
+ logger.info(f"ThetaTerminal started with PID: {THETA_DATA_PID}")
+
+ # Give it a moment to start
+ time.sleep(2)
+
+ # We don't return a ThetaClient object since we're launching manually
+ # The connection will be established via HTTP/WebSocket to localhost:25510
+ return THETA_DATA_PROCESS


  def check_connection(username: str, password: str):
@@ -358,26 +545,40 @@ def check_connection(username: str, password: str):
  counter = 0
  client = None
  connected = False
+
  while True:
+ # FIRST: Check if already connected (most important check!)
+ # This prevents unnecessary restarts that would overwrite creds.txt
  try:
- time.sleep(0.5)
  res = requests.get(f"{BASE_URL}/v2/system/mdds/status", timeout=1)
  con_text = res.text

  if con_text == "CONNECTED":
- logger.debug("Connected to Theta Data!")
+ logger.debug("Already connected to Theta Data!")
  connected = True
  break
  elif con_text == "DISCONNECTED":
- logger.debug("Disconnected from Theta Data!")
- counter += 1
+ logger.debug("Disconnected from Theta Data, will attempt to start...")
+ # Fall through to process check and restart logic
  else:
- logger.info(f"Unknown connection status: {con_text}, starting theta data client")
- client = start_theta_data_client(username=username, password=password)
- counter += 1
+ logger.debug(f"Unknown connection status: {con_text}")
+ # Fall through to process check and restart logic
  except Exception as e:
+ # Connection endpoint not responding - process might be dead
+ logger.debug(f"Cannot reach ThetaData status endpoint: {e}")
+ # Fall through to process check and restart logic
+
+ # SECOND: Check if the Java process is still alive
+ if not is_process_alive():
+ logger.warning("ThetaTerminal process is not running, starting...")
  client = start_theta_data_client(username=username, password=password)
  counter += 1
+ time.sleep(0.5)
+ continue
+
+ # THIRD: Process is alive but not connected - wait and retry
+ time.sleep(0.5)
+ counter += 1

  if counter > MAX_RETRIES:
  logger.error("Cannot connect to Theta Data!")
@@ -387,41 +588,211 @@ def check_connection(username: str, password: str):


  def get_request(url: str, headers: dict, querystring: dict, username: str, password: str):
- counter = 0
+ all_responses = []
+ next_page_url = None
+ page_count = 0
+
  while True:
- try:
- response = requests.get(url, headers=headers, params=querystring)
- # If status code is not 200, then we are not connected
- if response.status_code != 200:
- check_connection(username=username, password=password)
- else:
- json_resp = response.json()
-
- # Check if json_resp has error_type inside of header
- if "error_type" in json_resp["header"] and json_resp["header"]["error_type"] != "null":
- # Handle "NO_DATA" error
- if json_resp["header"]["error_type"] == "NO_DATA":
- logger.warning(
- f"No data returned for querystring: {querystring}")
- return None
- else:
- logger.error(
- f"Error getting data from Theta Data: {json_resp['header']['error_type']},\nquerystring: {querystring}")
- check_connection(username=username, password=password)
+ counter = 0
+ # Use next_page URL if available, otherwise use original URL with querystring
+ request_url = next_page_url if next_page_url else url
+ request_params = None if next_page_url else querystring
+
+ while True:
+ try:
+ response = requests.get(request_url, headers=headers, params=request_params)
+ # Status code 472 means "No data" - this is valid, return None
+ if response.status_code == 472:
+ logger.warning(f"No data available for request: {response.text[:200]}")
+ return None
+ # If status code is not 200, then we are not connected
+ elif response.status_code != 200:
+ logger.warning(f"Non-200 status code {response.status_code}: {response.text[:200]}")
+ check_connection(username=username, password=password)
  else:
- break
+ json_resp = response.json()
+
+ # Check if json_resp has error_type inside of header
+ if "error_type" in json_resp["header"] and json_resp["header"]["error_type"] != "null":
+ # Handle "NO_DATA" error
+ if json_resp["header"]["error_type"] == "NO_DATA":
+ logger.warning(
+ f"No data returned for querystring: {querystring}")
+ return None
+ else:
+ logger.error(
+ f"Error getting data from Theta Data: {json_resp['header']['error_type']},\nquerystring: {querystring}")
+ check_connection(username=username, password=password)
+ else:
+ break

- except Exception as e:
- check_connection(username=username, password=password)
+ except Exception as e:
+ logger.warning(f"Exception during request (attempt {counter + 1}): {e}")
+ check_connection(username=username, password=password)
+ # Give the process time to start after restart
+ if counter == 0:
+ logger.info("Waiting 5 seconds for ThetaTerminal to initialize...")
+ time.sleep(5)

- counter += 1
- if counter > 1:
- raise ValueError("Cannot connect to Theta Data!")
+ counter += 1
+ if counter > 1:
+ raise ValueError("Cannot connect to Theta Data!")
+
+ # Store this page's response data
+ page_count += 1
+ all_responses.append(json_resp["response"])
+
+ # Check for pagination - follow next_page if it exists
+ next_page = json_resp["header"].get("next_page")
+ if next_page and next_page != "null" and next_page != "":
+ logger.info(f"Following pagination: {page_count} page(s) downloaded, fetching next page...")
+ next_page_url = next_page
+ else:
+ # No more pages, we're done
+ break
+
+ # Merge all pages if we got multiple pages
+ if page_count > 1:
+ logger.info(f"Merged {page_count} pages from ThetaData ({sum(len(r) for r in all_responses)} total rows)")
+ json_resp["response"] = []
+ for page_response in all_responses:
+ json_resp["response"].extend(page_response)

  return json_resp


- def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl: int, username: str, password: str, datastyle:str = "ohlc"):
+ def get_historical_eod_data(asset: Asset, start_dt: datetime, end_dt: datetime, username: str, password: str, datastyle: str = "ohlc"):
+ """
+ Get EOD (End of Day) data from ThetaData using the /v2/hist/{asset_type}/eod endpoint.
+
+ This endpoint provides official daily OHLC that includes the 16:00 closing auction
+ and follows SIP sale-condition rules, matching Polygon and Yahoo Finance exactly.
+
+ NOTE: ThetaData's EOD endpoint has been found to return incorrect open prices for stocks
+ that don't match Polygon/Yahoo. We fix this by using the first minute bar's open price.
+ Indexes don't have this issue since they are calculated values.
+
+ Parameters
+ ----------
+ asset : Asset
+ The asset we are getting data for
+ start_dt : datetime
+ The start date for the data we want
+ end_dt : datetime
+ The end date for the data we want
+ username : str
+ Your ThetaData username
+ password : str
+ Your ThetaData password
+ datastyle : str
+ The style of data to retrieve (default "ohlc")
+
+ Returns
+ -------
+ pd.DataFrame
+ A DataFrame with EOD data for the asset
+ """
+ # Convert start and end dates to strings
+ start_date = start_dt.strftime("%Y%m%d")
+ end_date = end_dt.strftime("%Y%m%d")
+
+ # Use v2 EOD API endpoint (supports stock, index, option)
+ url = f"{BASE_URL}/v2/hist/{asset.asset_type}/eod"
+
+ querystring = {
+ "root": asset.symbol,
+ "start_date": start_date,
+ "end_date": end_date
+ }
+
+ # For options, add strike, expiration, and right parameters
+ if asset.asset_type == "option":
+ expiration_str = asset.expiration.strftime("%Y%m%d")
+ strike = int(asset.strike * 1000)
+ querystring["exp"] = expiration_str
+ querystring["strike"] = strike
+ querystring["right"] = "C" if asset.right == "CALL" else "P"
+
+ headers = {"Accept": "application/json"}
+
+ # Send the request
+ json_resp = get_request(url=url, headers=headers, querystring=querystring,
+ username=username, password=password)
+ if json_resp is None:
+ return None
+
+ # Convert to pandas dataframe
+ df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])
+
+ if df is None or df.empty:
+ return df
+
+ # Function to combine ms_of_day and date into datetime
+ def combine_datetime(row):
+ # Ensure the date is in integer format and then convert to string
+ date_str = str(int(row["date"]))
+ base_date = datetime.strptime(date_str, "%Y%m%d")
+ # EOD reports are normalized at ~17:15 ET but represent the trading day
+ # We use midnight of the trading day as the timestamp (consistent with daily bars)
+ return base_date
+
+ # Apply the function to each row to create a new datetime column
+ datetime_combined = df.apply(combine_datetime, axis=1)
+
+ # Assign the newly created datetime column
+ df = df.assign(datetime=datetime_combined)
+
+ # Convert the datetime column to a datetime and localize to UTC
+ df["datetime"] = pd.to_datetime(df["datetime"])
+ df["datetime"] = df["datetime"].dt.tz_localize("UTC")
+
+ # Set datetime as the index
+ df = df.set_index("datetime")
+
+ # Drop the ms_of_day, ms_of_day2, and date columns (not needed for daily bars)
+ df = df.drop(columns=["ms_of_day", "ms_of_day2", "date"], errors='ignore')
+
+ # Drop bid/ask columns if present (EOD includes NBBO but we only need OHLC)
+ df = df.drop(columns=["bid_size", "bid_exchange", "bid", "bid_condition",
+ "ask_size", "ask_exchange", "ask", "ask_condition"], errors='ignore')
+
+ # FIX: ThetaData's EOD endpoint returns incorrect open/high/low prices for STOCKS and OPTIONS
+ # that don't match Polygon/Yahoo. We fix this by using minute bar data.
+ # Solution: Fetch minute bars for each trading day and aggregate to get correct OHLC
+ # NOTE: Indexes don't need this fix since they are calculated values, not traded securities
+ if asset.asset_type in ["stock", "option"]:
+ logger.info(f"Fetching 9:30 AM minute bars to correct EOD open prices...")
+
+ # Get minute data for the date range to extract 9:30 AM opens
+ minute_df = get_historical_data(
+ asset=asset,
+ start_dt=start_dt,
+ end_dt=end_dt,
+ ivl=60000, # 1 minute
+ username=username,
+ password=password,
+ datastyle=datastyle,
+ include_after_hours=False # RTH only
+ )
+
+ if minute_df is not None and not minute_df.empty:
+ # Group by date and get the first bar's open for each day
+ minute_df_copy = minute_df.copy()
+ minute_df_copy['date'] = minute_df_copy.index.date
+
+ # For each date in df, find the corresponding 9:30 AM open from minute data
+ for idx in df.index:
+ trade_date = idx.date()
+ day_minutes = minute_df_copy[minute_df_copy['date'] == trade_date]
+ if len(day_minutes) > 0:
+ # Use the first minute bar's open (9:30 AM opening auction)
+ correct_open = day_minutes.iloc[0]['open']
+ df.loc[idx, 'open'] = correct_open
+
+ return df
+
+
+ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl: int, username: str, password: str, datastyle:str = "ohlc", include_after_hours: bool = True):
  """
  Get data from ThetaData

@@ -439,6 +810,10 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  Your ThetaData username
  password : str
  Your ThetaData password
+ datastyle : str
+ The style of data to retrieve ("ohlc" or "quote")
+ include_after_hours : bool
+ Whether to include after-hours trading data (default True)

  Returns
  -------
@@ -450,8 +825,8 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  start_date = start_dt.strftime("%Y%m%d")
  end_date = end_dt.strftime("%Y%m%d")

- # Create the url based on the asset type
- url = f"{BASE_URL}/hist/{asset.asset_type}/{datastyle}"
+ # Use v2 API for ALL asset types
+ url = f"{BASE_URL}/v2/hist/{asset.asset_type}/{datastyle}"

  if asset.asset_type == "option":
  # Convert the expiration date to a string
@@ -468,10 +843,30 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  "strike": strike, # "140000",
  "exp": expiration_str, # "20220930",
  "right": "C" if asset.right == "CALL" else "P",
- "rth": "false"
+ # include_after_hours=True means extended hours (rth=false)
+ # include_after_hours=False means regular hours only (rth=true)
+ "rth": "false" if include_after_hours else "true"
+ }
+ elif asset.asset_type == "index":
+ # For indexes (SPX, VIX, etc.), don't use rth parameter
+ # Indexes are calculated values, not traded securities
+ querystring = {
+ "root": asset.symbol,
+ "start_date": start_date,
+ "end_date": end_date,
+ "ivl": ivl
  }
  else:
- querystring = {"root": asset.symbol, "start_date": start_date, "end_date": end_date, "ivl": ivl}
+ # For stocks, respect include_after_hours parameter
+ # rth=false means extended hours (pre-market + regular + after-hours)
+ # rth=true means 9:30 AM - 4:00 PM ET (regular market hours only)
+ querystring = {
+ "root": asset.symbol,
+ "start_date": start_date,
+ "end_date": end_date,
+ "ivl": ivl,
+ "rth": "false" if include_after_hours else "true"
+ }

  headers = {"Accept": "application/json"}

@@ -486,9 +881,11 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])

  # Remove any rows where count is 0 (no data - the prices will be 0 at these times too)
+ # NOTE: Indexes always have count=0 since they're calculated values, not traded securities
  if "quote" in datastyle.lower():
  df = df[(df["bid_size"] != 0) | (df["ask_size"] != 0)]
- else:
+ elif asset.asset_type != "index":
+ # Don't filter indexes by count - they're always 0
  df = df[df["count"] != 0]

  if df is None or df.empty:
@@ -499,7 +896,7 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  # Ensure the date is in integer format and then convert to string
  date_str = str(int(row["date"]))
  base_date = datetime.strptime(date_str, "%Y%m%d")
- # Adding the milliseconds of the day to the base date
+ # v2 API returns correct start-stamped bars - no adjustment needed
  datetime_value = base_date + timedelta(milliseconds=int(row["ms_of_day"]))
  return datetime_value

@@ -511,11 +908,17 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
  # Assign the newly created datetime column
  df = df.assign(datetime=datetime_combined)

- # Convert the datetime column to a datetime
+ # Convert the datetime column to a datetime and localize to Eastern Time
  df["datetime"] = pd.to_datetime(df["datetime"])

+ # Localize to Eastern Time (ThetaData returns times in ET)
+ df["datetime"] = df["datetime"].dt.tz_localize("America/New_York")
+
+ # Set datetime as the index
+ df = df.set_index("datetime")
+
  # Drop the ms_of_day and date columns
- df = df.drop(columns=["ms_of_day", "date"])
+ df = df.drop(columns=["ms_of_day", "date"], errors='ignore')

  return df

@@ -538,8 +941,8 @@ def get_expirations(username: str, password: str, ticker: str, after_date: date)
  list[str]
  A list of expiration dates for the given ticker
  """
- # Create the url based on the request type
- url = f"{BASE_URL}/list/expirations"
+ # Use v2 API endpoint
+ url = f"{BASE_URL}/v2/list/expirations"

  querystring = {"root": ticker}

@@ -592,8 +995,8 @@ def get_strikes(username: str, password: str, ticker: str, expiration: datetime)
  list[float]
  A list of strike prices for the given ticker and expiration date
  """
- # Create the url based on the request type
- url = f"{BASE_URL}/list/strikes"
+ # Use v2 API endpoint
+ url = f"{BASE_URL}/v2/list/strikes"

  # Convert the expiration date to a string
  expiration_str = expiration.strftime("%Y%m%d")
@@ -615,3 +1018,118 @@ def get_strikes(username: str, password: str, ticker: str, expiration: datetime)
  strikes = [x / 1000.0 for x in strikes]

  return strikes
+
+
+ def get_chains_cached(
+ username: str,
+ password: str,
+ asset: Asset,
+ current_date: date = None
+ ) -> dict:
+ """
+ Retrieve option chain with caching (MATCHES POLYGON PATTERN).
+
+ This function follows the EXACT same caching strategy as Polygon:
+ 1. Check cache: LUMIBOT_CACHE_FOLDER/thetadata/option_chains/{symbol}_{date}.parquet
+ 2. Reuse files within RECENT_FILE_TOLERANCE_DAYS (default 7 days)
+ 3. If not found, fetch from ThetaData and save to cache
+ 4. Use pyarrow engine with snappy compression
+
+ Parameters
+ ----------
+ username : str
+ ThetaData username
+ password : str
+ ThetaData password
+ asset : Asset
+ Underlying asset (e.g., Asset("SPY"))
+ current_date : date
+ Historical date for backtest (required)
+
+ Returns
+ -------
+ dict : {
+ "Multiplier": 100,
+ "Exchange": "SMART",
+ "Chains": {
+ "CALL": {"2025-09-19": [140.0, 145.0, ...], ...},
+ "PUT": {"2025-09-19": [140.0, 145.0, ...], ...}
+ }
+ }
+ """
+ from collections import defaultdict
+
+ logger.debug(f"get_chains_cached called for {asset.symbol} on {current_date}")
+
+ # 1) If current_date is None => bail out
+ if current_date is None:
+ logger.debug("No current_date provided; returning None.")
+ return None
+
+ # 2) Build cache folder path
+ chain_folder = Path(LUMIBOT_CACHE_FOLDER) / "thetadata" / "option_chains"
+ chain_folder.mkdir(parents=True, exist_ok=True)
+
+ # 3) Check for recent cached file (within RECENT_FILE_TOLERANCE_DAYS)
+ RECENT_FILE_TOLERANCE_DAYS = 7
+ earliest_okay_date = current_date - timedelta(days=RECENT_FILE_TOLERANCE_DAYS)
+ pattern = f"{asset.symbol}_*.parquet"
+ potential_files = sorted(chain_folder.glob(pattern), reverse=True)
+
+ for fpath in potential_files:
+ fname = fpath.stem # e.g., "SPY_2025-09-15"
+ parts = fname.split("_", maxsplit=1)
+ if len(parts) != 2:
+ continue
+ file_symbol, date_str = parts
+ if file_symbol != asset.symbol:
+ continue
+
+ try:
+ file_date = date.fromisoformat(date_str)
+ except ValueError:
+ continue
+
+ # If file is recent enough, reuse it
+ if earliest_okay_date <= file_date <= current_date:
+ logger.debug(f"Reusing chain file {fpath} (file_date={file_date})")
+ df_cached = pd.read_parquet(fpath, engine='pyarrow')
+
+ # Convert back to dict with lists (not numpy arrays)
+ data = df_cached["data"][0]
+ for right in data["Chains"]:
+ for exp_date in data["Chains"][right]:
+ data["Chains"][right][exp_date] = list(data["Chains"][right][exp_date])
+
+ return data
+
+ # 4) No suitable file => fetch from ThetaData
+ logger.debug(f"No suitable file found for {asset.symbol} on {current_date}. Downloading...")
+ print(f"\nDownloading option chain for {asset} on {current_date}. This will be cached for future use.")
+
+ # Get expirations and strikes using existing functions
+ expirations = get_expirations(username, password, asset.symbol, current_date)
+
+ chains_dict = {
+ "Multiplier": 100,
+ "Exchange": "SMART",
+ "Chains": {
+ "CALL": defaultdict(list),
+ "PUT": defaultdict(list)
+ }
+ }
+
+ for expiration_str in expirations:
+ expiration = date.fromisoformat(expiration_str)
+ strikes = get_strikes(username, password, asset.symbol, expiration)
+
+ chains_dict["Chains"]["CALL"][expiration_str] = sorted(strikes)
+ chains_dict["Chains"]["PUT"][expiration_str] = sorted(strikes)
+
+ # 5) Save to cache file for future reuse
+ cache_file = chain_folder / f"{asset.symbol}_{current_date.isoformat()}.parquet"
+ df_to_cache = pd.DataFrame({"data": [chains_dict]})
+ df_to_cache.to_parquet(cache_file, compression='snappy', engine='pyarrow')
+ logger.debug(f"Saved chain cache: {cache_file}")
+
+ return chains_dict
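
The sketch below is illustrative only and is not part of the diff: it exercises the reworked get_price_data entry point shown above (EOD-backed daily bars and the new include_after_hours flag). Keyword names for asset/start/end are inferred from the signature fragments and docstrings in this file, the THETADATA_USERNAME/THETADATA_PASSWORD environment variables are an assumption, and a running ThetaTerminal (Java 11+ plus a valid ThetaData subscription) is required.

# Hedged usage sketch - not shipped in the release; parameter names inferred from the diff above.
import os
from datetime import date, datetime

from lumibot.entities import Asset
from lumibot.tools.thetadata_helper import get_chains_cached, get_price_data

username = os.environ["THETADATA_USERNAME"]  # assumed credential source
password = os.environ["THETADATA_PASSWORD"]  # assumed credential source
spy = Asset("SPY", asset_type="stock")

# Daily bars now route through the /v2/hist/stock/eod branch (timespan == "day").
daily_df = get_price_data(
    username=username,
    password=password,
    asset=spy,
    start=datetime(2025, 6, 2),
    end=datetime(2025, 6, 30),
    timespan="day",
)

# Intraday bars honor include_after_hours; False maps to rth=true (9:30-16:00 ET only).
minute_df = get_price_data(
    username=username,
    password=password,
    asset=spy,
    start=datetime(2025, 6, 2),
    end=datetime(2025, 6, 6),
    timespan="minute",
    include_after_hours=False,
)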
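
Continuing the same sketch, the new get_chains_cached helper (its signature is shown in the final hunk) writes one parquet file per symbol and request date under LUMIBOT_CACHE_FOLDER/thetadata/option_chains/ and reuses any file at most RECENT_FILE_TOLERANCE_DAYS (7) old, so a second call for a nearby backtest date is served from disk:

# Chain-cache round trip (illustrative; reuses username/password/spy from the sketch above).
chains = get_chains_cached(
    username=username,
    password=password,
    asset=spy,
    current_date=date(2025, 6, 2),
)
# The first call downloads expirations/strikes and writes SPY_2025-06-02.parquet to the cache
# folder; a later call with current_date within 7 days reuses that file instead of refetching.
call_expirations = sorted(chains["Chains"]["CALL"])
print(f"{len(call_expirations)} cached expirations, earliest: {call_expirations[0]}")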