lumibot 4.0.23__py3-none-any.whl → 4.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lumibot might be problematic.
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__init__.py +6 -5
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/backtesting_broker.py +209 -9
- lumibot/backtesting/databento_backtesting.py +141 -24
- lumibot/backtesting/thetadata_backtesting.py +63 -42
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/brokers/alpaca.py +11 -1
- lumibot/brokers/tradeovate.py +475 -0
- lumibot/components/grok_news_helper.py +284 -0
- lumibot/components/options_helper.py +90 -34
- lumibot/credentials.py +3 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/data_source_backtesting.py +3 -5
- lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
- lumibot/data_sources/pandas_data.py +6 -3
- lumibot/data_sources/polars_mixin.py +126 -21
- lumibot/data_sources/tradeovate_data.py +80 -0
- lumibot/data_sources/tradier_data.py +2 -1
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/entities/asset.py +8 -0
- lumibot/entities/order.py +1 -1
- lumibot/entities/quote.py +14 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/strategies/_strategy.py +95 -27
- lumibot/strategies/strategy.py +5 -6
- lumibot/strategies/strategy_executor.py +2 -2
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/databento_helper.py +384 -133
- lumibot/tools/databento_helper_polars.py +218 -156
- lumibot/tools/databento_roll.py +216 -0
- lumibot/tools/lumibot_logger.py +32 -17
- lumibot/tools/polygon_helper.py +65 -0
- lumibot/tools/thetadata_helper.py +588 -70
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/traders/trader.py +1 -1
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
- {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/RECORD +160 -44
- tests/backtest/check_timing_offset.py +198 -0
- tests/backtest/check_volume_spike.py +112 -0
- tests/backtest/comprehensive_comparison.py +166 -0
- tests/backtest/debug_comparison.py +91 -0
- tests/backtest/diagnose_price_difference.py +97 -0
- tests/backtest/direct_api_comparison.py +203 -0
- tests/backtest/profile_thetadata_vs_polygon.py +255 -0
- tests/backtest/root_cause_analysis.py +109 -0
- tests/backtest/test_accuracy_verification.py +244 -0
- tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
- tests/backtest/test_databento.py +4 -0
- tests/backtest/test_databento_comprehensive_trading.py +564 -0
- tests/backtest/test_debug_avg_fill_price.py +112 -0
- tests/backtest/test_dividends.py +8 -3
- tests/backtest/test_example_strategies.py +54 -47
- tests/backtest/test_futures_edge_cases.py +451 -0
- tests/backtest/test_futures_single_trade.py +270 -0
- tests/backtest/test_futures_ultra_simple.py +191 -0
- tests/backtest/test_index_data_verification.py +348 -0
- tests/backtest/test_polygon.py +45 -24
- tests/backtest/test_thetadata.py +246 -60
- tests/backtest/test_thetadata_comprehensive.py +729 -0
- tests/backtest/test_thetadata_vs_polygon.py +557 -0
- tests/backtest/test_yahoo.py +1 -2
- tests/conftest.py +20 -0
- tests/test_backtesting_data_source_env.py +249 -0
- tests/test_backtesting_quiet_logs_complete.py +10 -11
- tests/test_databento_helper.py +73 -86
- tests/test_databento_timezone_fixes.py +21 -4
- tests/test_get_historical_prices.py +6 -6
- tests/test_options_helper.py +162 -40
- tests/test_polygon_helper.py +21 -13
- tests/test_quiet_logs_requirements.py +5 -5
- tests/test_thetadata_helper.py +487 -171
- tests/test_yahoo_data.py +125 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
lumibot/tools/thetadata_helper.py
@@ -1,5 +1,6 @@
 # This file contains helper functions for getting data from Polygon.io
 import time
+import os
 from datetime import date, datetime, timedelta
 from pathlib import Path
 import pytz
@@ -9,7 +10,6 @@ import requests
 from lumibot import LUMIBOT_CACHE_FOLDER, LUMIBOT_DEFAULT_PYTZ
 from lumibot.tools.lumibot_logger import get_logger
 from lumibot.entities import Asset
-from thetadata import ThetaClient
 from tqdm import tqdm

 logger = get_logger(__name__)
@@ -19,6 +19,10 @@ MAX_DAYS = 30
 CACHE_SUBFOLDER = "thetadata"
 BASE_URL = "http://127.0.0.1:25510"

+# Global process tracking for ThetaTerminal
+THETA_DATA_PROCESS = None
+THETA_DATA_PID = None
+

 def get_price_data(
     username: str,
@@ -29,7 +33,8 @@ def get_price_data(
     timespan: str = "minute",
     quote_asset: Asset = None,
     dt=None,
-    datastyle: str = "ohlc"
+    datastyle: str = "ohlc",
+    include_after_hours: bool = True
 ):
     """
     Queries ThetaData for pricing data for the given asset and returns a DataFrame with the data. Data will be
@@ -53,6 +58,10 @@ def get_price_data(
         "month", "quarter"
     quote_asset : Asset
         The quote asset for the asset we are getting data for. This is only needed for Forex assets.
+    datastyle : str
+        The style of data to retrieve ("ohlc" or "quote")
+    include_after_hours : bool
+        Whether to include after-hours trading data (default True)

     Returns
     -------
@@ -60,6 +69,7 @@ def get_price_data(
         A DataFrame with the pricing data for the asset

     """
+    import pytz  # Import at function level to avoid scope issues in nested calls

     # Check if we already have data for this asset in the cache file
     df_all = None
@@ -74,6 +84,37 @@ def get_price_data(
     # Check if we need to get more data
     missing_dates = get_missing_dates(df_all, asset, start, end)
     if not missing_dates:
+        # Filter cached data to requested date range before returning
+        if df_all is not None and not df_all.empty:
+            # For daily data, use date-based filtering (timestamps vary by provider)
+            # For intraday data, use precise datetime filtering
+            if timespan == "day":
+                # Convert index to dates for comparison
+                import pandas as pd
+                df_dates = pd.to_datetime(df_all.index).date
+                start_date = start.date() if hasattr(start, 'date') else start
+                end_date = end.date() if hasattr(end, 'date') else end
+                mask = (df_dates >= start_date) & (df_dates <= end_date)
+                df_all = df_all[mask]
+            else:
+                # Intraday: use precise datetime filtering
+                import datetime as dt
+                # Convert date to datetime if needed
+                if isinstance(start, dt.date) and not isinstance(start, dt.datetime):
+                    start = dt.datetime.combine(start, dt.time.min)
+                if isinstance(end, dt.date) and not isinstance(end, dt.datetime):
+                    end = dt.datetime.combine(end, dt.time.max)
+
+                # Handle datetime objects with midnight time (users often pass datetime(YYYY, MM, DD))
+                if isinstance(end, dt.datetime) and end.time() == dt.time.min:
+                    # Convert end-of-period midnight to end-of-day
+                    end = dt.datetime.combine(end.date(), dt.time.max)
+
+                if start.tzinfo is None:
+                    start = LUMIBOT_DEFAULT_PYTZ.localize(start).astimezone(pytz.UTC)
+                if end.tzinfo is None:
+                    end = LUMIBOT_DEFAULT_PYTZ.localize(end).astimezone(pytz.UTC)
+                df_all = df_all[(df_all.index >= start) & (df_all.index <= end)]
         return df_all

     start = missing_dates[0]  # Data will start at 8am UTC (4am EST)
@@ -88,19 +129,43 @@ def get_price_data(

     delta = timedelta(days=MAX_DAYS)

-
-    #
-
-
-
-
-
-
-
-
-
-
-
+    # For daily bars, use ThetaData's EOD endpoint for official daily OHLC
+    # The EOD endpoint includes the 16:00 closing auction and follows SIP sale-condition rules
+    # This matches Polygon and Yahoo Finance EXACTLY (zero tolerance)
+    if timespan == "day":
+        logger.info(f"Daily bars: using EOD endpoint for official close prices")
+
+        # Use EOD endpoint for official daily OHLC
+        result_df = get_historical_eod_data(
+            asset=asset,
+            start_dt=start,
+            end_dt=end,
+            username=username,
+            password=password,
+            datastyle=datastyle
+        )
+
+        return result_df
+
+    # Map timespan to milliseconds for intraday intervals
+    TIMESPAN_TO_MS = {
+        "second": 1000,
+        "minute": 60000,
+        "5minute": 300000,
+        "10minute": 600000,
+        "15minute": 900000,
+        "30minute": 1800000,
+        "hour": 3600000,
+        "2hour": 7200000,
+        "4hour": 14400000,
+    }
+
+    interval_ms = TIMESPAN_TO_MS.get(timespan)
+    if interval_ms is None:
+        raise ValueError(
+            f"Unsupported timespan '{timespan}'. "
+            f"Supported values: {list(TIMESPAN_TO_MS.keys())} or 'day'"
+        )

     while start <= missing_dates[-1]:
         # If we don't have a paid subscription, we need to wait 1 minute between requests because of
@@ -109,7 +174,7 @@ def get_price_data(
         if end > start + delta:
             end = start + delta

-        result_df = get_historical_data(asset, start, end, interval_ms, username, password, datastyle=datastyle)
+        result_df = get_historical_data(asset, start, end, interval_ms, username, password, datastyle=datastyle, include_after_hours=include_after_hours)

         if result_df is None or len(result_df) == 0:
             logger.warning(
@@ -155,8 +220,8 @@ def get_trading_dates(asset: Asset, start: datetime, end: datetime):
         # Crypto trades every day, 24/7 so we don't need to check the calendar
         return [start.date() + timedelta(days=x) for x in range((end.date() - start.date()).days + 1)]

-    # Stock/Option Asset for Backtesting - Assuming NYSE trading days
-    elif asset.asset_type == "stock" or asset.asset_type == "option":
+    # Stock/Option/Index Asset for Backtesting - Assuming NYSE trading days
+    elif asset.asset_type == "stock" or asset.asset_type == "option" or asset.asset_type == "index":
         cal = mcal.get_calendar("NYSE")

     # Forex Asset for Backtesting - Forex trades weekdays, 24hrs starting Sunday 5pm EST
@@ -168,7 +233,9 @@ def get_trading_dates(asset: Asset, start: datetime, end: datetime):
         raise ValueError(f"Unsupported asset type for thetadata: {asset.asset_type}")

     # Get the trading days between the start and end dates
-
+    start_date = start.date() if hasattr(start, 'date') else start
+    end_date = end.date() if hasattr(end, 'date') else end
+    df = cal.schedule(start_date=start_date, end_date=end_date)
     trading_days = df.index.date.tolist()
     return trading_days

@@ -333,23 +400,143 @@ def update_df(df_all, result):
         df_all = pd.concat([df_all, df]).sort_index()
         df_all = df_all[~df_all.index.duplicated(keep="first")]  # Remove any duplicate rows

-        #
-
+        # NOTE: Timestamp correction is now done in get_historical_data() at line 569
+        # Do NOT subtract 1 minute here as it would double-correct
+        # df_all.index = df_all.index - pd.Timedelta(minutes=1)
     return df_all


+def is_process_alive():
+    """Check if ThetaTerminal Java process is still running"""
+    import subprocess
+    global THETA_DATA_PROCESS
+
+    # First check if we have a process handle and it's still alive
+    if THETA_DATA_PROCESS is not None:
+        # poll() returns None if process is still running, otherwise returns exit code
+        if THETA_DATA_PROCESS.poll() is None:
+            return True
+
+    # If we don't have a process handle or it died, check if any ThetaTerminal process is running
+    # This handles cases where the process was started by a previous Python session
+    try:
+        result = subprocess.run(
+            ["pgrep", "-f", "ThetaTerminal.jar"],
+            capture_output=True,
+            text=True,
+            timeout=2
+        )
+        # pgrep returns 0 if processes found, 1 if none found
+        return result.returncode == 0
+    except Exception:
+        return False
+
+
 def start_theta_data_client(username: str, password: str):
+    import subprocess
+    import shutil
+    global THETA_DATA_PROCESS, THETA_DATA_PID
+
     # First try shutting down any existing connection
     try:
         requests.get(f"{BASE_URL}/v2/system/terminal/shutdown")
     except Exception:
         pass

-
+    # Create creds.txt file to avoid passing password with special characters on command line
+    # This is the official ThetaData method and avoids shell escaping issues
+    # Security note: creds.txt with 0o600 permissions is MORE secure than command-line args
+    # which can be seen in process lists. Similar security profile to .env files.
+    theta_dir = Path.home() / "ThetaData" / "ThetaTerminal"
+    theta_dir.mkdir(parents=True, exist_ok=True)
+    creds_file = theta_dir / "creds.txt"
+
+    # IDEMPOTENT WRITE: Only write credentials if file doesn't exist or username changed
+    # This prevents overwriting production credentials with test credentials
+    should_write = False
+    if not creds_file.exists():
+        logger.info(f"Creating new creds.txt file at {creds_file}")
+        should_write = True
+    else:
+        # Check if username changed
+        try:
+            with open(creds_file, 'r') as f:
+                existing_username = f.readline().strip()
+            if existing_username != username:
+                logger.info(f"Username changed from {existing_username} to {username}, updating creds.txt")
+                should_write = True
+            else:
+                logger.debug(f"Using existing creds.txt for {username}")
+        except Exception as e:
+            logger.warning(f"Could not read existing creds.txt: {e}, will recreate")
+            should_write = True
+
+    if should_write:
+        # Write credentials to creds.txt (format: email on first line, password on second line)
+        with open(creds_file, 'w') as f:
+            f.write(f"{username}\n")
+            f.write(f"{password}\n")
+
+        # Set restrictive permissions on creds file (owner read/write only)
+        # This prevents other users on the system from reading the credentials
+        os.chmod(creds_file, 0o600)
+
+        logger.info(f"Updated creds.txt file for user: {username}")
+
+    # Launch ThetaTerminal directly with --creds-file to avoid shell escaping issues
+    # We bypass the thetadata library's launcher which doesn't support this option
+    # and has shell escaping bugs with special characters in passwords
+
+    # Verify Java is available
+    if not shutil.which("java"):
+        raise RuntimeError("Java is not installed. Please install Java 11+ to use ThetaData.")
+
+    # Find ThetaTerminal.jar
+    jar_file = theta_dir / "ThetaTerminal.jar"
+    if not jar_file.exists():
+        # Copy ThetaTerminal.jar from lumibot package to user's ThetaData directory
+        logger.info("ThetaTerminal.jar not found, copying from lumibot package...")
+        import shutil as shutil_copy
+
+        # Find the bundled jar file in the lumibot package
+        lumibot_jar = Path(__file__).parent.parent.parent / "ThetaTerminal.jar"
+
+        if lumibot_jar.exists():
+            logger.info(f"Copying ThetaTerminal.jar from {lumibot_jar} to {jar_file}")
+            shutil_copy.copy2(lumibot_jar, jar_file)
+            logger.info(f"Successfully copied ThetaTerminal.jar to {jar_file}")
+        else:
+            raise FileNotFoundError(
+                f"ThetaTerminal.jar not found at {lumibot_jar}. "
+                f"Please ensure ThetaTerminal.jar is included in the lumibot package, "
+                f"or manually place it at {jar_file}"
+            )
+
+    if not jar_file.exists():
+        raise FileNotFoundError(f"ThetaTerminal.jar not found at {jar_file}")
+
+    # Launch ThetaTerminal with --creds-file argument (no credentials on command line)
+    # This avoids all shell escaping issues and is the recommended approach
+    cmd = ["java", "-jar", str(jar_file), "--creds-file", str(creds_file)]

-
+    logger.info(f"Launching ThetaTerminal with creds file: {cmd}")

-
+    # Launch in background and store process handle
+    THETA_DATA_PROCESS = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        cwd=str(theta_dir)
+    )
+    THETA_DATA_PID = THETA_DATA_PROCESS.pid
+    logger.info(f"ThetaTerminal started with PID: {THETA_DATA_PID}")
+
+    # Give it a moment to start
+    time.sleep(2)
+
+    # We don't return a ThetaClient object since we're launching manually
+    # The connection will be established via HTTP/WebSocket to localhost:25510
+    return THETA_DATA_PROCESS


 def check_connection(username: str, password: str):
@@ -358,26 +545,40 @@ def check_connection(username: str, password: str):
     counter = 0
     client = None
     connected = False
+
     while True:
+        # FIRST: Check if already connected (most important check!)
+        # This prevents unnecessary restarts that would overwrite creds.txt
         try:
-            time.sleep(0.5)
             res = requests.get(f"{BASE_URL}/v2/system/mdds/status", timeout=1)
             con_text = res.text

             if con_text == "CONNECTED":
-                logger.debug("
+                logger.debug("Already connected to Theta Data!")
                 connected = True
                 break
             elif con_text == "DISCONNECTED":
-                logger.debug("Disconnected from Theta Data
-
+                logger.debug("Disconnected from Theta Data, will attempt to start...")
+                # Fall through to process check and restart logic
             else:
-                logger.
-
-                counter += 1
+                logger.debug(f"Unknown connection status: {con_text}")
+                # Fall through to process check and restart logic
         except Exception as e:
+            # Connection endpoint not responding - process might be dead
+            logger.debug(f"Cannot reach ThetaData status endpoint: {e}")
+            # Fall through to process check and restart logic
+
+        # SECOND: Check if the Java process is still alive
+        if not is_process_alive():
+            logger.warning("ThetaTerminal process is not running, starting...")
             client = start_theta_data_client(username=username, password=password)
             counter += 1
+            time.sleep(0.5)
+            continue
+
+        # THIRD: Process is alive but not connected - wait and retry
+        time.sleep(0.5)
+        counter += 1

         if counter > MAX_RETRIES:
             logger.error("Cannot connect to Theta Data!")
@@ -387,41 +588,211 @@ def check_connection(username: str, password: str):


 def get_request(url: str, headers: dict, querystring: dict, username: str, password: str):
-
+    all_responses = []
+    next_page_url = None
+    page_count = 0
+
     while True:
-
-
-
-
-
-
-
-
-        #
-        if
-
-
-
-
-
-
-        logger.error(
-            f"Error getting data from Theta Data: {json_resp['header']['error_type']},\nquerystring: {querystring}")
-        check_connection(username=username, password=password)
+        counter = 0
+        # Use next_page URL if available, otherwise use original URL with querystring
+        request_url = next_page_url if next_page_url else url
+        request_params = None if next_page_url else querystring
+
+        while True:
+            try:
+                response = requests.get(request_url, headers=headers, params=request_params)
+                # Status code 472 means "No data" - this is valid, return None
+                if response.status_code == 472:
+                    logger.warning(f"No data available for request: {response.text[:200]}")
+                    return None
+                # If status code is not 200, then we are not connected
+                elif response.status_code != 200:
+                    logger.warning(f"Non-200 status code {response.status_code}: {response.text[:200]}")
+                    check_connection(username=username, password=password)
                 else:
-
+                    json_resp = response.json()
+
+                    # Check if json_resp has error_type inside of header
+                    if "error_type" in json_resp["header"] and json_resp["header"]["error_type"] != "null":
+                        # Handle "NO_DATA" error
+                        if json_resp["header"]["error_type"] == "NO_DATA":
+                            logger.warning(
+                                f"No data returned for querystring: {querystring}")
+                            return None
+                        else:
+                            logger.error(
+                                f"Error getting data from Theta Data: {json_resp['header']['error_type']},\nquerystring: {querystring}")
+                            check_connection(username=username, password=password)
+                    else:
+                        break

-
-
+            except Exception as e:
+                logger.warning(f"Exception during request (attempt {counter + 1}): {e}")
+                check_connection(username=username, password=password)
+                # Give the process time to start after restart
+                if counter == 0:
+                    logger.info("Waiting 5 seconds for ThetaTerminal to initialize...")
+                    time.sleep(5)

-
-
-
+            counter += 1
+            if counter > 1:
+                raise ValueError("Cannot connect to Theta Data!")
+
+        # Store this page's response data
+        page_count += 1
+        all_responses.append(json_resp["response"])
+
+        # Check for pagination - follow next_page if it exists
+        next_page = json_resp["header"].get("next_page")
+        if next_page and next_page != "null" and next_page != "":
+            logger.info(f"Following pagination: {page_count} page(s) downloaded, fetching next page...")
+            next_page_url = next_page
+        else:
+            # No more pages, we're done
+            break
+
+    # Merge all pages if we got multiple pages
+    if page_count > 1:
+        logger.info(f"Merged {page_count} pages from ThetaData ({sum(len(r) for r in all_responses)} total rows)")
+        json_resp["response"] = []
+        for page_response in all_responses:
+            json_resp["response"].extend(page_response)

     return json_resp


-def
+def get_historical_eod_data(asset: Asset, start_dt: datetime, end_dt: datetime, username: str, password: str, datastyle: str = "ohlc"):
+    """
+    Get EOD (End of Day) data from ThetaData using the /v2/hist/{asset_type}/eod endpoint.
+
+    This endpoint provides official daily OHLC that includes the 16:00 closing auction
+    and follows SIP sale-condition rules, matching Polygon and Yahoo Finance exactly.
+
+    NOTE: ThetaData's EOD endpoint has been found to return incorrect open prices for stocks
+    that don't match Polygon/Yahoo. We fix this by using the first minute bar's open price.
+    Indexes don't have this issue since they are calculated values.
+
+    Parameters
+    ----------
+    asset : Asset
+        The asset we are getting data for
+    start_dt : datetime
+        The start date for the data we want
+    end_dt : datetime
+        The end date for the data we want
+    username : str
+        Your ThetaData username
+    password : str
+        Your ThetaData password
+    datastyle : str
+        The style of data to retrieve (default "ohlc")
+
+    Returns
+    -------
+    pd.DataFrame
+        A DataFrame with EOD data for the asset
+    """
+    # Convert start and end dates to strings
+    start_date = start_dt.strftime("%Y%m%d")
+    end_date = end_dt.strftime("%Y%m%d")
+
+    # Use v2 EOD API endpoint (supports stock, index, option)
+    url = f"{BASE_URL}/v2/hist/{asset.asset_type}/eod"
+
+    querystring = {
+        "root": asset.symbol,
+        "start_date": start_date,
+        "end_date": end_date
+    }
+
+    # For options, add strike, expiration, and right parameters
+    if asset.asset_type == "option":
+        expiration_str = asset.expiration.strftime("%Y%m%d")
+        strike = int(asset.strike * 1000)
+        querystring["exp"] = expiration_str
+        querystring["strike"] = strike
+        querystring["right"] = "C" if asset.right == "CALL" else "P"
+
+    headers = {"Accept": "application/json"}
+
+    # Send the request
+    json_resp = get_request(url=url, headers=headers, querystring=querystring,
+                            username=username, password=password)
+    if json_resp is None:
+        return None
+
+    # Convert to pandas dataframe
+    df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])
+
+    if df is None or df.empty:
+        return df
+
+    # Function to combine ms_of_day and date into datetime
+    def combine_datetime(row):
+        # Ensure the date is in integer format and then convert to string
+        date_str = str(int(row["date"]))
+        base_date = datetime.strptime(date_str, "%Y%m%d")
+        # EOD reports are normalized at ~17:15 ET but represent the trading day
+        # We use midnight of the trading day as the timestamp (consistent with daily bars)
+        return base_date
+
+    # Apply the function to each row to create a new datetime column
+    datetime_combined = df.apply(combine_datetime, axis=1)
+
+    # Assign the newly created datetime column
+    df = df.assign(datetime=datetime_combined)
+
+    # Convert the datetime column to a datetime and localize to UTC
+    df["datetime"] = pd.to_datetime(df["datetime"])
+    df["datetime"] = df["datetime"].dt.tz_localize("UTC")
+
+    # Set datetime as the index
+    df = df.set_index("datetime")
+
+    # Drop the ms_of_day, ms_of_day2, and date columns (not needed for daily bars)
+    df = df.drop(columns=["ms_of_day", "ms_of_day2", "date"], errors='ignore')
+
+    # Drop bid/ask columns if present (EOD includes NBBO but we only need OHLC)
+    df = df.drop(columns=["bid_size", "bid_exchange", "bid", "bid_condition",
+                          "ask_size", "ask_exchange", "ask", "ask_condition"], errors='ignore')
+
+    # FIX: ThetaData's EOD endpoint returns incorrect open/high/low prices for STOCKS and OPTIONS
+    # that don't match Polygon/Yahoo. We fix this by using minute bar data.
+    # Solution: Fetch minute bars for each trading day and aggregate to get correct OHLC
+    # NOTE: Indexes don't need this fix since they are calculated values, not traded securities
+    if asset.asset_type in ["stock", "option"]:
+        logger.info(f"Fetching 9:30 AM minute bars to correct EOD open prices...")
+
+        # Get minute data for the date range to extract 9:30 AM opens
+        minute_df = get_historical_data(
+            asset=asset,
+            start_dt=start_dt,
+            end_dt=end_dt,
+            ivl=60000,  # 1 minute
+            username=username,
+            password=password,
+            datastyle=datastyle,
+            include_after_hours=False  # RTH only
+        )
+
+        if minute_df is not None and not minute_df.empty:
+            # Group by date and get the first bar's open for each day
+            minute_df_copy = minute_df.copy()
+            minute_df_copy['date'] = minute_df_copy.index.date
+
+            # For each date in df, find the corresponding 9:30 AM open from minute data
+            for idx in df.index:
+                trade_date = idx.date()
+                day_minutes = minute_df_copy[minute_df_copy['date'] == trade_date]
+                if len(day_minutes) > 0:
+                    # Use the first minute bar's open (9:30 AM opening auction)
+                    correct_open = day_minutes.iloc[0]['open']
+                    df.loc[idx, 'open'] = correct_open
+
+    return df
+
+
+def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl: int, username: str, password: str, datastyle:str = "ohlc", include_after_hours: bool = True):
     """
     Get data from ThetaData

@@ -439,6 +810,10 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
         Your ThetaData username
     password : str
         Your ThetaData password
+    datastyle : str
+        The style of data to retrieve ("ohlc" or "quote")
+    include_after_hours : bool
+        Whether to include after-hours trading data (default True)

     Returns
     -------
@@ -450,8 +825,8 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
     start_date = start_dt.strftime("%Y%m%d")
     end_date = end_dt.strftime("%Y%m%d")

-    #
-    url = f"{BASE_URL}/hist/{asset.asset_type}/{datastyle}"
+    # Use v2 API for ALL asset types
+    url = f"{BASE_URL}/v2/hist/{asset.asset_type}/{datastyle}"

     if asset.asset_type == "option":
         # Convert the expiration date to a string
@@ -468,10 +843,30 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
             "strike": strike,  # "140000",
             "exp": expiration_str,  # "20220930",
             "right": "C" if asset.right == "CALL" else "P",
-
+            # include_after_hours=True means extended hours (rth=false)
+            # include_after_hours=False means regular hours only (rth=true)
+            "rth": "false" if include_after_hours else "true"
+        }
+    elif asset.asset_type == "index":
+        # For indexes (SPX, VIX, etc.), don't use rth parameter
+        # Indexes are calculated values, not traded securities
+        querystring = {
+            "root": asset.symbol,
+            "start_date": start_date,
+            "end_date": end_date,
+            "ivl": ivl
         }
     else:
-
+        # For stocks, respect include_after_hours parameter
+        # rth=false means extended hours (pre-market + regular + after-hours)
+        # rth=true means 9:30 AM - 4:00 PM ET (regular market hours only)
+        querystring = {
+            "root": asset.symbol,
+            "start_date": start_date,
+            "end_date": end_date,
+            "ivl": ivl,
+            "rth": "false" if include_after_hours else "true"
+        }

     headers = {"Accept": "application/json"}

@@ -486,9 +881,11 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
     df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])

     # Remove any rows where count is 0 (no data - the prices will be 0 at these times too)
+    # NOTE: Indexes always have count=0 since they're calculated values, not traded securities
     if "quote" in datastyle.lower():
         df = df[(df["bid_size"] != 0) | (df["ask_size"] != 0)]
-
+    elif asset.asset_type != "index":
+        # Don't filter indexes by count - they're always 0
         df = df[df["count"] != 0]

     if df is None or df.empty:
@@ -499,7 +896,7 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
         # Ensure the date is in integer format and then convert to string
         date_str = str(int(row["date"]))
         base_date = datetime.strptime(date_str, "%Y%m%d")
-        #
+        # v2 API returns correct start-stamped bars - no adjustment needed
         datetime_value = base_date + timedelta(milliseconds=int(row["ms_of_day"]))
         return datetime_value

@@ -511,11 +908,17 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
     # Assign the newly created datetime column
     df = df.assign(datetime=datetime_combined)

-    # Convert the datetime column to a datetime
+    # Convert the datetime column to a datetime and localize to Eastern Time
     df["datetime"] = pd.to_datetime(df["datetime"])

+    # Localize to Eastern Time (ThetaData returns times in ET)
+    df["datetime"] = df["datetime"].dt.tz_localize("America/New_York")
+
+    # Set datetime as the index
+    df = df.set_index("datetime")
+
     # Drop the ms_of_day and date columns
-    df = df.drop(columns=["ms_of_day", "date"])
+    df = df.drop(columns=["ms_of_day", "date"], errors='ignore')

     return df

@@ -538,8 +941,8 @@ def get_expirations(username: str, password: str, ticker: str, after_date: date)
     list[str]
         A list of expiration dates for the given ticker
     """
-    #
-    url = f"{BASE_URL}/list/expirations"
+    # Use v2 API endpoint
+    url = f"{BASE_URL}/v2/list/expirations"

     querystring = {"root": ticker}

@@ -592,8 +995,8 @@ def get_strikes(username: str, password: str, ticker: str, expiration: datetime)
     list[float]
         A list of strike prices for the given ticker and expiration date
     """
-    #
-    url = f"{BASE_URL}/list/strikes"
+    # Use v2 API endpoint
+    url = f"{BASE_URL}/v2/list/strikes"

     # Convert the expiration date to a string
     expiration_str = expiration.strftime("%Y%m%d")
@@ -615,3 +1018,118 @@ def get_strikes(username: str, password: str, ticker: str, expiration: datetime)
     strikes = [x / 1000.0 for x in strikes]

     return strikes
+
+
+def get_chains_cached(
+    username: str,
+    password: str,
+    asset: Asset,
+    current_date: date = None
+) -> dict:
+    """
+    Retrieve option chain with caching (MATCHES POLYGON PATTERN).
+
+    This function follows the EXACT same caching strategy as Polygon:
+    1. Check cache: LUMIBOT_CACHE_FOLDER/thetadata/option_chains/{symbol}_{date}.parquet
+    2. Reuse files within RECENT_FILE_TOLERANCE_DAYS (default 7 days)
+    3. If not found, fetch from ThetaData and save to cache
+    4. Use pyarrow engine with snappy compression
+
+    Parameters
+    ----------
+    username : str
+        ThetaData username
+    password : str
+        ThetaData password
+    asset : Asset
+        Underlying asset (e.g., Asset("SPY"))
+    current_date : date
+        Historical date for backtest (required)
+
+    Returns
+    -------
+    dict : {
+        "Multiplier": 100,
+        "Exchange": "SMART",
+        "Chains": {
+            "CALL": {"2025-09-19": [140.0, 145.0, ...], ...},
+            "PUT": {"2025-09-19": [140.0, 145.0, ...], ...}
+        }
+    }
+    """
+    from collections import defaultdict
+
+    logger.debug(f"get_chains_cached called for {asset.symbol} on {current_date}")
+
+    # 1) If current_date is None => bail out
+    if current_date is None:
+        logger.debug("No current_date provided; returning None.")
+        return None
+
+    # 2) Build cache folder path
+    chain_folder = Path(LUMIBOT_CACHE_FOLDER) / "thetadata" / "option_chains"
+    chain_folder.mkdir(parents=True, exist_ok=True)
+
+    # 3) Check for recent cached file (within RECENT_FILE_TOLERANCE_DAYS)
+    RECENT_FILE_TOLERANCE_DAYS = 7
+    earliest_okay_date = current_date - timedelta(days=RECENT_FILE_TOLERANCE_DAYS)
+    pattern = f"{asset.symbol}_*.parquet"
+    potential_files = sorted(chain_folder.glob(pattern), reverse=True)
+
+    for fpath in potential_files:
+        fname = fpath.stem  # e.g., "SPY_2025-09-15"
+        parts = fname.split("_", maxsplit=1)
+        if len(parts) != 2:
+            continue
+        file_symbol, date_str = parts
+        if file_symbol != asset.symbol:
+            continue
+
+        try:
+            file_date = date.fromisoformat(date_str)
+        except ValueError:
+            continue
+
+        # If file is recent enough, reuse it
+        if earliest_okay_date <= file_date <= current_date:
+            logger.debug(f"Reusing chain file {fpath} (file_date={file_date})")
+            df_cached = pd.read_parquet(fpath, engine='pyarrow')
+
+            # Convert back to dict with lists (not numpy arrays)
+            data = df_cached["data"][0]
+            for right in data["Chains"]:
+                for exp_date in data["Chains"][right]:
+                    data["Chains"][right][exp_date] = list(data["Chains"][right][exp_date])
+
+            return data
+
+    # 4) No suitable file => fetch from ThetaData
+    logger.debug(f"No suitable file found for {asset.symbol} on {current_date}. Downloading...")
+    print(f"\nDownloading option chain for {asset} on {current_date}. This will be cached for future use.")
+
+    # Get expirations and strikes using existing functions
+    expirations = get_expirations(username, password, asset.symbol, current_date)
+
+    chains_dict = {
+        "Multiplier": 100,
+        "Exchange": "SMART",
+        "Chains": {
+            "CALL": defaultdict(list),
+            "PUT": defaultdict(list)
+        }
+    }
+
+    for expiration_str in expirations:
+        expiration = date.fromisoformat(expiration_str)
+        strikes = get_strikes(username, password, asset.symbol, expiration)
+
+        chains_dict["Chains"]["CALL"][expiration_str] = sorted(strikes)
+        chains_dict["Chains"]["PUT"][expiration_str] = sorted(strikes)
+
+    # 5) Save to cache file for future reuse
+    cache_file = chain_folder / f"{asset.symbol}_{current_date.isoformat()}.parquet"
+    df_to_cache = pd.DataFrame({"data": [chains_dict]})
+    df_to_cache.to_parquet(cache_file, compression='snappy', engine='pyarrow')
+    logger.debug(f"Saved chain cache: {cache_file}")
+
+    return chains_dict