lumibot 4.1.3__py3-none-any.whl → 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- lumibot/backtesting/__init__.py +19 -5
- lumibot/backtesting/backtesting_broker.py +98 -18
- lumibot/backtesting/databento_backtesting.py +5 -686
- lumibot/backtesting/databento_backtesting_pandas.py +738 -0
- lumibot/backtesting/databento_backtesting_polars.py +860 -546
- lumibot/backtesting/fix_debug.py +37 -0
- lumibot/backtesting/thetadata_backtesting.py +9 -355
- lumibot/backtesting/thetadata_backtesting_pandas.py +1178 -0
- lumibot/brokers/alpaca.py +8 -1
- lumibot/brokers/schwab.py +12 -2
- lumibot/credentials.py +13 -0
- lumibot/data_sources/__init__.py +5 -8
- lumibot/data_sources/data_source.py +6 -2
- lumibot/data_sources/data_source_backtesting.py +30 -0
- lumibot/data_sources/databento_data.py +5 -390
- lumibot/data_sources/databento_data_pandas.py +440 -0
- lumibot/data_sources/databento_data_polars.py +15 -9
- lumibot/data_sources/pandas_data.py +30 -17
- lumibot/data_sources/polars_data.py +986 -0
- lumibot/data_sources/polars_mixin.py +472 -96
- lumibot/data_sources/polygon_data_polars.py +5 -0
- lumibot/data_sources/yahoo_data.py +9 -2
- lumibot/data_sources/yahoo_data_polars.py +5 -0
- lumibot/entities/__init__.py +15 -0
- lumibot/entities/asset.py +5 -28
- lumibot/entities/bars.py +89 -20
- lumibot/entities/data.py +29 -6
- lumibot/entities/data_polars.py +668 -0
- lumibot/entities/position.py +38 -4
- lumibot/strategies/_strategy.py +2 -1
- lumibot/strategies/strategy.py +61 -49
- lumibot/tools/backtest_cache.py +284 -0
- lumibot/tools/databento_helper.py +35 -35
- lumibot/tools/databento_helper_polars.py +738 -775
- lumibot/tools/futures_roll.py +251 -0
- lumibot/tools/indicators.py +135 -104
- lumibot/tools/polars_utils.py +142 -0
- lumibot/tools/thetadata_helper.py +1068 -134
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/METADATA +9 -1
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/RECORD +71 -147
- tests/backtest/test_databento.py +37 -6
- tests/backtest/test_databento_comprehensive_trading.py +8 -4
- tests/backtest/test_databento_parity.py +4 -2
- tests/backtest/test_debug_avg_fill_price.py +1 -1
- tests/backtest/test_example_strategies.py +11 -1
- tests/backtest/test_futures_edge_cases.py +3 -3
- tests/backtest/test_futures_single_trade.py +2 -2
- tests/backtest/test_futures_ultra_simple.py +2 -2
- tests/backtest/test_polars_lru_eviction.py +470 -0
- tests/backtest/test_yahoo.py +42 -0
- tests/test_asset.py +4 -4
- tests/test_backtest_cache_manager.py +149 -0
- tests/test_backtesting_data_source_env.py +6 -0
- tests/test_continuous_futures_resolution.py +60 -48
- tests/test_data_polars_parity.py +160 -0
- tests/test_databento_asset_validation.py +23 -5
- tests/test_databento_backtesting.py +1 -1
- tests/test_databento_backtesting_polars.py +312 -192
- tests/test_databento_data.py +220 -463
- tests/test_databento_live.py +10 -10
- tests/test_futures_roll.py +38 -0
- tests/test_indicator_subplots.py +101 -0
- tests/test_market_infinite_loop_bug.py +77 -3
- tests/test_polars_resample.py +67 -0
- tests/test_polygon_helper.py +46 -0
- tests/test_thetadata_backwards_compat.py +97 -0
- tests/test_thetadata_helper.py +222 -23
- tests/test_thetadata_pandas_verification.py +186 -0
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/WHEEL +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/licenses/LICENSE +0 -0
- {lumibot-4.1.3.dist-info → lumibot-4.2.0.dist-info}/top_level.txt +0 -0
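The largest change in this release is the expanded ThetaData helper shown in the hunks below. As a rough orientation, here is a minimal sketch of how the updated helper would be called, based only on the signature and docstring visible in the diff (username, password, asset, start, end, timespan, datastyle, include_after_hours, and the new return_polars flag). The credentials, symbol, and dates are placeholders, and return_polars must stay False because the new code raises a ValueError otherwise.

# Minimal sketch, assuming the keyword names shown in the thetadata_helper diff below.
# Credentials, symbol, and dates are placeholders, not values from this release.
from datetime import datetime

from lumibot.entities import Asset
from lumibot.tools import thetadata_helper

bars = thetadata_helper.get_price_data(
    username="THETADATA_USERNAME",   # placeholder credential
    password="THETADATA_PASSWORD",   # placeholder credential
    asset=Asset("SPY"),
    start=datetime(2024, 1, 2),
    end=datetime(2024, 1, 31),
    timespan="day",
    datastyle="ohlc",
    include_after_hours=True,
    return_polars=False,  # per the 4.2.0 diff, True raises ValueError
)
print(None if bars is None else bars.tail())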
--- lumibot/tools/thetadata_helper.py (4.1.3)
+++ lumibot/tools/thetadata_helper.py (4.2.0)
@@ -1,7 +1,9 @@
 # This file contains helper functions for getting data from Polygon.io
 import time
 import os
-
+import signal
+from typing import Dict, List, Optional
+from datetime import date, datetime, timedelta, timezone
 from pathlib import Path
 import pytz
 import pandas as pd
@@ -11,6 +13,7 @@ from lumibot import LUMIBOT_CACHE_FOLDER, LUMIBOT_DEFAULT_PYTZ
 from lumibot.tools.lumibot_logger import get_logger
 from lumibot.entities import Asset
 from tqdm import tqdm
+from lumibot.tools.backtest_cache import CacheMode, get_backtest_cache
 
 logger = get_logger(__name__)
 
@@ -18,10 +21,172 @@ WAIT_TIME = 60
 MAX_DAYS = 30
 CACHE_SUBFOLDER = "thetadata"
 BASE_URL = "http://127.0.0.1:25510"
+CONNECTION_RETRY_SLEEP = 1.0
+CONNECTION_MAX_RETRIES = 60
+BOOT_GRACE_PERIOD = 5.0
+MAX_RESTART_ATTEMPTS = 3
 
 # Global process tracking for ThetaTerminal
 THETA_DATA_PROCESS = None
 THETA_DATA_PID = None
+THETA_DATA_LOG_HANDLE = None
+
+def reset_connection_diagnostics():
+    """Reset ThetaData connection counters (useful for tests)."""
+    CONNECTION_DIAGNOSTICS.update({
+        "check_connection_calls": 0,
+        "start_terminal_calls": 0,
+        "network_requests": 0,
+        "placeholder_writes": 0,
+    })
+
+
+def ensure_missing_column(df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
+    """Ensure the dataframe includes a `missing` flag column (True for placeholders)."""
+    if df is None or len(df) == 0:
+        return df
+    if "missing" not in df.columns:
+        df["missing"] = False
+        logger.debug(
+            "[THETA][DEBUG][THETADATA-CACHE] added 'missing' column to frame (rows=%d)",
+            len(df),
+        )
+    return df
+
+
+def restore_numeric_dtypes(df: Optional[pd.DataFrame]) -> Optional[pd.DataFrame]:
+    """Try to convert object columns back to numeric types after placeholder removal."""
+    if df is None or len(df) == 0:
+        return df
+    for column in df.columns:
+        if df[column].dtype == object:
+            try:
+                df[column] = pd.to_numeric(df[column])
+            except (ValueError, TypeError):
+                continue
+    return df
+
+
+def append_missing_markers(
+    df_all: Optional[pd.DataFrame],
+    missing_dates: List[datetime.date],
+) -> Optional[pd.DataFrame]:
+    """Append placeholder rows for dates that returned no data."""
+    if not missing_dates:
+        if df_all is not None and not df_all.empty and "missing" in df_all.columns:
+            df_all = df_all[~df_all["missing"].astype(bool)].drop(columns=["missing"])
+            df_all = restore_numeric_dtypes(df_all)
+        return df_all
+
+    base_columns = ["open", "high", "low", "close", "volume"]
+
+    if df_all is None or len(df_all) == 0:
+        df_all = pd.DataFrame(columns=base_columns + ["missing"])
+        df_all.index = pd.DatetimeIndex([], name="datetime")
+
+    df_all = ensure_missing_column(df_all)
+
+    rows = []
+    for d in missing_dates:
+        dt = datetime(d.year, d.month, d.day, tzinfo=pytz.UTC)
+        row = {col: pd.NA for col in df_all.columns if col != "missing"}
+        row["datetime"] = dt
+        row["missing"] = True
+        rows.append(row)
+
+    if rows:
+        CONNECTION_DIAGNOSTICS["placeholder_writes"] = CONNECTION_DIAGNOSTICS.get("placeholder_writes", 0) + len(rows)
+
+        # DEBUG-LOG: Placeholder injection
+        logger.info(
+            "[THETA][DEBUG][PLACEHOLDER][INJECT] count=%d dates=%s",
+            len(rows),
+            ", ".join(sorted({d.isoformat() for d in missing_dates}))
+        )
+
+        placeholder_df = pd.DataFrame(rows).set_index("datetime")
+        for col in df_all.columns:
+            if col not in placeholder_df.columns:
+                placeholder_df[col] = pd.NA if col != "missing" else True
+        placeholder_df = placeholder_df[df_all.columns]
+        if len(df_all) == 0:
+            df_all = placeholder_df
+        else:
+            df_all = pd.concat([df_all, placeholder_df]).sort_index()
+            df_all = df_all[~df_all.index.duplicated(keep="last")]
+        logger.info(
+            "[THETA][DEBUG][THETADATA-CACHE] recorded %d placeholder day(s): %s",
+            len(rows),
+            ", ".join(sorted({d.isoformat() for d in missing_dates})),
+        )
+
+    return df_all
+
+
+def remove_missing_markers(
+    df_all: Optional[pd.DataFrame],
+    available_dates: List[datetime.date],
+) -> Optional[pd.DataFrame]:
+    """Drop placeholder rows when real data becomes available."""
+    if df_all is None or len(df_all) == 0 or not available_dates:
+        return df_all
+
+    df_all = ensure_missing_column(df_all)
+    available_set = set(available_dates)
+
+    mask = df_all["missing"].eq(True) & df_all.index.map(
+        lambda ts: ts.date() in available_set
+    )
+    if mask.any():
+        removed_dates = sorted({ts.date().isoformat() for ts in df_all.index[mask]})
+        df_all = df_all.loc[~mask]
+        logger.info(
+            "[THETA][DEBUG][THETADATA-CACHE] cleared %d placeholder row(s) for dates: %s",
+            mask.sum(),
+            ", ".join(removed_dates),
+        )
+
+    return df_all
+
+
+def _clamp_option_end(asset: Asset, dt: datetime) -> datetime:
+    """Ensure intraday pulls for options never extend beyond expiration."""
+    if isinstance(dt, datetime):
+        end_dt = dt
+    else:
+        end_dt = datetime.combine(dt, datetime.max.time())
+
+    if end_dt.tzinfo is None:
+        end_dt = end_dt.replace(tzinfo=pytz.UTC)
+
+    if asset.asset_type == "option" and asset.expiration:
+        expiration_dt = datetime.combine(asset.expiration, datetime.max.time())
+        expiration_dt = expiration_dt.replace(tzinfo=end_dt.tzinfo)
+        if end_dt > expiration_dt:
+            return expiration_dt
+
+    return end_dt
+
+
+def reset_theta_terminal_tracking():
+    """Clear cached ThetaTerminal process references."""
+    global THETA_DATA_PROCESS, THETA_DATA_PID, THETA_DATA_LOG_HANDLE
+    THETA_DATA_PROCESS = None
+    THETA_DATA_PID = None
+    if THETA_DATA_LOG_HANDLE is not None:
+        try:
+            THETA_DATA_LOG_HANDLE.close()
+        except Exception:
+            pass
+        THETA_DATA_LOG_HANDLE = None
+
+
+CONNECTION_DIAGNOSTICS = {
+    "check_connection_calls": 0,
+    "start_terminal_calls": 0,
+    "network_requests": 0,
+    "placeholder_writes": 0,
+}
 
 
 def get_price_data(
@@ -34,13 +199,17 @@ def get_price_data(
     quote_asset: Asset = None,
     dt=None,
     datastyle: str = "ohlc",
-    include_after_hours: bool = True
-
+    include_after_hours: bool = True,
+    return_polars: bool = False
+) -> Optional[pd.DataFrame]:
     """
     Queries ThetaData for pricing data for the given asset and returns a DataFrame with the data. Data will be
     cached in the LUMIBOT_CACHE_FOLDER/{CACHE_SUBFOLDER} folder so that it can be reused later and we don't have to query
     ThetaData every time we run a backtest.
 
+    Returns pandas DataFrames for backwards compatibility. Polars output is not
+    currently supported; callers requesting polars will receive a ValueError.
+
     Parameters
     ----------
     username : str
@@ -62,35 +231,153 @@ def get_price_data(
         The style of data to retrieve ("ohlc" or "quote")
     include_after_hours : bool
         Whether to include after-hours trading data (default True)
+    return_polars : bool
+        ThetaData currently supports pandas output only. Passing True raises a ValueError.
 
     Returns
     -------
-    pd.DataFrame
-        A DataFrame with the pricing data for the asset
+    Optional[pd.DataFrame]
+        A pandas DataFrame with the pricing data for the asset
 
     """
     import pytz  # Import at function level to avoid scope issues in nested calls
 
+    # DEBUG-LOG: Entry point for ThetaData request
+    logger.debug(
+        "[THETA][DEBUG][REQUEST][ENTRY] asset=%s quote=%s start=%s end=%s dt=%s timespan=%s datastyle=%s include_after_hours=%s return_polars=%s",
+        asset,
+        quote_asset,
+        start.isoformat() if hasattr(start, 'isoformat') else start,
+        end.isoformat() if hasattr(end, 'isoformat') else end,
+        dt.isoformat() if dt and hasattr(dt, 'isoformat') else dt,
+        timespan,
+        datastyle,
+        include_after_hours,
+        return_polars
+    )
+
+    if return_polars:
+        raise ValueError("ThetaData polars output is not available; pass return_polars=False.")
+
+    # Preserve original bounds for final filtering
+    requested_start = start
+    requested_end = end
+
     # Check if we already have data for this asset in the cache file
     df_all = None
     df_cached = None
     cache_file = build_cache_filename(asset, timespan, datastyle)
+    remote_payload = build_remote_cache_payload(asset, timespan, datastyle)
+    cache_manager = get_backtest_cache()
+
+    if cache_manager.enabled:
+        try:
+            fetched_remote = cache_manager.ensure_local_file(cache_file, payload=remote_payload)
+            if fetched_remote:
+                logger.info(
+                    "[THETA][DEBUG][CACHE][REMOTE_DOWNLOAD] asset=%s timespan=%s datastyle=%s cache_file=%s",
+                    asset,
+                    timespan,
+                    datastyle,
+                    cache_file,
+                )
+        except Exception as exc:
+            logger.exception(
+                "[THETA][DEBUG][CACHE][REMOTE_DOWNLOAD_ERROR] asset=%s cache_file=%s error=%s",
+                asset,
+                cache_file,
+                exc,
+            )
+
+    # DEBUG-LOG: Cache file check
+    logger.info(
+        "[THETA][DEBUG][CACHE][CHECK] asset=%s timespan=%s datastyle=%s cache_file=%s exists=%s",
+        asset,
+        timespan,
+        datastyle,
+        cache_file,
+        cache_file.exists()
+    )
+
     if cache_file.exists():
         logger.info(f"\nLoading '{datastyle}' pricing data for {asset} / {quote_asset} with '{timespan}' timespan from cache file...")
         df_cached = load_cache(cache_file)
         if df_cached is not None and not df_cached.empty:
             df_all = df_cached.copy()  # Make a copy so we can check the original later for differences
 
+    cached_rows = 0 if df_all is None else len(df_all)
+    placeholder_rows = 0
+    if df_all is not None and not df_all.empty and "missing" in df_all.columns:
+        placeholder_rows = int(df_all["missing"].sum())
+
+    # DEBUG-LOG: Cache load result
+    logger.info(
+        "[THETA][DEBUG][CACHE][LOADED] asset=%s cached_rows=%d placeholder_rows=%d real_rows=%d",
+        asset,
+        cached_rows,
+        placeholder_rows,
+        cached_rows - placeholder_rows
+    )
+
+    logger.debug(
+        "[THETA][DEBUG][THETADATA-CACHE] pre-fetch rows=%d placeholders=%d for %s %s %s",
+        cached_rows,
+        placeholder_rows,
+        asset,
+        timespan,
+        datastyle,
+    )
+
     # Check if we need to get more data
+    logger.info(
+        "[THETA][DEBUG][CACHE][DECISION_START] asset=%s | "
+        "calling get_missing_dates(start=%s, end=%s)",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        start.isoformat() if hasattr(start, 'isoformat') else start,
+        end.isoformat() if hasattr(end, 'isoformat') else end
+    )
+
     missing_dates = get_missing_dates(df_all, asset, start, end)
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][DECISION_RESULT] asset=%s | "
+        "missing_dates=%d | "
+        "decision=%s",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        len(missing_dates),
+        "CACHE_HIT" if not missing_dates else "CACHE_MISS"
+    )
+
+    cache_file = build_cache_filename(asset, timespan, datastyle)
+    logger.debug(
+        "[THETA][DEBUG][THETADATA-CACHE] asset=%s/%s timespan=%s datastyle=%s cache_file=%s exists=%s missing=%d",
+        asset,
+        quote_asset.symbol if quote_asset else None,
+        timespan,
+        datastyle,
+        cache_file,
+        cache_file.exists(),
+        len(missing_dates),
+    )
     if not missing_dates:
+        if df_all is not None and not df_all.empty:
+            logger.info("ThetaData cache HIT for %s %s %s (%d rows).", asset, timespan, datastyle, len(df_all))
+        # DEBUG-LOG: Cache hit
+        logger.info(
+            "[THETA][DEBUG][CACHE][HIT] asset=%s timespan=%s datastyle=%s rows=%d start=%s end=%s",
+            asset,
+            timespan,
+            datastyle,
+            len(df_all),
+            start.isoformat() if hasattr(start, 'isoformat') else start,
+            end.isoformat() if hasattr(end, 'isoformat') else end
+        )
         # Filter cached data to requested date range before returning
         if df_all is not None and not df_all.empty:
             # For daily data, use date-based filtering (timestamps vary by provider)
             # For intraday data, use precise datetime filtering
             if timespan == "day":
                 # Convert index to dates for comparison
-                import pandas as pd
                 df_dates = pd.to_datetime(df_all.index).date
                 start_date = start.date() if hasattr(start, 'date') else start
                 end_date = end.date() if hasattr(end, 'date') else end
@@ -98,30 +385,119 @@ def get_price_data(
                 df_all = df_all[mask]
             else:
                 # Intraday: use precise datetime filtering
-                import datetime as dt
+                import datetime as datetime_module  # RENAMED to avoid shadowing dt parameter!
+
+                # DEBUG-LOG: Entry to intraday filter
+                rows_before_any_filter = len(df_all)
+                max_ts_before_any_filter = df_all.index.max() if len(df_all) > 0 else None
+                logger.info(
+                    "[THETA][DEBUG][FILTER][INTRADAY_ENTRY] asset=%s | "
+                    "rows_before=%d max_ts_before=%s | "
+                    "start_param=%s end_param=%s dt_param=%s dt_type=%s",
+                    asset.symbol if hasattr(asset, 'symbol') else str(asset),
+                    rows_before_any_filter,
+                    max_ts_before_any_filter.isoformat() if max_ts_before_any_filter else None,
+                    start.isoformat() if hasattr(start, 'isoformat') else start,
+                    end.isoformat() if hasattr(end, 'isoformat') else end,
+                    dt.isoformat() if dt and hasattr(dt, 'isoformat') else dt,
+                    type(dt).__name__ if dt else None
+                )
+
                 # Convert date to datetime if needed
-                if isinstance(start,
-                    start =
-
-
+                if isinstance(start, datetime_module.date) and not isinstance(start, datetime_module.datetime):
+                    start = datetime_module.datetime.combine(start, datetime_module.time.min)
+                    logger.info(
+                        "[THETA][DEBUG][FILTER][DATE_CONVERSION] converted start from date to datetime: %s",
+                        start.isoformat()
+                    )
+                if isinstance(end, datetime_module.date) and not isinstance(end, datetime_module.datetime):
+                    end = datetime_module.datetime.combine(end, datetime_module.time.max)
+                    logger.info(
+                        "[THETA][DEBUG][FILTER][DATE_CONVERSION] converted end from date to datetime: %s",
+                        end.isoformat()
+                    )
 
                 # Handle datetime objects with midnight time (users often pass datetime(YYYY, MM, DD))
-                if isinstance(end,
+                if isinstance(end, datetime_module.datetime) and end.time() == datetime_module.time.min:
                     # Convert end-of-period midnight to end-of-day
-                    end =
+                    end = datetime_module.datetime.combine(end.date(), datetime_module.time.max)
+                    logger.info(
+                        "[THETA][DEBUG][FILTER][MIDNIGHT_FIX] converted end from midnight to end-of-day: %s",
+                        end.isoformat()
+                    )
 
                 if start.tzinfo is None:
                     start = LUMIBOT_DEFAULT_PYTZ.localize(start).astimezone(pytz.UTC)
+                    logger.info(
+                        "[THETA][DEBUG][FILTER][TZ_LOCALIZE] localized start to UTC: %s",
+                        start.isoformat()
+                    )
                 if end.tzinfo is None:
                     end = LUMIBOT_DEFAULT_PYTZ.localize(end).astimezone(pytz.UTC)
+                    logger.info(
+                        "[THETA][DEBUG][FILTER][TZ_LOCALIZE] localized end to UTC: %s",
+                        end.isoformat()
+                    )
+
+                # REMOVED: Look-ahead bias protection was too aggressive
+                # The dt filtering was breaking negative timeshift (intentional look-ahead for fills)
+                # Look-ahead bias protection should happen at get_bars() level, not cache retrieval
+                #
+                # NEW APPROACH: Always return full [start, end] range from cache
+                # Let Data/DataPolars.get_bars() handle look-ahead bias protection
+                logger.info(
+                    "[THETA][DEBUG][FILTER][NO_DT_FILTER] asset=%s | "
+                    "using end=%s for upper bound (dt parameter ignored for cache retrieval)",
+                    asset.symbol if hasattr(asset, 'symbol') else str(asset),
+                    end.isoformat()
+                )
                 df_all = df_all[(df_all.index >= start) & (df_all.index <= end)]
+
+            # DEBUG-LOG: After date range filtering, before missing removal
+            if df_all is not None and not df_all.empty:
+                logger.info(
+                    "[THETA][DEBUG][FILTER][AFTER] asset=%s rows=%d first_ts=%s last_ts=%s dt_filter=%s",
+                    asset,
+                    len(df_all),
+                    df_all.index.min().isoformat() if len(df_all) > 0 else None,
+                    df_all.index.max().isoformat() if len(df_all) > 0 else None,
+                    dt.isoformat() if dt and hasattr(dt, 'isoformat') else dt
+                )
+
+            if df_all is not None and not df_all.empty and "missing" in df_all.columns:
+                df_all = df_all[~df_all["missing"].astype(bool)].drop(columns=["missing"])
+
+
+        # DEBUG-LOG: Before pandas return
+        if df_all is not None and not df_all.empty:
+            logger.info(
+                "[THETA][DEBUG][RETURN][PANDAS] asset=%s rows=%d first_ts=%s last_ts=%s",
+                asset,
+                len(df_all),
+                df_all.index.min().isoformat() if len(df_all) > 0 else None,
+                df_all.index.max().isoformat() if len(df_all) > 0 else None
+            )
         return df_all
 
-
-
+    logger.info("ThetaData cache MISS for %s %s %s; fetching %d interval(s) from ThetaTerminal.", asset, timespan, datastyle, len(missing_dates))
+
+    # DEBUG-LOG: Cache miss
+    logger.info(
+        "[THETA][DEBUG][CACHE][MISS] asset=%s timespan=%s datastyle=%s missing_intervals=%d first=%s last=%s",
+        asset,
+        timespan,
+        datastyle,
+        len(missing_dates),
+        missing_dates[0] if missing_dates else None,
+        missing_dates[-1] if missing_dates else None
+    )
+
+
+    fetch_start = missing_dates[0]  # Data will start at 8am UTC (4am EST)
+    fetch_end = missing_dates[-1]  # Data will end at 23:59 UTC (7:59pm EST)
 
     # Initialize tqdm progress bar
-    total_days = (
+    total_days = (fetch_end - fetch_start).days + 1
     total_queries = (total_days // MAX_DAYS) + 1
     description = f"\nDownloading '{datastyle}' data for {asset} / {quote_asset} with '{timespan}' from ThetaData..."
     logger.info(description)
@@ -133,19 +509,134 @@ def get_price_data(
     # The EOD endpoint includes the 16:00 closing auction and follows SIP sale-condition rules
     # This matches Polygon and Yahoo Finance EXACTLY (zero tolerance)
     if timespan == "day":
-
+        requested_dates = list(missing_dates)
+        logger.info("Daily bars: using EOD endpoint for official close prices")
+        logger.debug(
+            "[THETA][DEBUG][THETADATA-EOD] requesting %d trading day(s) for %s from %s to %s",
+            len(requested_dates),
+            asset,
+            fetch_start,
+            fetch_end,
+        )
 
         # Use EOD endpoint for official daily OHLC
         result_df = get_historical_eod_data(
             asset=asset,
-            start_dt=
-            end_dt=
+            start_dt=fetch_start,
+            end_dt=fetch_end,
             username=username,
             password=password,
             datastyle=datastyle
         )
+        logger.debug(
+            "[THETA][DEBUG][THETADATA-EOD] fetched rows=%s for %s",
+            0 if result_df is None else len(result_df),
+            asset,
+        )
+
+        if result_df is None or result_df.empty:
+            expired_range = (
+                asset.asset_type == "option"
+                and asset.expiration is not None
+                and requested_dates
+                and all(day > asset.expiration for day in requested_dates)
+            )
+            if expired_range:
+                logger.info(
+                    "[THETA][DEBUG][THETADATA-EOD] Option %s expired on %s; cache reuse for range %s -> %s.",
+                    asset,
+                    asset.expiration,
+                    fetch_start,
+                    fetch_end,
+                )
+            else:
+                logger.warning(
+                    "[THETA][DEBUG][THETADATA-EOD] No rows returned for %s between %s and %s; recording placeholders.",
+                    asset,
+                    fetch_start,
+                    fetch_end,
+                )
+            df_all = append_missing_markers(df_all, requested_dates)
+            update_cache(
+                cache_file,
+                df_all,
+                df_cached,
+                missing_dates=requested_dates,
+                remote_payload=remote_payload,
+            )
+            df_clean = df_all.copy() if df_all is not None else None
+            if df_clean is not None and not df_clean.empty and "missing" in df_clean.columns:
+                df_clean = df_clean[~df_clean["missing"].astype(bool)].drop(columns=["missing"])
+                df_clean = restore_numeric_dtypes(df_clean)
+            logger.info(
+                "ThetaData cache updated for %s %s %s with placeholders only (missing=%d).",
+                asset,
+                timespan,
+                datastyle,
+                len(requested_dates),
+            )
+
+            if df_clean is not None and not df_clean.empty and timespan == "day":
+                start_date = requested_start.date() if hasattr(requested_start, "date") else requested_start
+                end_date = requested_end.date() if hasattr(requested_end, "date") else requested_end
+                dates = pd.to_datetime(df_clean.index).date
+                df_clean = df_clean[(dates >= start_date) & (dates <= end_date)]
+
+            return df_clean if df_clean is not None else pd.DataFrame()
+
+        df_all = update_df(df_all, result_df)
+        logger.debug(
+            "[THETA][DEBUG][THETADATA-EOD] merged cache rows=%d (cached=%d new=%d)",
+            0 if df_all is None else len(df_all),
+            0 if df_cached is None else len(df_cached),
+            len(result_df),
+        )
+
+        trading_days = get_trading_dates(asset, fetch_start, fetch_end)
+        if "datetime" in result_df.columns:
+            covered_index = pd.DatetimeIndex(pd.to_datetime(result_df["datetime"], utc=True))
+        else:
+            covered_index = pd.DatetimeIndex(result_df.index)
+        if covered_index.tz is None:
+            covered_index = covered_index.tz_localize(pytz.UTC)
+        else:
+            covered_index = covered_index.tz_convert(pytz.UTC)
+        covered_days = set(covered_index.date)
+
+        df_all = remove_missing_markers(df_all, list(covered_days))
+        missing_within_range = [day for day in trading_days if day not in covered_days]
+        placeholder_count = len(missing_within_range)
+        df_all = append_missing_markers(df_all, missing_within_range)
+
+        update_cache(
+            cache_file,
+            df_all,
+            df_cached,
+            missing_dates=missing_within_range,
+            remote_payload=remote_payload,
+        )
 
-
+        df_clean = df_all.copy() if df_all is not None else None
+        if df_clean is not None and not df_clean.empty and "missing" in df_clean.columns:
+            df_clean = df_clean[~df_clean["missing"].astype(bool)].drop(columns=["missing"])
+            df_clean = restore_numeric_dtypes(df_clean)
+
+        logger.info(
+            "ThetaData cache updated for %s %s %s (rows=%d placeholders=%d).",
+            asset,
+            timespan,
+            datastyle,
+            0 if df_all is None else len(df_all),
+            placeholder_count,
+        )
+
+        if df_clean is not None and not df_clean.empty and timespan == "day":
+            start_date = requested_start.date() if hasattr(requested_start, "date") else requested_start
+            end_date = requested_end.date() if hasattr(requested_end, "date") else requested_end
+            dates = pd.to_datetime(df_clean.index).date
+            df_clean = df_clean[(dates >= start_date) & (dates <= end_date)]
+
+        return df_clean if df_clean is not None else pd.DataFrame()
 
     # Map timespan to milliseconds for intraday intervals
     TIMESPAN_TO_MS = {
@@ -167,33 +658,84 @@ def get_price_data(
             f"Supported values: {list(TIMESPAN_TO_MS.keys())} or 'day'"
         )
 
-
+    current_start = fetch_start
+    current_end = fetch_start + delta
+
+    while current_start <= fetch_end:
         # If we don't have a paid subscription, we need to wait 1 minute between requests because of
         # the rate limit. Wait every other query so that we don't spend too much time waiting.
 
-        if
-
+        if current_end > fetch_end:
+            current_end = fetch_end
+        if current_end > current_start + delta:
+            current_end = current_start + delta
 
-        result_df = get_historical_data(asset,
+        result_df = get_historical_data(asset, current_start, current_end, interval_ms, username, password, datastyle=datastyle, include_after_hours=include_after_hours)
+        chunk_end = _clamp_option_end(asset, current_end)
 
         if result_df is None or len(result_df) == 0:
-
-
+            expired_chunk = (
+                asset.asset_type == "option"
+                and asset.expiration is not None
+                and chunk_end.date() >= asset.expiration
             )
+            if expired_chunk:
+                logger.info(
+                    "[THETA][DEBUG][THETADATA] Option %s considered expired on %s; reusing cached data between %s and %s.",
+                    asset,
+                    asset.expiration,
+                    current_start,
+                    chunk_end,
+                )
+            else:
+                logger.warning(
+                    f"No data returned for {asset} / {quote_asset} with '{timespan}' timespan between {current_start} and {current_end}"
+                )
+            missing_chunk = get_trading_dates(asset, current_start, chunk_end)
+            df_all = append_missing_markers(df_all, missing_chunk)
+            pbar.update(1)
 
         else:
             df_all = update_df(df_all, result_df)
+            available_chunk = get_trading_dates(asset, current_start, chunk_end)
+            df_all = remove_missing_markers(df_all, available_chunk)
+            if "datetime" in result_df.columns:
+                chunk_index = pd.DatetimeIndex(pd.to_datetime(result_df["datetime"], utc=True))
+            else:
+                chunk_index = pd.DatetimeIndex(result_df.index)
+            if chunk_index.tz is None:
+                chunk_index = chunk_index.tz_localize(pytz.UTC)
+            else:
+                chunk_index = chunk_index.tz_convert(pytz.UTC)
+            covered_days = {ts.date() for ts in chunk_index}
+            missing_within_chunk = [day for day in available_chunk if day not in covered_days]
+            if missing_within_chunk:
+                df_all = append_missing_markers(df_all, missing_within_chunk)
             pbar.update(1)
 
-
-
+        current_start = current_end + timedelta(days=1)
+        current_end = current_start + delta
 
-        if asset.expiration and
+        if asset.expiration and current_start > asset.expiration:
             break
 
-    update_cache(cache_file, df_all, df_cached)
+    update_cache(cache_file, df_all, df_cached, remote_payload=remote_payload)
+    if df_all is not None:
+        logger.debug("[THETA][DEBUG][THETADATA-CACHE-WRITE] wrote %s rows=%d", cache_file, len(df_all))
+    if df_all is not None:
+        logger.info("ThetaData cache updated for %s %s %s (%d rows).", asset, timespan, datastyle, len(df_all))
     # Close the progress bar when done
     pbar.close()
+    if df_all is not None and not df_all.empty and "missing" in df_all.columns:
+        df_all = df_all[~df_all["missing"].astype(bool)].drop(columns=["missing"])
+        df_all = restore_numeric_dtypes(df_all)
+
+    if df_all is not None and not df_all.empty and timespan == "day":
+        start_date = requested_start.date() if hasattr(requested_start, "date") else requested_start
+        end_date = requested_end.date() if hasattr(requested_end, "date") else requested_end
+        dates = pd.to_datetime(df_all.index).date
+        df_all = df_all[(dates >= start_date) & (dates <= end_date)]
+
     return df_all
 
 
@@ -261,6 +803,28 @@ def build_cache_filename(asset: Asset, timespan: str, datastyle: str = "ohlc"):
     return cache_file
 
 
+def build_remote_cache_payload(asset: Asset, timespan: str, datastyle: str = "ohlc") -> Dict[str, object]:
+    """Generate metadata describing the cache entry for remote storage."""
+    payload: Dict[str, object] = {
+        "provider": "thetadata",
+        "timespan": timespan,
+        "datastyle": datastyle,
+        "asset_type": getattr(asset, "asset_type", None),
+        "symbol": getattr(asset, "symbol", str(asset)),
+    }
+
+    if getattr(asset, "asset_type", None) == "option":
+        payload.update(
+            {
+                "expiration": getattr(asset, "expiration", None),
+                "strike": getattr(asset, "strike", None),
+                "right": getattr(asset, "right", None),
+            }
+        )
+
+    return payload
+
+
 def get_missing_dates(df_all, asset, start, end):
     """
     Check if we have data for the full range
@@ -283,27 +847,116 @@ def get_missing_dates(df_all, asset, start, end):
     list[datetime.date]
         A list of dates that we need to get data for
     """
+    # DEBUG-LOG: Entry to get_missing_dates
+    logger.info(
+        "[THETA][DEBUG][CACHE][MISSING_DATES_CHECK] asset=%s | "
+        "start=%s end=%s | "
+        "cache_rows=%d",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        start.isoformat() if hasattr(start, 'isoformat') else start,
+        end.isoformat() if hasattr(end, 'isoformat') else end,
+        0 if df_all is None else len(df_all)
+    )
+
     trading_dates = get_trading_dates(asset, start, end)
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][TRADING_DATES] asset=%s | "
+        "trading_dates_count=%d first=%s last=%s",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        len(trading_dates),
+        trading_dates[0] if trading_dates else None,
+        trading_dates[-1] if trading_dates else None
+    )
+
     if df_all is None or not len(df_all):
+        logger.info(
+            "[THETA][DEBUG][CACHE][EMPTY] asset=%s | "
+            "cache is EMPTY -> all %d trading days are missing",
+            asset.symbol if hasattr(asset, 'symbol') else str(asset),
+            len(trading_dates)
+        )
         return trading_dates
 
     # It is possible to have full day gap in the data if previous queries were far apart
     # Example: Query for 8/1/2023, then 8/31/2023, then 8/7/2023
     # Whole days are easy to check for because we can just check the dates in the index
     dates = pd.Series(df_all.index.date).unique()
+    cached_dates_count = len(dates)
+    cached_first = min(dates) if len(dates) > 0 else None
+    cached_last = max(dates) if len(dates) > 0 else None
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][CACHED_DATES] asset=%s | "
+        "cached_dates_count=%d first=%s last=%s",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        cached_dates_count,
+        cached_first,
+        cached_last
+    )
+
     missing_dates = sorted(set(trading_dates) - set(dates))
 
     # For Options, don't need any dates passed the expiration date
     if asset.asset_type == "option":
+        before_expiry_filter = len(missing_dates)
         missing_dates = [x for x in missing_dates if x <= asset.expiration]
+        after_expiry_filter = len(missing_dates)
+
+        if before_expiry_filter != after_expiry_filter:
+            logger.info(
+                "[THETA][DEBUG][CACHE][OPTION_EXPIRY_FILTER] asset=%s | "
+                "filtered %d dates after expiration=%s | "
+                "missing_dates: %d -> %d",
+                asset.symbol if hasattr(asset, 'symbol') else str(asset),
+                before_expiry_filter - after_expiry_filter,
+                asset.expiration,
+                before_expiry_filter,
+                after_expiry_filter
+            )
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][MISSING_RESULT] asset=%s | "
+        "missing_dates_count=%d | "
+        "first_missing=%s last_missing=%s",
+        asset.symbol if hasattr(asset, 'symbol') else str(asset),
+        len(missing_dates),
+        missing_dates[0] if missing_dates else None,
+        missing_dates[-1] if missing_dates else None
+    )
 
     return missing_dates
 
 
 def load_cache(cache_file):
     """Load the data from the cache file and return a DataFrame with a DateTimeIndex"""
+    # DEBUG-LOG: Start loading cache
+    logger.info(
+        "[THETA][DEBUG][CACHE][LOAD_START] cache_file=%s | "
+        "exists=%s size_bytes=%d",
+        cache_file.name,
+        cache_file.exists(),
+        cache_file.stat().st_size if cache_file.exists() else 0
+    )
+
+    if not cache_file.exists():
+        logger.info(
+            "[THETA][DEBUG][CACHE][LOAD_MISSING] cache_file=%s | returning=None",
+            cache_file.name,
+        )
+        return None
+
     df = pd.read_parquet(cache_file, engine='pyarrow')
 
+    rows_after_read = len(df)
+    logger.info(
+        "[THETA][DEBUG][CACHE][LOAD_READ] cache_file=%s | "
+        "rows_read=%d columns=%s",
+        cache_file.name,
+        rows_after_read,
+        list(df.columns)
+    )
+
     # Set the 'datetime' column as the index of the DataFrame
     df.set_index("datetime", inplace=True)
 
@@ -316,26 +969,135 @@ def load_cache(cache_file):
     if df.index.tzinfo is None:
         # Set the timezone to UTC
         df.index = df.index.tz_localize("UTC")
+        logger.info(
+            "[THETA][DEBUG][CACHE][LOAD_TZ] cache_file=%s | "
+            "localized index to UTC",
+            cache_file.name
+        )
+
+    df = ensure_missing_column(df)
+
+    min_ts = df.index.min() if len(df) > 0 else None
+    max_ts = df.index.max() if len(df) > 0 else None
+    placeholder_count = int(df["missing"].sum()) if "missing" in df.columns else 0
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][LOAD_SUCCESS] cache_file=%s | "
+        "total_rows=%d real_rows=%d placeholders=%d | "
+        "min_ts=%s max_ts=%s",
+        cache_file.name,
+        len(df),
+        len(df) - placeholder_count,
+        placeholder_count,
+        min_ts.isoformat() if min_ts else None,
+        max_ts.isoformat() if max_ts else None
+    )
 
     return df
 
 
-def update_cache(cache_file, df_all, df_cached):
-    """Update the cache file with the new data"""
-    #
-
-
-
+def update_cache(cache_file, df_all, df_cached, missing_dates=None, remote_payload=None):
+    """Update the cache file with the new data and optional placeholder markers."""
+    # DEBUG-LOG: Entry to update_cache
+    logger.info(
+        "[THETA][DEBUG][CACHE][UPDATE_ENTRY] cache_file=%s | "
+        "df_all_rows=%d df_cached_rows=%d missing_dates=%d",
+        cache_file.name,
+        0 if df_all is None else len(df_all),
+        0 if df_cached is None else len(df_cached),
+        0 if not missing_dates else len(missing_dates)
+    )
+
+    if df_all is None or len(df_all) == 0:
+        if not missing_dates:
+            logger.info(
+                "[THETA][DEBUG][CACHE][UPDATE_SKIP] cache_file=%s | "
+                "df_all is empty and no missing_dates, skipping cache update",
+                cache_file.name
+            )
             return
+        logger.info(
+            "[THETA][DEBUG][CACHE][UPDATE_PLACEHOLDERS_ONLY] cache_file=%s | "
+            "df_all is empty, writing %d placeholders",
+            cache_file.name,
+            len(missing_dates)
+        )
+        df_working = append_missing_markers(None, missing_dates)
+    else:
+        df_working = ensure_missing_column(df_all.copy())
+        if missing_dates:
+            logger.info(
+                "[THETA][DEBUG][CACHE][UPDATE_APPEND_PLACEHOLDERS] cache_file=%s | "
+                "appending %d placeholders to %d existing rows",
+                cache_file.name,
+                len(missing_dates),
+                len(df_working)
+            )
+            df_working = append_missing_markers(df_working, missing_dates)
 
-
-
+    if df_working is None or len(df_working) == 0:
+        logger.info(
+            "[THETA][DEBUG][CACHE][UPDATE_SKIP_EMPTY] cache_file=%s | "
+            "df_working is empty after processing, skipping write",
+            cache_file.name
+        )
+        return
+
+    df_cached_cmp = None
+    if df_cached is not None and len(df_cached) > 0:
+        df_cached_cmp = ensure_missing_column(df_cached.copy())
+
+    if df_cached_cmp is not None and df_working.equals(df_cached_cmp):
+        logger.info(
+            "[THETA][DEBUG][CACHE][UPDATE_NO_CHANGES] cache_file=%s | "
+            "df_working equals df_cached (rows=%d), skipping write",
+            cache_file.name,
+            len(df_working)
+        )
+        return
+
+    cache_file.parent.mkdir(parents=True, exist_ok=True)
+    df_to_save = df_working.reset_index()
+
+    placeholder_count = int(df_working["missing"].sum()) if "missing" in df_working.columns else 0
+    real_rows = len(df_working) - placeholder_count
+    min_ts = df_working.index.min() if len(df_working) > 0 else None
+    max_ts = df_working.index.max() if len(df_working) > 0 else None
+
+    def _format_ts(value):
+        if value is None:
+            return None
+        return value.isoformat() if hasattr(value, "isoformat") else value
+
+    logger.info(
+        "[THETA][DEBUG][CACHE][UPDATE_WRITE] cache_file=%s | "
+        "total_rows=%d real_rows=%d placeholders=%d | "
+        "min_ts=%s max_ts=%s",
+        cache_file.name,
+        len(df_working),
+        real_rows,
+        placeholder_count,
+        _format_ts(min_ts),
+        _format_ts(max_ts)
+    )
+
+    df_to_save.to_parquet(cache_file, engine="pyarrow", compression="snappy")
 
-
-
+    logger.info(
+        "[THETA][DEBUG][CACHE][UPDATE_SUCCESS] cache_file=%s written successfully",
+        cache_file.name
+    )
 
-
-
+    cache_manager = get_backtest_cache()
+    if cache_manager.mode == CacheMode.S3_READWRITE:
+        try:
+            cache_manager.on_local_update(cache_file, payload=remote_payload)
+        except Exception as exc:
+            logger.exception(
+                "[THETA][DEBUG][CACHE][REMOTE_UPLOAD_ERROR] cache_file=%s error=%s",
+                cache_file,
+                exc,
+            )
 
 
 def update_df(df_all, result):
@@ -366,6 +1128,7 @@ def update_df(df_all, result):
     ny_tz = LUMIBOT_DEFAULT_PYTZ
     df = pd.DataFrame(result)
     if not df.empty:
+        df["missing"] = False
         if "datetime" not in df.index.names:
             # check if df has a column named "datetime", if not raise key error
             if "datetime" not in df.columns:
@@ -398,51 +1161,79 @@ def update_df(df_all, result):
         df_all = df
     else:
         df_all = pd.concat([df_all, df]).sort_index()
-        df_all = df_all[~df_all.index.duplicated(keep="
+        df_all = df_all[~df_all.index.duplicated(keep="last")] # Keep newest data over placeholders

     # NOTE: Timestamp correction is now done in get_historical_data() at line 569
     # Do NOT subtract 1 minute here as it would double-correct
     # df_all.index = df_all.index - pd.Timedelta(minutes=1)
+    df_all = ensure_missing_column(df_all)
     return df_all


 def is_process_alive():
     """Check if ThetaTerminal Java process is still running"""
+    import os
     import subprocess
-    global THETA_DATA_PROCESS

-
+    global THETA_DATA_PROCESS, THETA_DATA_PID, THETA_DATA_LOG_HANDLE
+
+    # If we have a subprocess handle, trust it first
     if THETA_DATA_PROCESS is not None:
-        # poll() returns None if process is still running, otherwise returns exit code
         if THETA_DATA_PROCESS.poll() is None:
             return True
+        # Process exited - clear cached handle and PID
+        reset_theta_terminal_tracking()

-    # If we
-
-
-
-
-
-
-
-
-
-        return result.returncode == 0
-    except Exception:
-        return False
+    # If we know the PID, probe it directly
+    if THETA_DATA_PID:
+        try:
+            # Sending signal 0 simply tests liveness
+            os.kill(THETA_DATA_PID, 0)
+            return True
+        except OSError:
+            reset_theta_terminal_tracking()
+
+    return False


 def start_theta_data_client(username: str, password: str):
     import subprocess
     import shutil
     global THETA_DATA_PROCESS, THETA_DATA_PID
+    CONNECTION_DIAGNOSTICS["start_terminal_calls"] += 1

     # First try shutting down any existing connection
+    graceful_shutdown_requested = False
     try:
-        requests.get(f"{BASE_URL}/v2/system/terminal/shutdown")
+        requests.get(f"{BASE_URL}/v2/system/terminal/shutdown", timeout=1)
+        graceful_shutdown_requested = True
     except Exception:
         pass

+    shutdown_deadline = time.time() + 15
+    while True:
+        process_alive = is_process_alive()
+        status_alive = False
+        try:
+            status_text = requests.get(f"{BASE_URL}/v2/system/mdds/status", timeout=0.5).text
+            status_alive = status_text in ("CONNECTED", "DISCONNECTED")
+        except Exception:
+            status_alive = False
+
+        if not process_alive and not status_alive:
+            break
+
+        if time.time() >= shutdown_deadline:
+            if process_alive and THETA_DATA_PID:
+                kill_signal = getattr(signal, "SIGKILL", signal.SIGTERM)
+                try:
+                    os.kill(THETA_DATA_PID, kill_signal)
+                except Exception as kill_exc:
+                    logger.warning("Failed to force kill ThetaTerminal PID %s: %s", THETA_DATA_PID, kill_exc)
+            break
+
+        time.sleep(0.5)
+
     # Create creds.txt file to avoid passing password with special characters on command line
     # This is the official ThetaData method and avoids shell escaping issues
     # Security note: creds.txt with 0o600 permissions is MORE secure than command-line args
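The rewritten `is_process_alive()` first trusts the cached `Popen` handle and then falls back to probing the bare PID: on POSIX, `os.kill(pid, 0)` delivers no signal but raises `OSError` if the process is gone. A standalone sketch of that probe (a hypothetical helper, not the lumibot function; Windows reports liveness differently):

import os


def pid_alive(pid: int) -> bool:
    """Signal-0 liveness probe: True if a process with this PID currently exists."""
    if not pid:
        return False
    try:
        os.kill(pid, 0)  # signal 0 performs error checking only; nothing is delivered
        return True
    except ProcessLookupError:
        return False  # no such process
    except PermissionError:
        return True   # process exists but is owned by another user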
@@ -451,37 +1242,43 @@ def start_theta_data_client(username: str, password: str):
     theta_dir.mkdir(parents=True, exist_ok=True)
     creds_file = theta_dir / "creds.txt"

-    #
-
-
-    if
-        logger.info(f"Creating new creds.txt file at {creds_file}")
-        should_write = True
-    else:
-        # Check if username changed
+    # Read previous credentials if they exist so we can decide whether to overwrite
+    existing_username = None
+    existing_password = None
+    if creds_file.exists():
         try:
             with open(creds_file, 'r') as f:
-                existing_username = f.readline().strip()
-
-
-
-
-
-
-
+                existing_username = (f.readline().strip() or None)
+                existing_password = (f.readline().strip() or None)
+        except Exception as exc:
+            logger.warning(f"Could not read existing creds.txt: {exc}; will recreate the file.")
+            existing_username = None
+            existing_password = None
+
+    if username is None:
+        username = existing_username
+    if password is None:
+        password = existing_password
+
+    if username is None or password is None:
+        raise ValueError(
+            "ThetaData credentials are required to start ThetaTerminal. Provide them via backtest() or configure THETADATA_USERNAME/THETADATA_PASSWORD."
+        )
+
+    should_write = (
+        not creds_file.exists()
+        or existing_username != username
+        or existing_password != password
+    )

     if should_write:
-
+        logger.info(f"Writing creds.txt file for user: {username}")
         with open(creds_file, 'w') as f:
             f.write(f"{username}\n")
             f.write(f"{password}\n")
-
-        # Set restrictive permissions on creds file (owner read/write only)
-        # This prevents other users on the system from reading the credentials
         os.chmod(creds_file, 0o600)
-
-        logger.
+    else:
+        logger.debug(f"Reusing existing creds.txt for {username}")

     # Launch ThetaTerminal directly with --creds-file to avoid shell escaping issues
     # We bypass the thetadata library's launcher which doesn't support this option
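The creds.txt handling now reads any existing file first, falls back to the stored values when no credentials are passed in, raises early when none can be found, and rewrites the file only when the username or password actually changed. A minimal sketch of the write side of that convention (username and password on separate lines, owner-only permissions), assuming the same two-line layout the `--creds-file` launch option mentioned in this diff expects:

import os
from pathlib import Path


def write_creds_file(theta_dir: Path, username: str, password: str) -> Path:
    """Write credentials on two lines and restrict the file to the current user."""
    theta_dir.mkdir(parents=True, exist_ok=True)
    creds_file = theta_dir / "creds.txt"
    with open(creds_file, "w") as f:
        f.write(f"{username}\n")
        f.write(f"{password}\n")
    os.chmod(creds_file, 0o600)  # owner read/write only; safer than command-line args
    return creds_file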
@@ -526,68 +1323,129 @@ def start_theta_data_client(username: str, password: str):

     logger.info(f"Launching ThetaTerminal with creds file: {cmd}")

-
-
-
-
-
-
-    )
+    reset_theta_terminal_tracking()
+
+    log_path = theta_dir / "lumibot_launch.log"
+    log_path.parent.mkdir(parents=True, exist_ok=True)
+    log_handle = open(log_path, "ab")
+    launch_ts = datetime.now(timezone.utc)
+    log_handle.write(f"\n---- Launch {launch_ts.isoformat()} ----\n".encode())
+    log_handle.flush()
+
+    global THETA_DATA_LOG_HANDLE
+    THETA_DATA_LOG_HANDLE = log_handle
+
+    try:
+        THETA_DATA_PROCESS = subprocess.Popen(
+            cmd,
+            stdout=log_handle,
+            stderr=subprocess.STDOUT,
+            cwd=str(theta_dir)
+        )
+    except Exception:
+        THETA_DATA_LOG_HANDLE = None
+        log_handle.close()
+        raise
+
     THETA_DATA_PID = THETA_DATA_PROCESS.pid
     logger.info(f"ThetaTerminal started with PID: {THETA_DATA_PID}")

-    # Give it a moment to start
-    time.sleep(2)
-
     # We don't return a ThetaClient object since we're launching manually
     # The connection will be established via HTTP/WebSocket to localhost:25510
     return THETA_DATA_PROCESS


-def check_connection(username: str, password: str):
-
-
-
+def check_connection(username: str, password: str, wait_for_connection: bool = False):
+    """Ensure the local ThetaTerminal is running. Optionally block until it is connected.
+
+    Parameters
+    ----------
+    username : str
+        ThetaData username.
+    password : str
+        ThetaData password.
+    wait_for_connection : bool, optional
+        If True, block and retry until the terminal reports CONNECTED (or retries are exhausted).
+        If False, perform a lightweight liveness check and return immediately.
+    """
+
+    CONNECTION_DIAGNOSTICS["check_connection_calls"] += 1
+
+    max_retries = CONNECTION_MAX_RETRIES
+    sleep_interval = CONNECTION_RETRY_SLEEP
+    restart_attempts = 0
     client = None
-    connected = False

-
-    # FIRST: Check if already connected (most important check!)
-    # This prevents unnecessary restarts that would overwrite creds.txt
+    def probe_status() -> Optional[str]:
         try:
             res = requests.get(f"{BASE_URL}/v2/system/mdds/status", timeout=1)
-
+            return res.text
+        except Exception as exc:
+            logger.debug(f"Cannot reach ThetaTerminal status endpoint: {exc}")
+            return None
+
+    if not wait_for_connection:
+        status_text = probe_status()
+        if status_text == "CONNECTED":
+            if THETA_DATA_PROCESS is None and THETA_DATA_PID is None:
+                logger.debug("ThetaTerminal reports CONNECTED but no process is tracked; restarting to capture handle.")
+                client = start_theta_data_client(username=username, password=password)
+                new_client, connected = check_connection(
+                    username=username,
+                    password=password,
+                    wait_for_connection=True,
+                )
+                return client or new_client, connected
+
+            logger.debug("ThetaTerminal already connected.")
+            return None, True

-        if con_text == "CONNECTED":
-            logger.debug("Already connected to Theta Data!")
-            connected = True
-            break
-        elif con_text == "DISCONNECTED":
-            logger.debug("Disconnected from Theta Data, will attempt to start...")
-            # Fall through to process check and restart logic
-        else:
-            logger.debug(f"Unknown connection status: {con_text}")
-            # Fall through to process check and restart logic
-    except Exception as e:
-        # Connection endpoint not responding - process might be dead
-        logger.debug(f"Cannot reach ThetaData status endpoint: {e}")
-        # Fall through to process check and restart logic
-
-    # SECOND: Check if the Java process is still alive
         if not is_process_alive():
-            logger.
+            logger.debug("ThetaTerminal process not running; launching background restart.")
             client = start_theta_data_client(username=username, password=password)
-
-
+            new_client, connected = check_connection(
+                username=username,
+                password=password,
+                wait_for_connection=True,
+            )
+            return client or new_client, connected
+
+        logger.debug("ThetaTerminal running but not yet CONNECTED; waiting for status.")
+        return check_connection(username=username, password=password, wait_for_connection=True)
+
+    counter = 0
+    connected = False
+
+    while counter < max_retries:
+        status_text = probe_status()
+        if status_text == "CONNECTED":
+            if counter:
+                logger.info("ThetaTerminal connected after %s attempt(s).", counter + 1)
+            connected = True
+            break
+        elif status_text == "DISCONNECTED":
+            logger.debug("ThetaTerminal reports DISCONNECTED; will retry.")
+        elif status_text is not None:
+            logger.debug(f"ThetaTerminal returned unexpected status: {status_text}")
+
+        if not is_process_alive():
+            if restart_attempts >= MAX_RESTART_ATTEMPTS:
+                logger.error("ThetaTerminal not running after %s restart attempts.", restart_attempts)
+                break
+            restart_attempts += 1
+            logger.warning("ThetaTerminal process is not running (restart #%s).", restart_attempts)
+            client = start_theta_data_client(username=username, password=password)
+            time.sleep(max(BOOT_GRACE_PERIOD, sleep_interval))
+            counter = 0
             continue

-        # THIRD: Process is alive but not connected - wait and retry
-        time.sleep(0.5)
         counter += 1
+        if counter % 10 == 0:
+            logger.info("Waiting for ThetaTerminal connection (attempt %s/%s).", counter, max_retries)
+        time.sleep(sleep_interval)

-
-
-        break
+    if not connected and counter >= max_retries:
+        logger.error("Cannot connect to Theta Data after %s attempts.", counter)

     return client, connected
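With the new `wait_for_connection` flag, callers choose between a cheap pre-flight probe and a blocking wait, which is how `get_request()` uses it in the hunks below. A hedged usage sketch based only on the signature and the `(client, connected)` return value shown in this diff, with placeholder credentials and an assumed import from this module's path:

from lumibot.tools.thetadata_helper import check_connection

# Cheap pre-flight probe: returns immediately, restarting ThetaTerminal only if it is gone.
client, connected = check_connection("my_user", "my_pass", wait_for_connection=False)

# Blocking variant used after request failures: retries until the terminal reports
# CONNECTED or CONNECTION_MAX_RETRIES is exhausted.
if not connected:
    client, connected = check_connection("my_user", "my_pass", wait_for_connection=True)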
@@ -597,6 +1455,9 @@ def get_request(url: str, headers: dict, querystring: dict, username: str, passw
     next_page_url = None
     page_count = 0

+    # Lightweight liveness probe before issuing the request
+    check_connection(username=username, password=password, wait_for_connection=False)
+
     while True:
         counter = 0
         # Use next_page URL if available, otherwise use original URL with querystring
@@ -605,18 +1466,44 @@ def get_request(url: str, headers: dict, querystring: dict, username: str, passw

         while True:
             try:
+                CONNECTION_DIAGNOSTICS["network_requests"] += 1
+
+                # DEBUG-LOG: API request
+                logger.info(
+                    "[THETA][DEBUG][API][REQUEST] url=%s params=%s",
+                    request_url if next_page_url else url,
+                    request_params if request_params else querystring
+                )
+
                 response = requests.get(request_url, headers=headers, params=request_params)
                 # Status code 472 means "No data" - this is valid, return None
                 if response.status_code == 472:
                     logger.warning(f"No data available for request: {response.text[:200]}")
+                    # DEBUG-LOG: API response - no data
+                    logger.info(
+                        "[THETA][DEBUG][API][RESPONSE] status=472 result=NO_DATA"
+                    )
                     return None
                 # If status code is not 200, then we are not connected
                 elif response.status_code != 200:
                     logger.warning(f"Non-200 status code {response.status_code}: {response.text[:200]}")
-
+                    # DEBUG-LOG: API response - error
+                    logger.info(
+                        "[THETA][DEBUG][API][RESPONSE] status=%d result=ERROR",
+                        response.status_code
+                    )
+                    check_connection(username=username, password=password, wait_for_connection=True)
                 else:
                     json_resp = response.json()

+                    # DEBUG-LOG: API response - success
+                    response_rows = len(json_resp.get("response", [])) if isinstance(json_resp.get("response"), list) else 0
+                    logger.info(
+                        "[THETA][DEBUG][API][RESPONSE] status=200 rows=%d has_next_page=%s",
+                        response_rows,
+                        bool(json_resp.get("header", {}).get("next_page"))
+                    )
+
                     # Check if json_resp has error_type inside of header
                     if "error_type" in json_resp["header"] and json_resp["header"]["error_type"] != "null":
                         # Handle "NO_DATA" error
@@ -625,18 +1512,19 @@ def get_request(url: str, headers: dict, querystring: dict, username: str, passw
                                 f"No data returned for querystring: {querystring}")
                             return None
                         else:
+                            error_label = json_resp["header"].get("error_type")
                             logger.error(
-                                f"Error getting data from Theta Data: {
-                            check_connection(username=username, password=password)
+                                f"Error getting data from Theta Data: {error_label},\nquerystring: {querystring}")
+                            check_connection(username=username, password=password, wait_for_connection=True)
+                            raise ValueError(f"ThetaData returned error_type={error_label}")
                     else:
                         break

             except Exception as e:
                 logger.warning(f"Exception during request (attempt {counter + 1}): {e}")
-                check_connection(username=username, password=password)
-                # Give the process time to start after restart
+                check_connection(username=username, password=password, wait_for_connection=True)
                 if counter == 0:
-                    logger.info("
+                    logger.info("[THETA][DEBUG][API][WAIT] Allowing ThetaTerminal to initialize for 5s before retry.")
                 time.sleep(5)

             counter += 1
@@ -720,12 +1608,34 @@ def get_historical_eod_data(asset: Asset, start_dt: datetime, end_dt: datetime,

     headers = {"Accept": "application/json"}

+    # DEBUG-LOG: EOD data request
+    logger.info(
+        "[THETA][DEBUG][EOD][REQUEST] asset=%s start=%s end=%s datastyle=%s",
+        asset,
+        start_date,
+        end_date,
+        datastyle
+    )
+
     # Send the request
     json_resp = get_request(url=url, headers=headers, querystring=querystring,
                             username=username, password=password)
     if json_resp is None:
+        # DEBUG-LOG: EOD data response - no data
+        logger.info(
+            "[THETA][DEBUG][EOD][RESPONSE] asset=%s result=NO_DATA",
+            asset
+        )
         return None

+    # DEBUG-LOG: EOD data response - success
+    response_rows = len(json_resp.get("response", [])) if isinstance(json_resp.get("response"), list) else 0
+    logger.info(
+        "[THETA][DEBUG][EOD][RESPONSE] asset=%s rows=%d",
+        asset,
+        response_rows
+    )
+
     # Convert to pandas dataframe
     df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])
@@ -875,13 +1785,37 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:

     headers = {"Accept": "application/json"}

+    # DEBUG-LOG: Intraday data request
+    logger.info(
+        "[THETA][DEBUG][INTRADAY][REQUEST] asset=%s start=%s end=%s ivl=%d datastyle=%s include_after_hours=%s",
+        asset,
+        start_date,
+        end_date,
+        ivl,
+        datastyle,
+        include_after_hours
+    )
+
     # Send the request

     json_resp = get_request(url=url, headers=headers, querystring=querystring,
                             username=username, password=password)
     if json_resp is None:
+        # DEBUG-LOG: Intraday data response - no data
+        logger.info(
+            "[THETA][DEBUG][INTRADAY][RESPONSE] asset=%s result=NO_DATA",
+            asset
+        )
         return None

+    # DEBUG-LOG: Intraday data response - success
+    response_rows = len(json_resp.get("response", [])) if isinstance(json_resp.get("response"), list) else 0
+    logger.info(
+        "[THETA][DEBUG][INTRADAY][RESPONSE] asset=%s rows=%d",
+        asset,
+        response_rows
+    )
+
     # Convert to pandas dataframe
     df = pd.DataFrame(json_resp["response"], columns=json_resp["header"]["format"])
@@ -916,8 +1850,8 @@ def get_historical_data(asset: Asset, start_dt: datetime, end_dt: datetime, ivl:
     # Convert the datetime column to a datetime and localize to Eastern Time
     df["datetime"] = pd.to_datetime(df["datetime"])

-    # Localize to
-    df["datetime"] = df["datetime"].dt.tz_localize(
+    # Localize to LUMIBOT_DEFAULT_PYTZ (ThetaData returns times in ET)
+    df["datetime"] = df["datetime"].dt.tz_localize(LUMIBOT_DEFAULT_PYTZ)

     # Set datetime as the index
     df = df.set_index("datetime")
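The last hunk completes the previously truncated localization call: ThetaData timestamps arrive naive in Eastern Time, so the column is localized to `LUMIBOT_DEFAULT_PYTZ` rather than left unlabeled. A small standalone example of the same operation, using pytz's America/New_York directly as a stand-in for the lumibot constant:

import pandas as pd
import pytz

eastern = pytz.timezone("America/New_York")  # stand-in for LUMIBOT_DEFAULT_PYTZ

df = pd.DataFrame(
    {"datetime": ["2024-01-02 09:30:00", "2024-01-02 09:31:00"], "close": [470.10, 470.45]}
)
df["datetime"] = pd.to_datetime(df["datetime"])          # naive timestamps from the API
df["datetime"] = df["datetime"].dt.tz_localize(eastern)  # attach the zone without shifting values
df = df.set_index("datetime")
print(df.index.tz)  # America/New_York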
|