lumibot 4.0.23__py3-none-any.whl → 4.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lumibot might be problematic. Click here for more details.
- lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/__pycache__/constants.cpython-312.pyc +0 -0
- lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
- lumibot/backtesting/__init__.py +6 -5
- lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
- lumibot/backtesting/backtesting_broker.py +209 -9
- lumibot/backtesting/databento_backtesting.py +145 -24
- lumibot/backtesting/thetadata_backtesting.py +63 -42
- lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
- lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
- lumibot/brokers/alpaca.py +11 -1
- lumibot/brokers/tradeovate.py +475 -0
- lumibot/components/grok_news_helper.py +284 -0
- lumibot/components/options_helper.py +90 -34
- lumibot/credentials.py +3 -0
- lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
- lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
- lumibot/data_sources/data_source_backtesting.py +3 -5
- lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
- lumibot/data_sources/pandas_data.py +6 -3
- lumibot/data_sources/polars_mixin.py +126 -21
- lumibot/data_sources/tradeovate_data.py +80 -0
- lumibot/data_sources/tradier_data.py +2 -1
- lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
- lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
- lumibot/entities/asset.py +8 -0
- lumibot/entities/order.py +1 -1
- lumibot/entities/quote.py +14 -0
- lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
- lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
- lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
- lumibot/strategies/_strategy.py +95 -27
- lumibot/strategies/strategy.py +5 -6
- lumibot/strategies/strategy_executor.py +2 -2
- lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
- lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
- lumibot/tools/databento_helper.py +384 -133
- lumibot/tools/databento_helper_polars.py +218 -156
- lumibot/tools/databento_roll.py +216 -0
- lumibot/tools/lumibot_logger.py +32 -17
- lumibot/tools/polygon_helper.py +65 -0
- lumibot/tools/thetadata_helper.py +588 -70
- lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
- lumibot/traders/trader.py +1 -1
- lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
- lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
- lumibot-4.1.1.data/data/ThetaTerminal.jar +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/METADATA +1 -2
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/RECORD +161 -44
- tests/backtest/check_timing_offset.py +198 -0
- tests/backtest/check_volume_spike.py +112 -0
- tests/backtest/comprehensive_comparison.py +166 -0
- tests/backtest/debug_comparison.py +91 -0
- tests/backtest/diagnose_price_difference.py +97 -0
- tests/backtest/direct_api_comparison.py +203 -0
- tests/backtest/profile_thetadata_vs_polygon.py +255 -0
- tests/backtest/root_cause_analysis.py +109 -0
- tests/backtest/test_accuracy_verification.py +244 -0
- tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
- tests/backtest/test_databento.py +4 -0
- tests/backtest/test_databento_comprehensive_trading.py +564 -0
- tests/backtest/test_debug_avg_fill_price.py +112 -0
- tests/backtest/test_dividends.py +8 -3
- tests/backtest/test_example_strategies.py +54 -47
- tests/backtest/test_futures_edge_cases.py +451 -0
- tests/backtest/test_futures_single_trade.py +270 -0
- tests/backtest/test_futures_ultra_simple.py +191 -0
- tests/backtest/test_index_data_verification.py +348 -0
- tests/backtest/test_polygon.py +45 -24
- tests/backtest/test_thetadata.py +246 -60
- tests/backtest/test_thetadata_comprehensive.py +729 -0
- tests/backtest/test_thetadata_vs_polygon.py +557 -0
- tests/backtest/test_yahoo.py +1 -2
- tests/conftest.py +20 -0
- tests/test_backtesting_data_source_env.py +249 -0
- tests/test_backtesting_quiet_logs_complete.py +10 -11
- tests/test_databento_helper.py +76 -90
- tests/test_databento_timezone_fixes.py +21 -4
- tests/test_get_historical_prices.py +6 -6
- tests/test_options_helper.py +162 -40
- tests/test_polygon_helper.py +21 -13
- tests/test_quiet_logs_requirements.py +5 -5
- tests/test_thetadata_helper.py +487 -171
- tests/test_yahoo_data.py +125 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/LICENSE +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/WHEEL +0 -0
- {lumibot-4.0.23.dist-info → lumibot-4.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,801 @@
|
|
|
1
|
+
"""
|
|
2
|
+
CRITICAL TEST: Daily Data Timestamp & Price Accuracy Comparison
|
|
3
|
+
|
|
4
|
+
This test verifies that daily (day timeframe) data from ThetaData and Polygon:
|
|
5
|
+
1. Has IDENTICAL timestamps (no day shifts, no hour shifts, no timezone bugs)
|
|
6
|
+
2. Has matching OHLC prices (within penny-level tolerance)
|
|
7
|
+
3. Covers FULL MONTH of data (each test enforces its own minimum trading-day count, e.g. 19 for September 2025)
|
|
8
|
+
4. Tests MULTIPLE symbols (different exchanges, characteristics)
|
|
9
|
+
5. Handles edge cases (holidays, month boundaries, extended hours)
|
|
10
|
+
|
|
11
|
+
ANY failure in this test indicates a CRITICAL bug that could cause:
|
|
12
|
+
- Incorrect backtests
|
|
13
|
+
- Wrong trading signals
|
|
14
|
+
- Financial losses
|
|
15
|
+
- Lawsuits
|
|
16
|
+
|
|
17
|
+
ZERO TOLERANCE for failures.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
import os
|
|
21
|
+
import pytest
|
|
22
|
+
import datetime
|
|
23
|
+
import pandas as pd
|
|
24
|
+
from dotenv import load_dotenv
|
|
25
|
+
from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
|
|
26
|
+
from lumibot.entities import Asset
|
|
27
|
+
from lumibot.tools import thetadata_helper
|
|
28
|
+
from lumibot.tools.polygon_helper import get_price_data_from_polygon as polygon_get_price_data
|
|
29
|
+
|
|
30
|
+
load_dotenv()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.mark.apitest
|
|
34
|
+
class TestDailyDataTimestampComparison:
|
|
35
|
+
"""
|
|
36
|
+
Comprehensive daily data comparison between ThetaData and Polygon.
|
|
37
|
+
Tests full month, multiple symbols, penny-level accuracy.
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
def test_daily_data_full_month_pltr(self):
    """Run the shared daily-data comparison for PLTR over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # Require at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="PLTR",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
    )
|
|
48
|
+
|
|
49
|
+
def test_daily_data_full_month_spy(self):
    """Run the shared daily-data comparison for SPY over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # Require at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="SPY",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
    )
|
|
57
|
+
|
|
58
|
+
def test_daily_data_full_month_aapl(self):
    """Run the shared daily-data comparison for AAPL over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # Require at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="AAPL",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
    )
|
|
66
|
+
|
|
67
|
+
def test_daily_data_full_month_amzn(self):
    """Run the shared daily-data comparison for AMZN over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # Require at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="AMZN",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
    )
|
|
75
|
+
|
|
76
|
+
# ========== INDEX TESTS ==========
|
|
77
|
+
def test_daily_data_full_month_spx_index(self):
    """Run the shared daily-data comparison for the SPX index over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # asset_type="index" is forwarded so the helper treats SPX as an index.
    self._test_symbol_daily_data(
        symbol="SPX",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
        asset_type="index",
    )
|
|
86
|
+
|
|
87
|
+
def test_daily_data_full_month_vix_index(self):
    """Run the shared daily-data comparison for the VIX index over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    # asset_type="index" is forwarded so the helper treats VIX as an index.
    self._test_symbol_daily_data(
        symbol="VIX",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
        asset_type="index",
    )
|
|
96
|
+
|
|
97
|
+
def test_daily_data_full_month_ndx_index(self):
    """Run the shared daily-data comparison for the SPX index over August 2024.

    NOTE(review): the method name says NDX, but the symbol passed below is
    "SPX" -- confirm which index this test was meant to cover.
    """
    window_start = datetime.datetime(2024, 8, 1)
    window_end = datetime.datetime(2024, 8, 31)
    # This case asks for at least 21 daily bars (the September cases ask for 19).
    self._test_symbol_daily_data(
        symbol="SPX",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=21,
        asset_type="index",
    )
|
|
106
|
+
|
|
107
|
+
# ========== OPTION TESTS ==========
|
|
108
|
+
def test_daily_data_spy_call_option(self):
    """Run the option daily-data validation for a SPY call over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    contract_expiry = datetime.datetime(2025, 12, 19)  # December 2025 expiry
    self._test_option_daily_data(
        symbol="SPY",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=15,  # lower floor than the stock tests: options may trade less often
        expiration=contract_expiry,
        strike=580.0,  # picked against an assumed SPY level of ~$570 (at/slightly out of the money)
        right="CALL",
    )
|
|
119
|
+
|
|
120
|
+
def test_daily_data_spy_put_option(self):
    """Run the option daily-data validation for a SPY put over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)
    contract_expiry = datetime.datetime(2025, 12, 19)
    self._test_option_daily_data(
        symbol="SPY",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=15,
        expiration=contract_expiry,
        strike=560.0,  # picked against an assumed SPY level of ~$570 (strike sits below that level)
        right="PUT",
    )
|
|
131
|
+
|
|
132
|
+
def _test_option_daily_data(self, symbol, start_date, end_date, min_trading_days, expiration, strike, right):
    """
    Validate ThetaData daily bars for one option contract.

    ThetaData is the source under test. Polygon is fetched on a best-effort
    basis as a reference: if the Polygon request raises or returns nothing,
    a warning is printed and the price comparison step is skipped instead
    of failing the test.

    Checks, in the order they run:
    1. ThetaData returns a non-empty result (otherwise the test fails).
    2. ThetaData has at least ``min_trading_days`` bars.
    3. When Polygon data is available: both sources have the same number
       of bars and each OHLC field differs by at most $0.005.
    4. No ThetaData close price is zero or negative.
    5. OHLC relationships hold (high >= low, high >= open/close,
       low <= open/close).
    6. Zero-volume bars are counted and reported (warning only).

    Args:
        symbol: Underlying ticker of the option contract.
        start_date: First day of the requested window (datetime).
        end_date: Last day of the requested window (datetime).
        min_trading_days: Minimum number of ThetaData bars required.
        expiration: Contract expiration (datetime).
        strike: Contract strike price.
        right: Contract right, e.g. "CALL" or "PUT".
    """
    ohlc_fields = ("open", "high", "low", "close")
    tolerance = 0.005  # half a cent; anything larger fails the comparison
    banner = "=" * 80

    username = os.environ.get("THETADATA_USERNAME")
    password = os.environ.get("THETADATA_PASSWORD")

    asset = Asset(symbol, asset_type="option", expiration=expiration, strike=strike, right=right)

    print(f"\n{banner}")
    print(f"TESTING {symbol} {right} ${strike} (exp {expiration.date()}) OPTION DAILY DATA")
    print(f"Period: {start_date.date()} to {end_date.date()}")
    print(banner)

    # ==== STEP 1: ThetaData option data (mandatory) ====
    print("\n1. Fetching ThetaData option daily data...")
    try:
        theta_df = thetadata_helper.get_price_data(
            username=username,
            password=password,
            asset=asset,
            start=start_date,
            end=end_date,
            timespan="day",
        )
    except Exception as e:
        pytest.fail(f"CRITICAL: ThetaData option daily data FAILED: {e}")

    if theta_df is None or len(theta_df) == 0:
        pytest.fail("CRITICAL: ThetaData returned NO option daily data")

    print(f" ✓ ThetaData: {len(theta_df)} daily bars")
    print(f" Date range: {theta_df.index[0]} to {theta_df.index[-1]}")

    # ==== STEP 2: Polygon option data (best-effort reference) ====
    print("\n2. Fetching Polygon option data for validation...")
    polygon_api_key = os.environ.get("POLYGON_API_KEY")

    # Deliberately broad: any Polygon problem downgrades to "skip comparison".
    try:
        polygon_df = polygon_get_price_data(
            api_key=polygon_api_key,
            asset=asset,
            start=start_date,
            end=end_date,
            timespan="day",
            quote_asset=Asset("USD", asset_type="forex"),
        )

        if polygon_df is None or len(polygon_df) == 0:
            print(" ⚠ WARNING: Polygon returned NO option data - skipping price comparison")
            polygon_df = None
        else:
            print(f" ✓ Polygon: {len(polygon_df)} daily bars")
            print(f" Date range: {polygon_df.index[0]} to {polygon_df.index[-1]}")
    except Exception as e:
        print(f" ⚠ WARNING: Polygon failed ({e}) - skipping price comparison")
        polygon_df = None

    # ==== STEP 3: minimum trading days ====
    print("\n3. Verifying minimum trading days...")
    assert len(theta_df) >= min_trading_days, \
        f"CRITICAL: Expected at least {min_trading_days} days, got {len(theta_df)}"
    print(f" ✓ Sufficient trading days: {len(theta_df)} >= {min_trading_days}")

    # ==== STEP 4: price comparison (only when Polygon data is available) ====
    # polygon_df is either None or non-empty at this point (see step 2).
    if polygon_df is not None:
        print("\n4. Verifying OHLC prices vs Polygon (half-penny tolerance: $0.005)...")

        if len(theta_df) != len(polygon_df):
            print(f"\n ✗ MISMATCH: ThetaData={len(theta_df)} days, Polygon={len(polygon_df)} days")
            pytest.fail("CRITICAL: Different number of trading days")

        max_diff = {field: 0.0 for field in ohlc_fields}
        comparison_data = []

        # Rows are paired by position; the equal-length check above is the
        # only alignment guard applied before comparing prices.
        for theta_idx, polygon_idx in zip(theta_df.index, polygon_df.index):
            theta_row = theta_df.loc[theta_idx]
            polygon_row = polygon_df.loc[polygon_idx]

            diffs = {field: abs(theta_row[field] - polygon_row[field]) for field in ohlc_fields}

            # NOTE(review): max(running, new) keeps the running value when the
            # new difference is NaN, so NaN prices never trip the tolerance.
            for field in ohlc_fields:
                max_diff[field] = max(max_diff[field], diffs[field])

            comparison_data.append({
                'date': theta_idx.date(),
                'theta_close': theta_row['close'],
                'polygon_close': polygon_row['close'],
                'diff_close': diffs['close'],
            })

        failures = [
            f"{field}: max diff ${max_diff[field]:.4f}"
            for field in ohlc_fields
            if max_diff[field] > tolerance
        ]

        if failures:
            print("\n ✗ PRICE TOLERANCE EXCEEDED:")
            for failure in failures:
                print(f" {failure}")

            # Show the first ten closes side by side to aid diagnosis.
            print("\n PRICE COMPARISON (first 10 days):")
            print(f" {'Date':<12} {'Theta':<10} {'Polygon':<10} {'Diff':<10}")
            print(f" {'-'*50}")
            for row in comparison_data[:10]:
                t_close = row['theta_close']
                p_close = row['polygon_close']
                diff = row['diff_close']
                match_str = "✅" if diff <= tolerance else "❌"
                print(f" {row['date']} ${t_close:<9.2f} ${p_close:<9.2f} ${diff:<9.4f} {match_str}")

            pytest.fail(f"CRITICAL: Option price tolerance exceeded: {', '.join(failures)}")

        print(f" ✓ All prices within ${tolerance:.3f} tolerance")
        print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
              f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")

    # ==== STEP 5: price sanity ====
    print("\n5. Verifying price data sanity...")

    # A close at or below zero is treated as invalid.
    zero_prices = (theta_df['close'] <= 0).sum()
    if zero_prices > 0:
        pytest.fail(f"CRITICAL: {zero_prices} bars have zero/negative close prices")

    min_price = theta_df['close'].min()
    max_price = theta_df['close'].max()
    avg_price = theta_df['close'].mean()

    print(" ✓ All prices positive")
    print(f" Price range: ${min_price:.2f} - ${max_price:.2f} (avg: ${avg_price:.2f})")

    # ==== STEP 6: OHLC consistency ====
    print("\n6. Verifying OHLC consistency...")

    # High must not be below low on any bar.
    invalid_hl = (theta_df['high'] < theta_df['low']).sum()
    if invalid_hl > 0:
        pytest.fail(f"CRITICAL: {invalid_hl} bars have high < low")

    # High must not be below open or close.
    invalid_h = ((theta_df['high'] < theta_df['open']) | (theta_df['high'] < theta_df['close'])).sum()
    if invalid_h > 0:
        pytest.fail(f"CRITICAL: {invalid_h} bars have high < open/close")

    # Low must not be above open or close.
    invalid_l = ((theta_df['low'] > theta_df['open']) | (theta_df['low'] > theta_df['close'])).sum()
    if invalid_l > 0:
        pytest.fail(f"CRITICAL: {invalid_l} bars have low > open/close")

    print(" ✓ OHLC relationships valid (high >= low, high >= open/close, low <= open/close)")

    # ==== STEP 7: volume (report only, never fails) ====
    print("\n7. Verifying volume data...")
    zero_volume = (theta_df['volume'] == 0).sum()
    pct_zero_vol = (zero_volume / len(theta_df)) * 100

    print(f" ✓ Volume data present ({zero_volume}/{len(theta_df)} bars with zero volume = {pct_zero_vol:.1f}%)")
    if pct_zero_vol > 50:
        print(" ⚠ WARNING: >50% of bars have zero volume (may indicate low liquidity)")

    print(f"\n{banner}")
    print(f"✓✓✓ {symbol} OPTION DATA VALIDATION PASSED ✓✓✓")
    print(f" Trading days: {len(theta_df)}")
    print(f" Price range: ${min_price:.2f} - ${max_price:.2f}")
    print(" OHLC relationships: VALID")
    print(f" Period: {theta_df.index[0].date()} to {theta_df.index[-1].date()}")
    print(f"{banner}\n")
|
|
323
|
+
|
|
324
|
+
def _test_symbol_daily_data(self, symbol, start_date, end_date, min_trading_days, asset_type="stock"):
|
|
325
|
+
"""
|
|
326
|
+
Core test function that validates daily data for a symbol.
|
|
327
|
+
|
|
328
|
+
CRITICAL CHECKS:
|
|
329
|
+
1. Both sources return data
|
|
330
|
+
2. Same number of trading days
|
|
331
|
+
3. IDENTICAL timestamps (no shifts)
|
|
332
|
+
4. OHLC within 0.01 (penny) tolerance
|
|
333
|
+
5. Volume reasonable
|
|
334
|
+
6. No duplicate dates
|
|
335
|
+
7. No missing dates (within market calendar)
|
|
336
|
+
"""
|
|
337
|
+
username = os.environ.get("THETADATA_USERNAME")
|
|
338
|
+
password = os.environ.get("THETADATA_PASSWORD")
|
|
339
|
+
polygon_api_key = os.environ.get("POLYGON_API_KEY")
|
|
340
|
+
|
|
341
|
+
asset = Asset(symbol, asset_type=asset_type)
|
|
342
|
+
|
|
343
|
+
print(f"\n{'='*80}")
|
|
344
|
+
print(f"TESTING {symbol} DAILY DATA: {start_date.date()} to {end_date.date()}")
|
|
345
|
+
print(f"{'='*80}")
|
|
346
|
+
|
|
347
|
+
# ==== GET THETADATA DAILY DATA ====
|
|
348
|
+
print(f"\n1. Fetching ThetaData daily data...")
|
|
349
|
+
try:
|
|
350
|
+
theta_df = thetadata_helper.get_price_data(
|
|
351
|
+
username=username,
|
|
352
|
+
password=password,
|
|
353
|
+
asset=asset,
|
|
354
|
+
start=start_date,
|
|
355
|
+
end=end_date,
|
|
356
|
+
timespan="day"
|
|
357
|
+
)
|
|
358
|
+
except Exception as e:
|
|
359
|
+
pytest.fail(f"CRITICAL: ThetaData daily data FAILED for {symbol}: {e}")
|
|
360
|
+
|
|
361
|
+
if theta_df is None or len(theta_df) == 0:
|
|
362
|
+
pytest.fail(f"CRITICAL: ThetaData returned NO daily data for {symbol}")
|
|
363
|
+
|
|
364
|
+
print(f" ✓ ThetaData: {len(theta_df)} daily bars")
|
|
365
|
+
print(f" Date range: {theta_df.index[0]} to {theta_df.index[-1]}")
|
|
366
|
+
|
|
367
|
+
# ==== GET POLYGON OR YAHOO DAILY DATA ====
|
|
368
|
+
# NOTE: Polygon requires paid plan for indexes, so we use Yahoo Finance for indexes
|
|
369
|
+
if asset_type == "index":
|
|
370
|
+
print(f"\n2. Fetching Yahoo Finance daily data (indexes not available in free Polygon)...")
|
|
371
|
+
import yfinance as yf
|
|
372
|
+
|
|
373
|
+
# Yahoo Finance uses ^SPX for SPX, ^VIX for VIX, ^NDX for NDX
|
|
374
|
+
yahoo_symbol = f"^{symbol}" if symbol in ["SPX", "VIX", "NDX", "RUT", "DJI"] else symbol
|
|
375
|
+
ticker = yf.Ticker(yahoo_symbol)
|
|
376
|
+
|
|
377
|
+
try:
|
|
378
|
+
from datetime import timedelta as td
|
|
379
|
+
# Yahoo requires end_date to be exclusive (next day)
|
|
380
|
+
yahoo_end = (end_date + td(days=1)).strftime('%Y-%m-%d')
|
|
381
|
+
yahoo_start = start_date.strftime('%Y-%m-%d')
|
|
382
|
+
yahoo_hist = ticker.history(start=yahoo_start, end=yahoo_end, interval='1d')
|
|
383
|
+
|
|
384
|
+
if yahoo_hist is None or len(yahoo_hist) == 0:
|
|
385
|
+
pytest.fail(f"CRITICAL: Yahoo Finance returned NO daily data for {symbol}")
|
|
386
|
+
|
|
387
|
+
# Convert Yahoo data to match our format
|
|
388
|
+
polygon_df = pd.DataFrame({
|
|
389
|
+
'open': yahoo_hist['Open'],
|
|
390
|
+
'high': yahoo_hist['High'],
|
|
391
|
+
'low': yahoo_hist['Low'],
|
|
392
|
+
'close': yahoo_hist['Close'],
|
|
393
|
+
'volume': yahoo_hist['Volume']
|
|
394
|
+
})
|
|
395
|
+
polygon_df.index = pd.to_datetime(polygon_df.index).tz_convert('UTC')
|
|
396
|
+
|
|
397
|
+
except Exception as e:
|
|
398
|
+
pytest.fail(f"CRITICAL: Yahoo Finance daily data FAILED for {symbol}: {e}")
|
|
399
|
+
else:
|
|
400
|
+
print(f"\n2. Fetching Polygon daily data...")
|
|
401
|
+
try:
|
|
402
|
+
polygon_df = polygon_get_price_data(
|
|
403
|
+
api_key=polygon_api_key,
|
|
404
|
+
asset=asset,
|
|
405
|
+
start=start_date,
|
|
406
|
+
end=end_date,
|
|
407
|
+
timespan="day",
|
|
408
|
+
quote_asset=Asset("USD", asset_type="forex")
|
|
409
|
+
)
|
|
410
|
+
except Exception as e:
|
|
411
|
+
pytest.fail(f"CRITICAL: Polygon daily data FAILED for {symbol}: {e}")
|
|
412
|
+
|
|
413
|
+
if polygon_df is None or len(polygon_df) == 0:
|
|
414
|
+
pytest.fail(f"CRITICAL: Polygon returned NO daily data for {symbol}")
|
|
415
|
+
|
|
416
|
+
comparison_source = "Yahoo Finance" if asset_type == "index" else "Polygon"
|
|
417
|
+
print(f" ✓ {comparison_source}: {len(polygon_df)} daily bars")
|
|
418
|
+
print(f" Date range: {polygon_df.index[0]} to {polygon_df.index[-1]}")
|
|
419
|
+
|
|
420
|
+
# ==== CHECK 1: Minimum Trading Days ====
|
|
421
|
+
print(f"\n3. Verifying minimum trading days...")
|
|
422
|
+
assert len(theta_df) >= min_trading_days, \
|
|
423
|
+
f"CRITICAL: ThetaData has only {len(theta_df)} days, expected >={min_trading_days}"
|
|
424
|
+
assert len(polygon_df) >= min_trading_days, \
|
|
425
|
+
f"CRITICAL: Polygon has only {len(polygon_df)} days, expected >={min_trading_days}"
|
|
426
|
+
print(f" ✓ Both sources have >={min_trading_days} trading days")
|
|
427
|
+
|
|
428
|
+
# ==== CHECK 2: Same Number of Days ====
|
|
429
|
+
print(f"\n4. Verifying same number of trading days...")
|
|
430
|
+
if len(theta_df) != len(polygon_df):
|
|
431
|
+
print(f"\n ✗ MISMATCH: ThetaData={len(theta_df)} days, Polygon={len(polygon_df)} days")
|
|
432
|
+
print(f"\n ThetaData dates:")
|
|
433
|
+
for dt in theta_df.index:
|
|
434
|
+
print(f" {dt.date()}")
|
|
435
|
+
print(f"\n Polygon dates:")
|
|
436
|
+
for dt in polygon_df.index:
|
|
437
|
+
print(f" {dt.date()}")
|
|
438
|
+
pytest.fail(f"CRITICAL: Different number of trading days: Theta={len(theta_df)}, Polygon={len(polygon_df)}")
|
|
439
|
+
print(f" ✓ Same number of trading days: {len(theta_df)}")
|
|
440
|
+
|
|
441
|
+
# ==== CHECK 3: IDENTICAL TIMESTAMPS ====
|
|
442
|
+
print(f"\n5. Verifying IDENTICAL timestamps (ZERO tolerance for shifts)...")
|
|
443
|
+
|
|
444
|
+
# Convert to date for comparison (ignore time component)
|
|
445
|
+
theta_dates = [dt.date() for dt in theta_df.index]
|
|
446
|
+
polygon_dates = [dt.date() for dt in polygon_df.index]
|
|
447
|
+
|
|
448
|
+
mismatched_dates = []
|
|
449
|
+
for i, (theta_date, polygon_date) in enumerate(zip(theta_dates, polygon_dates)):
|
|
450
|
+
if theta_date != polygon_date:
|
|
451
|
+
mismatched_dates.append((i, theta_date, polygon_date))
|
|
452
|
+
|
|
453
|
+
if mismatched_dates:
|
|
454
|
+
print(f"\n ✗ CRITICAL: TIMESTAMP MISMATCH DETECTED!")
|
|
455
|
+
print(f"\n {'Index':<10} {'ThetaData':<15} {'Polygon':<15} {'Shift (days)'}")
|
|
456
|
+
print(f" {'-'*60}")
|
|
457
|
+
for idx, theta_date, polygon_date in mismatched_dates:
|
|
458
|
+
shift = (theta_date - polygon_date).days
|
|
459
|
+
print(f" {idx:<10} {theta_date} {polygon_date} {shift:+d}")
|
|
460
|
+
pytest.fail(f"CRITICAL: {len(mismatched_dates)} timestamp mismatches found!")
|
|
461
|
+
|
|
462
|
+
print(f" ✓ ALL timestamps match perfectly (0 shifts)")
|
|
463
|
+
|
|
464
|
+
# ==== CHECK 4: OHLC PRICE ACCURACY ====
|
|
465
|
+
print(f"\n6. Verifying OHLC prices (penny-level tolerance: $0.01)...")
|
|
466
|
+
|
|
467
|
+
# Create aligned DataFrame for comparison
|
|
468
|
+
comparison_data = []
|
|
469
|
+
max_diff = {'open': 0.0, 'high': 0.0, 'low': 0.0, 'close': 0.0}
|
|
470
|
+
|
|
471
|
+
for theta_idx, polygon_idx in zip(theta_df.index, polygon_df.index):
|
|
472
|
+
theta_row = theta_df.loc[theta_idx]
|
|
473
|
+
polygon_row = polygon_df.loc[polygon_idx]
|
|
474
|
+
|
|
475
|
+
diffs = {
|
|
476
|
+
'open': abs(theta_row['open'] - polygon_row['open']),
|
|
477
|
+
'high': abs(theta_row['high'] - polygon_row['high']),
|
|
478
|
+
'low': abs(theta_row['low'] - polygon_row['low']),
|
|
479
|
+
'close': abs(theta_row['close'] - polygon_row['close'])
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
for field in ['open', 'high', 'low', 'close']:
|
|
483
|
+
max_diff[field] = max(max_diff[field], diffs[field])
|
|
484
|
+
|
|
485
|
+
comparison_data.append({
|
|
486
|
+
'date': theta_idx.date(),
|
|
487
|
+
'theta_open': theta_row['open'],
|
|
488
|
+
'poly_open': polygon_row['open'],
|
|
489
|
+
'diff_open': diffs['open'],
|
|
490
|
+
'theta_close': theta_row['close'],
|
|
491
|
+
'poly_close': polygon_row['close'],
|
|
492
|
+
'diff_close': diffs['close'],
|
|
493
|
+
})
|
|
494
|
+
|
|
495
|
+
# TOLERANCE: Stocks require ZERO tolerance, indexes allow fractional cent (rounding)
|
|
496
|
+
# Stocks: ZERO tolerance - regulated data must match EXACTLY
|
|
497
|
+
# Indexes: $0.001 tolerance - calculated values may have fractional cent rounding differences
|
|
498
|
+
tolerance = 0.001 if asset_type == "index" else 0.00
|
|
499
|
+
failures = []
|
|
500
|
+
|
|
501
|
+
for field in ['open', 'high', 'low', 'close']:
|
|
502
|
+
if max_diff[field] > tolerance:
|
|
503
|
+
failures.append(f"{field}: max diff ${max_diff[field]:.4f}")
|
|
504
|
+
|
|
505
|
+
if failures:
|
|
506
|
+
# Add Yahoo Finance 3-way comparison for failed days
|
|
507
|
+
import yfinance as yf
|
|
508
|
+
|
|
509
|
+
print(f"\n ✗ PRICE TOLERANCE EXCEEDED:")
|
|
510
|
+
for failure in failures:
|
|
511
|
+
print(f" {failure}")
|
|
512
|
+
|
|
513
|
+
print(f"\n 3-WAY COMPARISON (ThetaData vs Polygon vs Yahoo):")
|
|
514
|
+
print(f" {'Date':<12} {'Theta':<10} {'Polygon':<10} {'Yahoo':<10} {'Which Match?':<20}")
|
|
515
|
+
print(f" {'-'*70}")
|
|
516
|
+
|
|
517
|
+
ticker = yf.Ticker(symbol)
|
|
518
|
+
for row in comparison_data[:10]:
|
|
519
|
+
try:
|
|
520
|
+
from datetime import timedelta as td
|
|
521
|
+
date_obj = row['date']
|
|
522
|
+
date_str = date_obj.strftime('%Y-%m-%d')
|
|
523
|
+
next_date = (date_obj + td(days=1)).strftime('%Y-%m-%d')
|
|
524
|
+
yahoo_hist = ticker.history(start=date_str, end=next_date, interval='1d')
|
|
525
|
+
yahoo_close = yahoo_hist.iloc[0]['Close'] if len(yahoo_hist) > 0 else None
|
|
526
|
+
|
|
527
|
+
t_close = row['theta_close']
|
|
528
|
+
p_close = row['poly_close']
|
|
529
|
+
y_close = yahoo_close
|
|
530
|
+
|
|
531
|
+
# Check which ones match
|
|
532
|
+
tp_match = abs(t_close - p_close) < 0.01
|
|
533
|
+
ty_match = abs(t_close - y_close) < 0.01 if y_close else False
|
|
534
|
+
py_match = abs(p_close - y_close) < 0.01 if y_close else False
|
|
535
|
+
|
|
536
|
+
if tp_match and ty_match and py_match:
|
|
537
|
+
match_str = "✅ All match"
|
|
538
|
+
elif py_match:
|
|
539
|
+
match_str = "❌ Polygon+Yahoo (Theta wrong)"
|
|
540
|
+
elif ty_match:
|
|
541
|
+
match_str = "❌ Theta+Yahoo (Polygon wrong)"
|
|
542
|
+
elif tp_match:
|
|
543
|
+
match_str = "❌ Theta+Polygon (Yahoo wrong)"
|
|
544
|
+
else:
|
|
545
|
+
match_str = "❌ None match!"
|
|
546
|
+
|
|
547
|
+
print(f" {date_str:<12} ${t_close:<9.2f} ${p_close:<9.2f} ${y_close:<9.2f} {match_str}")
|
|
548
|
+
except:
|
|
549
|
+
print(f" {date_str:<12} ${row['theta_close']:<9.2f} ${row['poly_close']:<9.2f} {'N/A':<9} Yahoo error")
|
|
550
|
+
|
|
551
|
+
pytest.fail(f"CRITICAL: Price tolerance exceeded: {', '.join(failures)}")
|
|
552
|
+
|
|
553
|
+
print(f" ✓ All prices within ${tolerance:.2f} tolerance")
|
|
554
|
+
print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
|
|
555
|
+
f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")
|
|
556
|
+
|
|
557
|
+
# ==== CHECK 5: Exact Timestamp Alignment ====
|
|
558
|
+
print(f"\n6. Verifying EXACT timestamp alignment (no shifts allowed)...")
|
|
559
|
+
timestamp_mismatches = []
|
|
560
|
+
for i, (theta_ts, polygon_ts) in enumerate(zip(theta_df.index, polygon_df.index)):
|
|
561
|
+
if theta_ts.date() != polygon_ts.date():
|
|
562
|
+
timestamp_mismatches.append((i, theta_ts, polygon_ts))
|
|
563
|
+
|
|
564
|
+
if timestamp_mismatches:
|
|
565
|
+
print(f"\n ✗ TIMESTAMP MISMATCH DETECTED:")
|
|
566
|
+
for idx, theta_ts, polygon_ts in timestamp_mismatches[:10]:
|
|
567
|
+
print(f" Index {idx}: Theta={theta_ts.date()}, Polygon={polygon_ts.date()}")
|
|
568
|
+
pytest.fail(f"CRITICAL: {len(timestamp_mismatches)} timestamp mismatches!")
|
|
569
|
+
|
|
570
|
+
print(f" ✓ ALL timestamps match EXACTLY (0 day shifts)")
|
|
571
|
+
|
|
572
|
+
# ==== CHECK 6: No Duplicates ====
|
|
573
|
+
print(f"\n7. Verifying no duplicate dates...")
|
|
574
|
+
theta_duplicates = theta_df.index[theta_df.index.duplicated()].tolist()
|
|
575
|
+
polygon_duplicates = polygon_df.index[polygon_df.index.duplicated()].tolist()
|
|
576
|
+
|
|
577
|
+
if theta_duplicates:
|
|
578
|
+
pytest.fail(f"CRITICAL: ThetaData has duplicate dates: {theta_duplicates}")
|
|
579
|
+
if polygon_duplicates:
|
|
580
|
+
pytest.fail(f"CRITICAL: Polygon has duplicate dates: {polygon_duplicates}")
|
|
581
|
+
|
|
582
|
+
print(f" ✓ No duplicate dates in either source")
|
|
583
|
+
|
|
584
|
+
# ==== CHECK 7: Volume Sanity ====
|
|
585
|
+
print(f"\n8. Verifying volume data...")
|
|
586
|
+
if 'volume' in theta_df.columns and 'volume' in polygon_df.columns:
|
|
587
|
+
theta_zero_vol = (theta_df['volume'] == 0).sum()
|
|
588
|
+
polygon_zero_vol = (polygon_df['volume'] == 0).sum()
|
|
589
|
+
|
|
590
|
+
if theta_zero_vol > len(theta_df) * 0.1: # More than 10% zero volume
|
|
591
|
+
print(f" ⚠ WARNING: ThetaData has {theta_zero_vol}/{len(theta_df)} days with zero volume")
|
|
592
|
+
if polygon_zero_vol > len(polygon_df) * 0.1:
|
|
593
|
+
print(f" ⚠ WARNING: Polygon has {polygon_zero_vol}/{len(polygon_df)} days with zero volume")
|
|
594
|
+
|
|
595
|
+
print(f" ✓ Volume data present (Theta: {theta_zero_vol} zero days, Polygon: {polygon_zero_vol} zero days)")
|
|
596
|
+
|
|
597
|
+
# ==== FINAL SUMMARY ====
|
|
598
|
+
print(f"\n{'='*80}")
|
|
599
|
+
print(f"✓✓✓ {symbol} DAILY DATA VALIDATION PASSED ✓✓✓")
|
|
600
|
+
print(f" Trading days: {len(theta_df)}")
|
|
601
|
+
print(f" Timestamps: PERFECT MATCH (0 shifts)")
|
|
602
|
+
print(f" Prices: ALL within $0.01")
|
|
603
|
+
print(f" Period: {theta_df.index[0].date()} to {theta_df.index[-1].date()}")
|
|
604
|
+
print(f"{'='*80}\n")
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
@pytest.mark.apitest
class TestIntradayDataComparison:
    """
    Comprehensive intraday interval comparison (5min, 10min, 15min, 30min, hour).

    Compares ThetaData bars requested at the target interval against Polygon
    minute bars that are aggregated client-side with pandas ``resample``.

    Acceptance criteria enforced by the test:
      * bar counts may differ by at most one (16:00 closing-bar edge case),
      * timestamps of the compared bars must be identical,
      * every OHLC value must agree within half a penny ($0.005).
    """

    @pytest.mark.parametrize("interval,resample_rule,expected_bars", [
        ("5minute", "5min", 78),
        ("10minute", "10min", 39),
        ("15minute", "15min", 26),
        ("30minute", "30min", 13),
        ("hour", "1h", 7),
    ])
    def test_theta_vs_polygon_intervals(self, interval, resample_rule, expected_bars):
        """Test ThetaData intervals match Polygon aggregated data EXACTLY.

        Args:
            interval: Timespan name passed to ThetaData (e.g. ``"5minute"``).
            resample_rule: pandas offset alias used to aggregate Polygon minute bars.
            expected_bars: Number of bars in one full regular session at this
                interval. Informational only: a deviation is printed as a
                warning and never fails the test.

        Fails (via ``pytest.fail``) when either provider errors or returns no
        data, when bar counts differ by more than one, when any timestamp
        differs, or when any OHLC value differs by more than $0.005.
        """
        import pytz

        price_fields = ('open', 'high', 'low', 'close')
        price_tolerance = 0.005  # Half-penny tolerance

        username = os.environ.get("THETADATA_USERNAME")
        password = os.environ.get("THETADATA_PASSWORD")
        polygon_api_key = os.environ.get("POLYGON_API_KEY")

        asset = Asset("SPY", asset_type="stock")

        # Use timezone-aware datetimes (ET) to properly filter RTH
        et_tz = pytz.timezone("America/New_York")
        start_et = et_tz.localize(datetime.datetime(2025, 9, 15, 9, 30))  # 9:30 AM ET
        end_et = et_tz.localize(datetime.datetime(2025, 9, 15, 16, 0))  # 4:00 PM ET
        start = start_et.astimezone(pytz.UTC)
        end = end_et.astimezone(pytz.UTC)

        print(f"\n{'='*80}")
        print(f"TESTING {asset.symbol} {interval.upper()} INTERVAL: {start_et.date()}")
        print(f"{'='*80}")

        # ==== GET THETADATA SERVER-SIDE AGGREGATED DATA ====
        print(f"\n1. Fetching ThetaData {interval} data...")
        try:
            theta_df = thetadata_helper.get_price_data(
                username=username,
                password=password,
                asset=asset,
                start=start,
                end=end,
                timespan=interval,
                include_after_hours=False  # RTH only for fair comparison with Polygon
            )
        except Exception as e:
            pytest.fail(f"CRITICAL: ThetaData {interval} FAILED: {e}")

        if theta_df is None or len(theta_df) == 0:
            pytest.fail(f"CRITICAL: ThetaData returned NO {interval} data")

        print(f" ✓ ThetaData: {len(theta_df)} {interval} bars")
        print(f" First bar: {theta_df.index[0]}")
        print(f" Last bar: {theta_df.index[-1]}")

        # ==== GET POLYGON MINUTE DATA AND AGGREGATE CLIENT-SIDE ====
        print(f"\n2. Fetching Polygon minute data and aggregating to {interval}...")
        try:
            polygon_minute_df = polygon_get_price_data(
                api_key=polygon_api_key,
                asset=asset,
                start=start,
                end=end,
                timespan="minute",
                quote_asset=Asset("USD", asset_type="forex")
            )
        except Exception as e:
            pytest.fail(f"CRITICAL: Polygon minute data FAILED: {e}")

        if polygon_minute_df is None or len(polygon_minute_df) == 0:
            pytest.fail("CRITICAL: Polygon returned NO minute data")

        # Filter to RTH only (9:30 AM - 4:00 PM ET) before aggregating.
        # Polygon may return extended hours data - we need to filter it manually.
        # The upper bound is inclusive, so a 16:00 minute bar (if present) is kept;
        # that is the source of the off-by-one tolerance in CHECK 1.
        in_session = (polygon_minute_df.index >= start) & (polygon_minute_df.index <= end)
        polygon_minute_rth = polygon_minute_df[in_session]

        # Boolean indexing always yields a DataFrame, so only emptiness needs checking.
        if len(polygon_minute_rth) == 0:
            pytest.fail("CRITICAL: Polygon returned NO RTH minute data")

        # Aggregate Polygon minute data.
        # For hourly, offset to align with market open (9:30 AM = 13:30 UTC);
        # all other intervals already divide evenly into the :30 open.
        ohlcv_rules = {
            'open': 'first',
            'high': 'max',
            'low': 'min',
            'close': 'last',
            'volume': 'sum',
        }
        resample_kwargs = {'offset': '30min'} if interval == "hour" else {}
        polygon_agg_df = (
            polygon_minute_rth
            .resample(resample_rule, **resample_kwargs)
            .agg(ohlcv_rules)
            .dropna()
        )

        # dropna() can remove every bin; fail clearly instead of raising IndexError below.
        if len(polygon_agg_df) == 0:
            pytest.fail(f"CRITICAL: Polygon aggregation produced NO {interval} bars")

        print(f" ✓ Polygon: {len(polygon_agg_df)} {interval} bars (aggregated from {len(polygon_minute_rth)} RTH minute bars)")
        print(f" First bar: {polygon_agg_df.index[0]}")
        print(f" Last bar: {polygon_agg_df.index[-1]}")

        # ==== CHECK 1: Bar Count - Allow ±1 for 16:00 bar edge case ====
        print("\n3. Verifying bar count match...")

        # Informational only: report when ThetaData deviates from a full session.
        if len(theta_df) != expected_bars:
            print(f" ⚠ ThetaData returned {len(theta_df)} bars; a full RTH session has {expected_bars} {interval} bars")

        # ThetaData RTH ends at 15:55 for intraday (no 16:00 bar), Polygon may include 16:00
        # This is acceptable behavior - both are correct interpretations of "4 PM close"
        bar_diff = abs(len(theta_df) - len(polygon_agg_df))

        if bar_diff > 1:
            print("\n ✗ CRITICAL: Bar count MISMATCH!")
            print(f" ThetaData: {len(theta_df)} bars")
            print(f" Polygon: {len(polygon_agg_df)} bars")
            print(f" Difference: {bar_diff} bars")
            pytest.fail(f"CRITICAL: Bar count diff {bar_diff} > 1. Theta={len(theta_df)}, Polygon={len(polygon_agg_df)}")

        if bar_diff == 1:
            print(" ⚠ Bar count off by 1 (acceptable for 16:00 bar edge case)")
            print(f" ThetaData: {len(theta_df)} bars (ends {theta_df.index[-1]})")
            print(f" Polygon: {len(polygon_agg_df)} bars (ends {polygon_agg_df.index[-1]})")
            # Use shorter dataset for comparison (drops the trailing extra bar)
            min_len = min(len(theta_df), len(polygon_agg_df))
            theta_df = theta_df.iloc[:min_len]
            polygon_agg_df = polygon_agg_df.iloc[:min_len]
        else:
            print(f" ✓ EXACT match: {len(theta_df)} bars")

        # ==== CHECK 2: EXACT Timestamp Match ====
        print("\n4. Verifying EXACT timestamp alignment...")
        timestamp_mismatches = [
            (i, theta_ts, polygon_ts)
            for i, (theta_ts, polygon_ts) in enumerate(zip(theta_df.index, polygon_agg_df.index))
            if theta_ts != polygon_ts
        ]

        if timestamp_mismatches:
            print("\n ✗ TIMESTAMP MISMATCH DETECTED!")
            print(f"\n {'Index':<8} {'ThetaData':<25} {'Polygon':<25} {'Shift (seconds)'}")
            print(f" {'-'*75}")
            for idx, theta_ts, polygon_ts in timestamp_mismatches[:10]:
                shift = (theta_ts - polygon_ts).total_seconds()
                print(f" {idx:<8} {theta_ts} {polygon_ts} {shift:+.0f}s")
            pytest.fail(f"CRITICAL: {len(timestamp_mismatches)} timestamp mismatches!")

        print(" ✓ ALL timestamps match EXACTLY (0 shifts)")

        # ==== CHECK 3: Price Accuracy (half-penny tolerance) ====
        print("\n5. Verifying OHLC prices (half-penny tolerance: $0.005)...")

        max_diff = {field: 0.0 for field in price_fields}
        price_failures = []

        # Compare rows by position: both frames have equal length and identical
        # timestamps at this point, and positional access stays scalar even if
        # an index were to contain a repeated timestamp.
        for pos, theta_ts in enumerate(theta_df.index):
            theta_row = theta_df.iloc[pos]
            polygon_row = polygon_agg_df.iloc[pos]

            for field in price_fields:
                diff = abs(theta_row[field] - polygon_row[field])
                max_diff[field] = max(max_diff[field], diff)

                if diff > price_tolerance:
                    price_failures.append({
                        'timestamp': theta_ts,
                        'field': field,
                        'theta': theta_row[field],
                        'polygon': polygon_row[field],
                        'diff': diff
                    })

        if price_failures:
            print(f"\n ✗ PRICE TOLERANCE EXCEEDED ({len(price_failures)} failures):")
            for failure in price_failures[:10]:
                print(f" {failure['timestamp']} {failure['field']}: Theta=${failure['theta']:.4f}, "
                      f"Polygon=${failure['polygon']:.4f}, Diff=${failure['diff']:.4f}")
            pytest.fail(f"CRITICAL: {len(price_failures)} price differences exceed $0.005")

        print(" ✓ All prices within $0.005 tolerance")
        print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
              f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")

        # ==== FINAL SUMMARY ====
        # Only claim an exact bar-count match when no trimming was needed.
        bars_note = "EXACT match" if bar_diff == 0 else "off by 1, compared on common bars"
        print(f"\n{'='*80}")
        print(f"✓✓✓ {asset.symbol} {interval.upper()} VALIDATION PASSED ✓✓✓")
        print(f" Bars: {len(theta_df)} ({bars_note})")
        print(" Timestamps: PERFECT MATCH (0 shifts)")
        print(" Prices: ALL within $0.005 (half-penny)")
        print(f" Period: {theta_df.index[0]} to {theta_df.index[-1]}")
        print(f"{'='*80}\n")
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
if __name__ == "__main__":
    # Allow running this test module directly as a script: hand this file to
    # pytest with verbose output (-v) and output capturing disabled (-s).
    pytest_args = [__file__, "-v", "-s"]
    pytest.main(pytest_args)
|