lumibot 4.0.23__py3-none-any.whl → 4.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lumibot might be problematic. Click here for more details.

Files changed (160) hide show
  1. lumibot/__pycache__/__init__.cpython-312.pyc +0 -0
  2. lumibot/__pycache__/constants.cpython-312.pyc +0 -0
  3. lumibot/__pycache__/credentials.cpython-312.pyc +0 -0
  4. lumibot/backtesting/__init__.py +6 -5
  5. lumibot/backtesting/__pycache__/__init__.cpython-312.pyc +0 -0
  6. lumibot/backtesting/__pycache__/alpaca_backtesting.cpython-312.pyc +0 -0
  7. lumibot/backtesting/__pycache__/alpha_vantage_backtesting.cpython-312.pyc +0 -0
  8. lumibot/backtesting/__pycache__/backtesting_broker.cpython-312.pyc +0 -0
  9. lumibot/backtesting/__pycache__/ccxt_backtesting.cpython-312.pyc +0 -0
  10. lumibot/backtesting/__pycache__/databento_backtesting.cpython-312.pyc +0 -0
  11. lumibot/backtesting/__pycache__/interactive_brokers_rest_backtesting.cpython-312.pyc +0 -0
  12. lumibot/backtesting/__pycache__/pandas_backtesting.cpython-312.pyc +0 -0
  13. lumibot/backtesting/__pycache__/polygon_backtesting.cpython-312.pyc +0 -0
  14. lumibot/backtesting/__pycache__/thetadata_backtesting.cpython-312.pyc +0 -0
  15. lumibot/backtesting/__pycache__/yahoo_backtesting.cpython-312.pyc +0 -0
  16. lumibot/backtesting/backtesting_broker.py +209 -9
  17. lumibot/backtesting/databento_backtesting.py +141 -24
  18. lumibot/backtesting/thetadata_backtesting.py +63 -42
  19. lumibot/brokers/__pycache__/__init__.cpython-312.pyc +0 -0
  20. lumibot/brokers/__pycache__/alpaca.cpython-312.pyc +0 -0
  21. lumibot/brokers/__pycache__/bitunix.cpython-312.pyc +0 -0
  22. lumibot/brokers/__pycache__/broker.cpython-312.pyc +0 -0
  23. lumibot/brokers/__pycache__/ccxt.cpython-312.pyc +0 -0
  24. lumibot/brokers/__pycache__/example_broker.cpython-312.pyc +0 -0
  25. lumibot/brokers/__pycache__/interactive_brokers.cpython-312.pyc +0 -0
  26. lumibot/brokers/__pycache__/interactive_brokers_rest.cpython-312.pyc +0 -0
  27. lumibot/brokers/__pycache__/projectx.cpython-312.pyc +0 -0
  28. lumibot/brokers/__pycache__/schwab.cpython-312.pyc +0 -0
  29. lumibot/brokers/__pycache__/tradier.cpython-312.pyc +0 -0
  30. lumibot/brokers/__pycache__/tradovate.cpython-312.pyc +0 -0
  31. lumibot/brokers/alpaca.py +11 -1
  32. lumibot/brokers/tradeovate.py +475 -0
  33. lumibot/components/grok_news_helper.py +284 -0
  34. lumibot/components/options_helper.py +90 -34
  35. lumibot/credentials.py +3 -0
  36. lumibot/data_sources/__pycache__/__init__.cpython-312.pyc +0 -0
  37. lumibot/data_sources/__pycache__/alpaca_data.cpython-312.pyc +0 -0
  38. lumibot/data_sources/__pycache__/alpha_vantage_data.cpython-312.pyc +0 -0
  39. lumibot/data_sources/__pycache__/bitunix_data.cpython-312.pyc +0 -0
  40. lumibot/data_sources/__pycache__/ccxt_backtesting_data.cpython-312.pyc +0 -0
  41. lumibot/data_sources/__pycache__/ccxt_data.cpython-312.pyc +0 -0
  42. lumibot/data_sources/__pycache__/data_source.cpython-312.pyc +0 -0
  43. lumibot/data_sources/__pycache__/data_source_backtesting.cpython-312.pyc +0 -0
  44. lumibot/data_sources/__pycache__/databento_data_polars_backtesting.cpython-312.pyc +0 -0
  45. lumibot/data_sources/__pycache__/databento_data_polars_live.cpython-312.pyc +0 -0
  46. lumibot/data_sources/__pycache__/example_broker_data.cpython-312.pyc +0 -0
  47. lumibot/data_sources/__pycache__/exceptions.cpython-312.pyc +0 -0
  48. lumibot/data_sources/__pycache__/interactive_brokers_data.cpython-312.pyc +0 -0
  49. lumibot/data_sources/__pycache__/interactive_brokers_rest_data.cpython-312.pyc +0 -0
  50. lumibot/data_sources/__pycache__/pandas_data.cpython-312.pyc +0 -0
  51. lumibot/data_sources/__pycache__/polars_mixin.cpython-312.pyc +0 -0
  52. lumibot/data_sources/__pycache__/polygon_data_polars.cpython-312.pyc +0 -0
  53. lumibot/data_sources/__pycache__/projectx_data.cpython-312.pyc +0 -0
  54. lumibot/data_sources/__pycache__/schwab_data.cpython-312.pyc +0 -0
  55. lumibot/data_sources/__pycache__/tradier_data.cpython-312.pyc +0 -0
  56. lumibot/data_sources/__pycache__/tradovate_data.cpython-312.pyc +0 -0
  57. lumibot/data_sources/__pycache__/yahoo_data_polars.cpython-312.pyc +0 -0
  58. lumibot/data_sources/data_source_backtesting.py +3 -5
  59. lumibot/data_sources/databento_data_polars_backtesting.py +194 -48
  60. lumibot/data_sources/pandas_data.py +6 -3
  61. lumibot/data_sources/polars_mixin.py +126 -21
  62. lumibot/data_sources/tradeovate_data.py +80 -0
  63. lumibot/data_sources/tradier_data.py +2 -1
  64. lumibot/entities/__pycache__/__init__.cpython-312.pyc +0 -0
  65. lumibot/entities/__pycache__/asset.cpython-312.pyc +0 -0
  66. lumibot/entities/__pycache__/bar.cpython-312.pyc +0 -0
  67. lumibot/entities/__pycache__/bars.cpython-312.pyc +0 -0
  68. lumibot/entities/__pycache__/chains.cpython-312.pyc +0 -0
  69. lumibot/entities/__pycache__/data.cpython-312.pyc +0 -0
  70. lumibot/entities/__pycache__/dataline.cpython-312.pyc +0 -0
  71. lumibot/entities/__pycache__/order.cpython-312.pyc +0 -0
  72. lumibot/entities/__pycache__/position.cpython-312.pyc +0 -0
  73. lumibot/entities/__pycache__/quote.cpython-312.pyc +0 -0
  74. lumibot/entities/__pycache__/trading_fee.cpython-312.pyc +0 -0
  75. lumibot/entities/asset.py +8 -0
  76. lumibot/entities/order.py +1 -1
  77. lumibot/entities/quote.py +14 -0
  78. lumibot/example_strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  79. lumibot/example_strategies/__pycache__/test_broker_functions.cpython-312-pytest-8.4.1.pyc +0 -0
  80. lumibot/strategies/__pycache__/__init__.cpython-312.pyc +0 -0
  81. lumibot/strategies/__pycache__/_strategy.cpython-312.pyc +0 -0
  82. lumibot/strategies/__pycache__/strategy.cpython-312.pyc +0 -0
  83. lumibot/strategies/__pycache__/strategy_executor.cpython-312.pyc +0 -0
  84. lumibot/strategies/_strategy.py +95 -27
  85. lumibot/strategies/strategy.py +5 -6
  86. lumibot/strategies/strategy_executor.py +2 -2
  87. lumibot/tools/__pycache__/__init__.cpython-312.pyc +0 -0
  88. lumibot/tools/__pycache__/alpaca_helpers.cpython-312.pyc +0 -0
  89. lumibot/tools/__pycache__/bitunix_helpers.cpython-312.pyc +0 -0
  90. lumibot/tools/__pycache__/black_scholes.cpython-312.pyc +0 -0
  91. lumibot/tools/__pycache__/ccxt_data_store.cpython-312.pyc +0 -0
  92. lumibot/tools/__pycache__/databento_helper.cpython-312.pyc +0 -0
  93. lumibot/tools/__pycache__/databento_helper_polars.cpython-312.pyc +0 -0
  94. lumibot/tools/__pycache__/debugers.cpython-312.pyc +0 -0
  95. lumibot/tools/__pycache__/decorators.cpython-312.pyc +0 -0
  96. lumibot/tools/__pycache__/helpers.cpython-312.pyc +0 -0
  97. lumibot/tools/__pycache__/indicators.cpython-312.pyc +0 -0
  98. lumibot/tools/__pycache__/lumibot_logger.cpython-312.pyc +0 -0
  99. lumibot/tools/__pycache__/pandas.cpython-312.pyc +0 -0
  100. lumibot/tools/__pycache__/polygon_helper.cpython-312.pyc +0 -0
  101. lumibot/tools/__pycache__/polygon_helper_async.cpython-312.pyc +0 -0
  102. lumibot/tools/__pycache__/polygon_helper_polars_optimized.cpython-312.pyc +0 -0
  103. lumibot/tools/__pycache__/projectx_helpers.cpython-312.pyc +0 -0
  104. lumibot/tools/__pycache__/schwab_helper.cpython-312.pyc +0 -0
  105. lumibot/tools/__pycache__/thetadata_helper.cpython-312.pyc +0 -0
  106. lumibot/tools/__pycache__/types.cpython-312.pyc +0 -0
  107. lumibot/tools/__pycache__/yahoo_helper.cpython-312.pyc +0 -0
  108. lumibot/tools/__pycache__/yahoo_helper_polars_optimized.cpython-312.pyc +0 -0
  109. lumibot/tools/databento_helper.py +384 -133
  110. lumibot/tools/databento_helper_polars.py +218 -156
  111. lumibot/tools/databento_roll.py +216 -0
  112. lumibot/tools/lumibot_logger.py +32 -17
  113. lumibot/tools/polygon_helper.py +65 -0
  114. lumibot/tools/thetadata_helper.py +588 -70
  115. lumibot/traders/__pycache__/__init__.cpython-312.pyc +0 -0
  116. lumibot/traders/__pycache__/trader.cpython-312.pyc +0 -0
  117. lumibot/traders/trader.py +1 -1
  118. lumibot/trading_builtins/__pycache__/__init__.cpython-312.pyc +0 -0
  119. lumibot/trading_builtins/__pycache__/custom_stream.cpython-312.pyc +0 -0
  120. lumibot/trading_builtins/__pycache__/safe_list.cpython-312.pyc +0 -0
  121. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/METADATA +1 -2
  122. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/RECORD +160 -44
  123. tests/backtest/check_timing_offset.py +198 -0
  124. tests/backtest/check_volume_spike.py +112 -0
  125. tests/backtest/comprehensive_comparison.py +166 -0
  126. tests/backtest/debug_comparison.py +91 -0
  127. tests/backtest/diagnose_price_difference.py +97 -0
  128. tests/backtest/direct_api_comparison.py +203 -0
  129. tests/backtest/profile_thetadata_vs_polygon.py +255 -0
  130. tests/backtest/root_cause_analysis.py +109 -0
  131. tests/backtest/test_accuracy_verification.py +244 -0
  132. tests/backtest/test_daily_data_timestamp_comparison.py +801 -0
  133. tests/backtest/test_databento.py +4 -0
  134. tests/backtest/test_databento_comprehensive_trading.py +564 -0
  135. tests/backtest/test_debug_avg_fill_price.py +112 -0
  136. tests/backtest/test_dividends.py +8 -3
  137. tests/backtest/test_example_strategies.py +54 -47
  138. tests/backtest/test_futures_edge_cases.py +451 -0
  139. tests/backtest/test_futures_single_trade.py +270 -0
  140. tests/backtest/test_futures_ultra_simple.py +191 -0
  141. tests/backtest/test_index_data_verification.py +348 -0
  142. tests/backtest/test_polygon.py +45 -24
  143. tests/backtest/test_thetadata.py +246 -60
  144. tests/backtest/test_thetadata_comprehensive.py +729 -0
  145. tests/backtest/test_thetadata_vs_polygon.py +557 -0
  146. tests/backtest/test_yahoo.py +1 -2
  147. tests/conftest.py +20 -0
  148. tests/test_backtesting_data_source_env.py +249 -0
  149. tests/test_backtesting_quiet_logs_complete.py +10 -11
  150. tests/test_databento_helper.py +73 -86
  151. tests/test_databento_timezone_fixes.py +21 -4
  152. tests/test_get_historical_prices.py +6 -6
  153. tests/test_options_helper.py +162 -40
  154. tests/test_polygon_helper.py +21 -13
  155. tests/test_quiet_logs_requirements.py +5 -5
  156. tests/test_thetadata_helper.py +487 -171
  157. tests/test_yahoo_data.py +125 -0
  158. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/LICENSE +0 -0
  159. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/WHEEL +0 -0
  160. {lumibot-4.0.23.dist-info → lumibot-4.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,801 @@
1
+ """
2
+ CRITICAL TEST: Daily Data Timestamp & Price Accuracy Comparison
3
+
4
+ This test verifies that daily (day timeframe) data from ThetaData and Polygon:
5
+ 1. Has IDENTICAL timestamps (no day shifts, no hour shifts, no timezone bugs)
6
+ 2. Has matching OHLC prices (within penny-level tolerance)
7
+ 3. Covers FULL MONTH of data (each test sets its own minimum number of trading days)
8
+ 4. Tests MULTIPLE symbols (different exchanges, characteristics)
9
+ 5. Handles edge cases (holidays, month boundaries, extended hours)
10
+
11
+ ANY failure in this test indicates a CRITICAL bug that could cause:
12
+ - Incorrect backtests
13
+ - Wrong trading signals
14
+ - Financial losses
15
+ - Lawsuits
16
+
17
+ ZERO TOLERANCE for failures.
18
+ """
19
+
20
+ import os
21
+ import pytest
22
+ import datetime
23
+ import pandas as pd
24
+ from dotenv import load_dotenv
25
+ from lumibot.backtesting import ThetaDataBacktesting, PolygonDataBacktesting
26
+ from lumibot.entities import Asset
27
+ from lumibot.tools import thetadata_helper
28
+ from lumibot.tools.polygon_helper import get_price_data_from_polygon as polygon_get_price_data
29
+
30
+ load_dotenv()
31
+
32
+
33
+ @pytest.mark.apitest
34
+ class TestDailyDataTimestampComparison:
35
+ """
36
+ Comprehensive daily data comparison between ThetaData and Polygon.
37
+ Tests full month, multiple symbols, penny-level accuracy.
38
+ """
39
+
40
def test_daily_data_full_month_pltr(self):
    """Run the shared daily-data check for PLTR over September 2025."""
    window_start = datetime.datetime(2025, 9, 1)
    window_end = datetime.datetime(2025, 9, 30)

    # The shared helper must see at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="PLTR",
        start_date=window_start,
        end_date=window_end,
        min_trading_days=19,
    )
48
+
49
def test_daily_data_full_month_spy(self):
    """Run the shared daily-data check for SPY over September 2025."""
    first_day = datetime.datetime(2025, 9, 1)
    last_day = datetime.datetime(2025, 9, 30)

    # The shared helper must see at least 19 daily bars inside the window.
    self._test_symbol_daily_data(
        symbol="SPY",
        start_date=first_day,
        end_date=last_day,
        min_trading_days=19,
    )
57
+
58
def test_daily_data_full_month_aapl(self):
    """Run the shared daily-data check for AAPL over September 2025."""
    request = {
        "symbol": "AAPL",
        "start_date": datetime.datetime(2025, 9, 1),
        "end_date": datetime.datetime(2025, 9, 30),
        # Lower bound on the number of daily bars the helper accepts.
        "min_trading_days": 19,
    }
    self._test_symbol_daily_data(**request)
66
+
67
def test_daily_data_full_month_amzn(self):
    """Run the shared daily-data check for AMZN over September 2025."""
    request = {
        "symbol": "AMZN",
        "start_date": datetime.datetime(2025, 9, 1),
        "end_date": datetime.datetime(2025, 9, 30),
        # Lower bound on the number of daily bars the helper accepts.
        "min_trading_days": 19,
    }
    self._test_symbol_daily_data(**request)
75
+
76
+ # ========== INDEX TESTS ==========
77
def test_daily_data_full_month_spx_index(self):
    """Run the shared daily-data check for the SPX index over September 2025."""
    period = (datetime.datetime(2025, 9, 1), datetime.datetime(2025, 9, 30))

    # asset_type="index" selects the index branch of the shared helper.
    self._test_symbol_daily_data(
        symbol="SPX",
        start_date=period[0],
        end_date=period[1],
        min_trading_days=19,
        asset_type="index",
    )
86
+
87
def test_daily_data_full_month_vix_index(self):
    """Run the shared daily-data check for the VIX index over September 2025."""
    period = (datetime.datetime(2025, 9, 1), datetime.datetime(2025, 9, 30))

    # asset_type="index" selects the index branch of the shared helper.
    self._test_symbol_daily_data(
        symbol="VIX",
        start_date=period[0],
        end_date=period[1],
        min_trading_days=19,
        asset_type="index",
    )
96
+
97
def test_daily_data_full_month_ndx_index(self):
    """Run the shared daily-data check for the SPX index over August 2024."""
    # NOTE(review): the method name says NDX, but the symbol requested here is
    # SPX (a different month than the SPX test above) - confirm whether NDX
    # was intended or the method should be renamed.
    request = {
        "symbol": "SPX",
        "start_date": datetime.datetime(2024, 8, 1),
        "end_date": datetime.datetime(2024, 8, 31),
        "min_trading_days": 21,
        "asset_type": "index",
    }
    self._test_symbol_daily_data(**request)
106
+
107
+ # ========== OPTION TESTS ==========
108
def test_daily_data_spy_call_option(self):
    """Run the shared option daily-data check for a SPY call over September 2025."""
    contract = {
        # Contract expiring 2025-12-19.
        "expiration": datetime.datetime(2025, 12, 19),
        # NOTE(review): the original comment treated 580 as ATM/slightly OTM
        # assuming SPY ~$570 - confirm the intended moneyness.
        "strike": 580.0,
        "right": "CALL",
    }
    self._test_option_daily_data(
        symbol="SPY",
        start_date=datetime.datetime(2025, 9, 1),
        end_date=datetime.datetime(2025, 9, 30),
        # Lower bar-count threshold than the stock tests use.
        min_trading_days=15,
        **contract,
    )
119
+
120
def test_daily_data_spy_put_option(self):
    """Run the shared option daily-data check for a SPY put over September 2025."""
    contract = {
        # Contract expiring 2025-12-19.
        "expiration": datetime.datetime(2025, 12, 19),
        # NOTE(review): the original comment treated 560 as ATM/slightly ITM
        # assuming SPY ~$570 - confirm the intended moneyness.
        "strike": 560.0,
        "right": "PUT",
    }
    self._test_option_daily_data(
        symbol="SPY",
        start_date=datetime.datetime(2025, 9, 1),
        end_date=datetime.datetime(2025, 9, 30),
        # Lower bar-count threshold than the stock tests use.
        min_trading_days=15,
        **contract,
    )
131
+
132
def _test_option_daily_data(self, symbol, start_date, end_date, min_trading_days, expiration, strike, right):
    """
    Validate ThetaData daily bars for a single option contract.

    Steps, in order:
    1. Fetch daily bars from ThetaData; an exception or an empty result
       fails the test.
    2. Fetch the same contract from Polygon. If Polygon raises or returns
       nothing, a warning is printed and the price comparison is skipped.
    3. Require at least ``min_trading_days`` ThetaData bars.
    4. When Polygon data is available: require the same number of bars,
       the same calendar date at every position, and OHLC values within
       $0.005 of each other.
    5. Require every ThetaData close to be strictly positive.
    6. Require consistent OHLC relationships on every ThetaData bar.
    7. Report how many bars have zero volume (warning only, never fails).

    Args:
        symbol: Underlying ticker of the option contract.
        start_date: First day of the requested window (datetime).
        end_date: Last day of the requested window (datetime).
        min_trading_days: Minimum number of ThetaData daily bars required.
        expiration: Contract expiration (datetime).
        strike: Contract strike price.
        right: Contract right, passed through to ``Asset`` (the callers
            visible here use "CALL" and "PUT").

    Credentials are read from the THETADATA_USERNAME, THETADATA_PASSWORD
    and POLYGON_API_KEY environment variables.
    """
    username = os.environ.get("THETADATA_USERNAME")
    password = os.environ.get("THETADATA_PASSWORD")

    asset = Asset(symbol, asset_type="option", expiration=expiration, strike=strike, right=right)

    print(f"\n{'='*80}")
    print(f"TESTING {symbol} {right} ${strike} (exp {expiration.date()}) OPTION DAILY DATA")
    print(f"Period: {start_date.date()} to {end_date.date()}")
    print(f"{'='*80}")

    # ==== GET THETADATA OPTION DATA ====
    print("\n1. Fetching ThetaData option daily data...")
    try:
        theta_df = thetadata_helper.get_price_data(
            username=username,
            password=password,
            asset=asset,
            start=start_date,
            end=end_date,
            timespan="day"
        )
    except Exception as e:
        pytest.fail(f"CRITICAL: ThetaData option daily data FAILED: {e}")

    if theta_df is None or len(theta_df) == 0:
        pytest.fail("CRITICAL: ThetaData returned NO option daily data")

    print(f" ✓ ThetaData: {len(theta_df)} daily bars")
    print(f" Date range: {theta_df.index[0]} to {theta_df.index[-1]}")

    # ==== GET POLYGON OPTION DATA FOR COMPARISON ====
    print("\n2. Fetching Polygon option data for validation...")
    polygon_api_key = os.environ.get("POLYGON_API_KEY")

    # Polygon is best-effort: any failure here only disables step 4.
    try:
        polygon_df = polygon_get_price_data(
            api_key=polygon_api_key,
            asset=asset,
            start=start_date,
            end=end_date,
            timespan="day",
            quote_asset=Asset("USD", asset_type="forex")
        )

        if polygon_df is None or len(polygon_df) == 0:
            print(" ⚠ WARNING: Polygon returned NO option data - skipping price comparison")
            polygon_df = None
        else:
            print(f" ✓ Polygon: {len(polygon_df)} daily bars")
            print(f" Date range: {polygon_df.index[0]} to {polygon_df.index[-1]}")
    except Exception as e:
        print(f" ⚠ WARNING: Polygon failed ({e}) - skipping price comparison")
        polygon_df = None

    # ==== CHECK: Minimum Trading Days ====
    print("\n3. Verifying minimum trading days...")
    assert len(theta_df) >= min_trading_days, \
        f"CRITICAL: Expected at least {min_trading_days} days, got {len(theta_df)}"
    print(f" ✓ Sufficient trading days: {len(theta_df)} >= {min_trading_days}")

    # ==== CHECK: Price Comparison (if Polygon data available) ====
    if polygon_df is not None and len(polygon_df) > 0:
        print("\n4. Verifying OHLC prices vs Polygon (half-penny tolerance: $0.005)...")

        # Check same number of days
        if len(theta_df) != len(polygon_df):
            print(f"\n ✗ MISMATCH: ThetaData={len(theta_df)} days, Polygon={len(polygon_df)} days")
            pytest.fail("CRITICAL: Different number of trading days")

        # Rows are compared position by position below, so first make sure
        # each position refers to the same calendar day in both sources;
        # otherwise prices from different days would be compared.
        theta_dates = [ts.date() for ts in theta_df.index]
        polygon_dates = [ts.date() for ts in polygon_df.index]
        shifted = [
            (pos, theta_date, polygon_date)
            for pos, (theta_date, polygon_date) in enumerate(zip(theta_dates, polygon_dates))
            if theta_date != polygon_date
        ]
        if shifted:
            print("\n ✗ DATE MISMATCH: option bars are not aligned day-for-day")
            for pos, theta_date, polygon_date in shifted[:10]:
                print(f" Index {pos}: Theta={theta_date}, Polygon={polygon_date}")
            pytest.fail(f"CRITICAL: {len(shifted)} option bars fall on different dates")

        # Positional comparison (index dropped) so duplicate or differently
        # timed index labels cannot affect the pairing.
        price_fields = ('open', 'high', 'low', 'close')
        max_diff = {}
        for field in price_fields:
            theta_vals = theta_df[field].reset_index(drop=True)
            polygon_vals = polygon_df[field].reset_index(drop=True)
            max_diff[field] = max(0.0, (theta_vals - polygon_vals).abs().max())

        # HALF-PENNY tolerance ($0.005) - anything more is unacceptable
        tolerance = 0.005
        failures = [
            f"{field}: max diff ${max_diff[field]:.4f}"
            for field in price_fields
            if max_diff[field] > tolerance
        ]

        if failures:
            print("\n ✗ PRICE TOLERANCE EXCEEDED:")
            for failure in failures:
                print(f" {failure}")

            print("\n PRICE COMPARISON (first 10 days):")
            print(f" {'Date':<12} {'Theta':<10} {'Polygon':<10} {'Diff':<10}")
            print(f" {'-'*50}")
            first_rows = zip(
                theta_dates[:10],
                theta_df['close'].iloc[:10],
                polygon_df['close'].iloc[:10],
            )
            for bar_date, t_close, p_close in first_rows:
                diff = abs(t_close - p_close)
                match_str = "✅" if diff <= tolerance else "❌"
                print(f" {bar_date} ${t_close:<9.2f} ${p_close:<9.2f} ${diff:<9.4f} {match_str}")

            pytest.fail(f"CRITICAL: Option price tolerance exceeded: {', '.join(failures)}")

        print(f" ✓ All prices within ${tolerance:.3f} tolerance")
        print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
              f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")

    # ==== CHECK: Price Data Sanity ====
    print("\n5. Verifying price data sanity...")

    # Check for zero or negative prices (invalid)
    zero_prices = (theta_df['close'] <= 0).sum()
    if zero_prices > 0:
        pytest.fail(f"CRITICAL: {zero_prices} bars have zero/negative close prices")

    # Summary statistics, printed here and in the final banner.
    min_price = theta_df['close'].min()
    max_price = theta_df['close'].max()
    avg_price = theta_df['close'].mean()

    print(" ✓ All prices positive")
    print(f" Price range: ${min_price:.2f} - ${max_price:.2f} (avg: ${avg_price:.2f})")

    # ==== CHECK: OHLC Consistency ====
    print("\n6. Verifying OHLC consistency...")

    # High should be >= Low for every bar
    invalid_hl = (theta_df['high'] < theta_df['low']).sum()
    if invalid_hl > 0:
        pytest.fail(f"CRITICAL: {invalid_hl} bars have high < low")

    # High should be >= Open and Close
    invalid_h = ((theta_df['high'] < theta_df['open']) | (theta_df['high'] < theta_df['close'])).sum()
    if invalid_h > 0:
        pytest.fail(f"CRITICAL: {invalid_h} bars have high < open/close")

    # Low should be <= Open and Close
    invalid_l = ((theta_df['low'] > theta_df['open']) | (theta_df['low'] > theta_df['close'])).sum()
    if invalid_l > 0:
        pytest.fail(f"CRITICAL: {invalid_l} bars have low > open/close")

    print(" ✓ OHLC relationships valid (high >= low, high >= open/close, low <= open/close)")

    # ==== CHECK: Volume Data ====
    print("\n7. Verifying volume data...")
    zero_volume = (theta_df['volume'] == 0).sum()
    pct_zero_vol = (zero_volume / len(theta_df)) * 100

    print(f" ✓ Volume data present ({zero_volume}/{len(theta_df)} bars with zero volume = {pct_zero_vol:.1f}%)")
    if pct_zero_vol > 50:
        print(" ⚠ WARNING: >50% of bars have zero volume (may indicate low liquidity)")

    print(f"\n{'='*80}")
    print(f"✓✓✓ {symbol} OPTION DATA VALIDATION PASSED ✓✓✓")
    print(f" Trading days: {len(theta_df)}")
    print(f" Price range: ${min_price:.2f} - ${max_price:.2f}")
    print(" OHLC relationships: VALID")
    print(f" Period: {theta_df.index[0].date()} to {theta_df.index[-1].date()}")
    print(f"{'='*80}\n")
323
+
324
+ def _test_symbol_daily_data(self, symbol, start_date, end_date, min_trading_days, asset_type="stock"):
325
+ """
326
+ Core test function that validates daily data for a symbol.
327
+
328
+ CRITICAL CHECKS:
329
+ 1. Both sources return data
330
+ 2. Same number of trading days
331
+ 3. IDENTICAL timestamps (no shifts)
332
+ 4. OHLC within 0.01 (penny) tolerance
333
+ 5. Volume reasonable
334
+ 6. No duplicate dates
335
+ 7. No missing dates (within market calendar)
336
+ """
337
+ username = os.environ.get("THETADATA_USERNAME")
338
+ password = os.environ.get("THETADATA_PASSWORD")
339
+ polygon_api_key = os.environ.get("POLYGON_API_KEY")
340
+
341
+ asset = Asset(symbol, asset_type=asset_type)
342
+
343
+ print(f"\n{'='*80}")
344
+ print(f"TESTING {symbol} DAILY DATA: {start_date.date()} to {end_date.date()}")
345
+ print(f"{'='*80}")
346
+
347
+ # ==== GET THETADATA DAILY DATA ====
348
+ print(f"\n1. Fetching ThetaData daily data...")
349
+ try:
350
+ theta_df = thetadata_helper.get_price_data(
351
+ username=username,
352
+ password=password,
353
+ asset=asset,
354
+ start=start_date,
355
+ end=end_date,
356
+ timespan="day"
357
+ )
358
+ except Exception as e:
359
+ pytest.fail(f"CRITICAL: ThetaData daily data FAILED for {symbol}: {e}")
360
+
361
+ if theta_df is None or len(theta_df) == 0:
362
+ pytest.fail(f"CRITICAL: ThetaData returned NO daily data for {symbol}")
363
+
364
+ print(f" ✓ ThetaData: {len(theta_df)} daily bars")
365
+ print(f" Date range: {theta_df.index[0]} to {theta_df.index[-1]}")
366
+
367
+ # ==== GET POLYGON OR YAHOO DAILY DATA ====
368
+ # NOTE: Polygon requires paid plan for indexes, so we use Yahoo Finance for indexes
369
+ if asset_type == "index":
370
+ print(f"\n2. Fetching Yahoo Finance daily data (indexes not available in free Polygon)...")
371
+ import yfinance as yf
372
+
373
+ # Yahoo Finance uses ^SPX for SPX, ^VIX for VIX, ^NDX for NDX
374
+ yahoo_symbol = f"^{symbol}" if symbol in ["SPX", "VIX", "NDX", "RUT", "DJI"] else symbol
375
+ ticker = yf.Ticker(yahoo_symbol)
376
+
377
+ try:
378
+ from datetime import timedelta as td
379
+ # Yahoo requires end_date to be exclusive (next day)
380
+ yahoo_end = (end_date + td(days=1)).strftime('%Y-%m-%d')
381
+ yahoo_start = start_date.strftime('%Y-%m-%d')
382
+ yahoo_hist = ticker.history(start=yahoo_start, end=yahoo_end, interval='1d')
383
+
384
+ if yahoo_hist is None or len(yahoo_hist) == 0:
385
+ pytest.fail(f"CRITICAL: Yahoo Finance returned NO daily data for {symbol}")
386
+
387
+ # Convert Yahoo data to match our format
388
+ polygon_df = pd.DataFrame({
389
+ 'open': yahoo_hist['Open'],
390
+ 'high': yahoo_hist['High'],
391
+ 'low': yahoo_hist['Low'],
392
+ 'close': yahoo_hist['Close'],
393
+ 'volume': yahoo_hist['Volume']
394
+ })
395
+ polygon_df.index = pd.to_datetime(polygon_df.index).tz_convert('UTC')
396
+
397
+ except Exception as e:
398
+ pytest.fail(f"CRITICAL: Yahoo Finance daily data FAILED for {symbol}: {e}")
399
+ else:
400
+ print(f"\n2. Fetching Polygon daily data...")
401
+ try:
402
+ polygon_df = polygon_get_price_data(
403
+ api_key=polygon_api_key,
404
+ asset=asset,
405
+ start=start_date,
406
+ end=end_date,
407
+ timespan="day",
408
+ quote_asset=Asset("USD", asset_type="forex")
409
+ )
410
+ except Exception as e:
411
+ pytest.fail(f"CRITICAL: Polygon daily data FAILED for {symbol}: {e}")
412
+
413
+ if polygon_df is None or len(polygon_df) == 0:
414
+ pytest.fail(f"CRITICAL: Polygon returned NO daily data for {symbol}")
415
+
416
+ comparison_source = "Yahoo Finance" if asset_type == "index" else "Polygon"
417
+ print(f" ✓ {comparison_source}: {len(polygon_df)} daily bars")
418
+ print(f" Date range: {polygon_df.index[0]} to {polygon_df.index[-1]}")
419
+
420
+ # ==== CHECK 1: Minimum Trading Days ====
421
+ print(f"\n3. Verifying minimum trading days...")
422
+ assert len(theta_df) >= min_trading_days, \
423
+ f"CRITICAL: ThetaData has only {len(theta_df)} days, expected >={min_trading_days}"
424
+ assert len(polygon_df) >= min_trading_days, \
425
+ f"CRITICAL: Polygon has only {len(polygon_df)} days, expected >={min_trading_days}"
426
+ print(f" ✓ Both sources have >={min_trading_days} trading days")
427
+
428
+ # ==== CHECK 2: Same Number of Days ====
429
+ print(f"\n4. Verifying same number of trading days...")
430
+ if len(theta_df) != len(polygon_df):
431
+ print(f"\n ✗ MISMATCH: ThetaData={len(theta_df)} days, Polygon={len(polygon_df)} days")
432
+ print(f"\n ThetaData dates:")
433
+ for dt in theta_df.index:
434
+ print(f" {dt.date()}")
435
+ print(f"\n Polygon dates:")
436
+ for dt in polygon_df.index:
437
+ print(f" {dt.date()}")
438
+ pytest.fail(f"CRITICAL: Different number of trading days: Theta={len(theta_df)}, Polygon={len(polygon_df)}")
439
+ print(f" ✓ Same number of trading days: {len(theta_df)}")
440
+
441
+ # ==== CHECK 3: IDENTICAL TIMESTAMPS ====
442
+ print(f"\n5. Verifying IDENTICAL timestamps (ZERO tolerance for shifts)...")
443
+
444
+ # Convert to date for comparison (ignore time component)
445
+ theta_dates = [dt.date() for dt in theta_df.index]
446
+ polygon_dates = [dt.date() for dt in polygon_df.index]
447
+
448
+ mismatched_dates = []
449
+ for i, (theta_date, polygon_date) in enumerate(zip(theta_dates, polygon_dates)):
450
+ if theta_date != polygon_date:
451
+ mismatched_dates.append((i, theta_date, polygon_date))
452
+
453
+ if mismatched_dates:
454
+ print(f"\n ✗ CRITICAL: TIMESTAMP MISMATCH DETECTED!")
455
+ print(f"\n {'Index':<10} {'ThetaData':<15} {'Polygon':<15} {'Shift (days)'}")
456
+ print(f" {'-'*60}")
457
+ for idx, theta_date, polygon_date in mismatched_dates:
458
+ shift = (theta_date - polygon_date).days
459
+ print(f" {idx:<10} {theta_date} {polygon_date} {shift:+d}")
460
+ pytest.fail(f"CRITICAL: {len(mismatched_dates)} timestamp mismatches found!")
461
+
462
+ print(f" ✓ ALL timestamps match perfectly (0 shifts)")
463
+
464
+ # ==== CHECK 4: OHLC PRICE ACCURACY ====
465
+ print(f"\n6. Verifying OHLC prices (penny-level tolerance: $0.01)...")
466
+
467
+ # Create aligned DataFrame for comparison
468
+ comparison_data = []
469
+ max_diff = {'open': 0.0, 'high': 0.0, 'low': 0.0, 'close': 0.0}
470
+
471
+ for theta_idx, polygon_idx in zip(theta_df.index, polygon_df.index):
472
+ theta_row = theta_df.loc[theta_idx]
473
+ polygon_row = polygon_df.loc[polygon_idx]
474
+
475
+ diffs = {
476
+ 'open': abs(theta_row['open'] - polygon_row['open']),
477
+ 'high': abs(theta_row['high'] - polygon_row['high']),
478
+ 'low': abs(theta_row['low'] - polygon_row['low']),
479
+ 'close': abs(theta_row['close'] - polygon_row['close'])
480
+ }
481
+
482
+ for field in ['open', 'high', 'low', 'close']:
483
+ max_diff[field] = max(max_diff[field], diffs[field])
484
+
485
+ comparison_data.append({
486
+ 'date': theta_idx.date(),
487
+ 'theta_open': theta_row['open'],
488
+ 'poly_open': polygon_row['open'],
489
+ 'diff_open': diffs['open'],
490
+ 'theta_close': theta_row['close'],
491
+ 'poly_close': polygon_row['close'],
492
+ 'diff_close': diffs['close'],
493
+ })
494
+
495
+ # TOLERANCE: Stocks require ZERO tolerance, indexes allow fractional cent (rounding)
496
+ # Stocks: ZERO tolerance - regulated data must match EXACTLY
497
+ # Indexes: $0.001 tolerance - calculated values may have fractional cent rounding differences
498
+ tolerance = 0.001 if asset_type == "index" else 0.00
499
+ failures = []
500
+
501
+ for field in ['open', 'high', 'low', 'close']:
502
+ if max_diff[field] > tolerance:
503
+ failures.append(f"{field}: max diff ${max_diff[field]:.4f}")
504
+
505
+ if failures:
506
+ # Add Yahoo Finance 3-way comparison for failed days
507
+ import yfinance as yf
508
+
509
+ print(f"\n ✗ PRICE TOLERANCE EXCEEDED:")
510
+ for failure in failures:
511
+ print(f" {failure}")
512
+
513
+ print(f"\n 3-WAY COMPARISON (ThetaData vs Polygon vs Yahoo):")
514
+ print(f" {'Date':<12} {'Theta':<10} {'Polygon':<10} {'Yahoo':<10} {'Which Match?':<20}")
515
+ print(f" {'-'*70}")
516
+
517
+ ticker = yf.Ticker(symbol)
518
+ for row in comparison_data[:10]:
519
+ try:
520
+ from datetime import timedelta as td
521
+ date_obj = row['date']
522
+ date_str = date_obj.strftime('%Y-%m-%d')
523
+ next_date = (date_obj + td(days=1)).strftime('%Y-%m-%d')
524
+ yahoo_hist = ticker.history(start=date_str, end=next_date, interval='1d')
525
+ yahoo_close = yahoo_hist.iloc[0]['Close'] if len(yahoo_hist) > 0 else None
526
+
527
+ t_close = row['theta_close']
528
+ p_close = row['poly_close']
529
+ y_close = yahoo_close
530
+
531
+ # Check which ones match
532
+ tp_match = abs(t_close - p_close) < 0.01
533
+ ty_match = abs(t_close - y_close) < 0.01 if y_close else False
534
+ py_match = abs(p_close - y_close) < 0.01 if y_close else False
535
+
536
+ if tp_match and ty_match and py_match:
537
+ match_str = "✅ All match"
538
+ elif py_match:
539
+ match_str = "❌ Polygon+Yahoo (Theta wrong)"
540
+ elif ty_match:
541
+ match_str = "❌ Theta+Yahoo (Polygon wrong)"
542
+ elif tp_match:
543
+ match_str = "❌ Theta+Polygon (Yahoo wrong)"
544
+ else:
545
+ match_str = "❌ None match!"
546
+
547
+ print(f" {date_str:<12} ${t_close:<9.2f} ${p_close:<9.2f} ${y_close:<9.2f} {match_str}")
548
+ except:
549
+ print(f" {date_str:<12} ${row['theta_close']:<9.2f} ${row['poly_close']:<9.2f} {'N/A':<9} Yahoo error")
550
+
551
+ pytest.fail(f"CRITICAL: Price tolerance exceeded: {', '.join(failures)}")
552
+
553
+ print(f" ✓ All prices within ${tolerance:.2f} tolerance")
554
+ print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
555
+ f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")
556
+
557
+ # ==== CHECK 5: Exact Timestamp Alignment ====
558
+ print(f"\n6. Verifying EXACT timestamp alignment (no shifts allowed)...")
559
+ timestamp_mismatches = []
560
+ for i, (theta_ts, polygon_ts) in enumerate(zip(theta_df.index, polygon_df.index)):
561
+ if theta_ts.date() != polygon_ts.date():
562
+ timestamp_mismatches.append((i, theta_ts, polygon_ts))
563
+
564
+ if timestamp_mismatches:
565
+ print(f"\n ✗ TIMESTAMP MISMATCH DETECTED:")
566
+ for idx, theta_ts, polygon_ts in timestamp_mismatches[:10]:
567
+ print(f" Index {idx}: Theta={theta_ts.date()}, Polygon={polygon_ts.date()}")
568
+ pytest.fail(f"CRITICAL: {len(timestamp_mismatches)} timestamp mismatches!")
569
+
570
+ print(f" ✓ ALL timestamps match EXACTLY (0 day shifts)")
571
+
572
+ # ==== CHECK 6: No Duplicates ====
573
+ print(f"\n7. Verifying no duplicate dates...")
574
+ theta_duplicates = theta_df.index[theta_df.index.duplicated()].tolist()
575
+ polygon_duplicates = polygon_df.index[polygon_df.index.duplicated()].tolist()
576
+
577
+ if theta_duplicates:
578
+ pytest.fail(f"CRITICAL: ThetaData has duplicate dates: {theta_duplicates}")
579
+ if polygon_duplicates:
580
+ pytest.fail(f"CRITICAL: Polygon has duplicate dates: {polygon_duplicates}")
581
+
582
+ print(f" ✓ No duplicate dates in either source")
583
+
584
+ # ==== CHECK 6: Volume Sanity ====
585
+ print(f"\n8. Verifying volume data...")
586
+ if 'volume' in theta_df.columns and 'volume' in polygon_df.columns:
587
+ theta_zero_vol = (theta_df['volume'] == 0).sum()
588
+ polygon_zero_vol = (polygon_df['volume'] == 0).sum()
589
+
590
+ if theta_zero_vol > len(theta_df) * 0.1: # More than 10% zero volume
591
+ print(f" ⚠ WARNING: ThetaData has {theta_zero_vol}/{len(theta_df)} days with zero volume")
592
+ if polygon_zero_vol > len(polygon_df) * 0.1:
593
+ print(f" ⚠ WARNING: Polygon has {polygon_zero_vol}/{len(polygon_df)} days with zero volume")
594
+
595
+ print(f" ✓ Volume data present (Theta: {theta_zero_vol} zero days, Polygon: {polygon_zero_vol} zero days)")
596
+
597
+ # ==== FINAL SUMMARY ====
598
+ print(f"\n{'='*80}")
599
+ print(f"✓✓✓ {symbol} DAILY DATA VALIDATION PASSED ✓✓✓")
600
+ print(f" Trading days: {len(theta_df)}")
601
+ print(f" Timestamps: PERFECT MATCH (0 shifts)")
602
+ print(f" Prices: ALL within $0.01")
603
+ print(f" Period: {theta_df.index[0].date()} to {theta_df.index[-1].date()}")
604
+ print(f"{'='*80}\n")
605
+
606
+
607
@pytest.mark.apitest
class TestIntradayDataComparison:
    """
    Comprehensive intraday interval comparison (5min, 10min, 15min, 30min, hour).
    Tests ThetaData server-side intervals vs Polygon client-side aggregation.

    Strictness actually enforced by the test:
      * Bar counts may differ by at most ONE bar between the two sources (the
        16:00 edge bar); when they do, both frames are trimmed to the shorter
        length before comparison.
      * ThetaData's bar count must be within one bar of the parametrized
        ``expected_bars`` for a full regular session.
      * Timestamps of the compared bars must be identical.
      * OHLC prices must agree within half a penny; a NaN price is a failure.
    """

    # Maximum absolute OHLC difference (in dollars) accepted between sources.
    # The user-facing messages below quote this value as "$0.005".
    PRICE_TOLERANCE = 0.005

    # Price columns compared bar-by-bar.
    PRICE_FIELDS = ('open', 'high', 'low', 'close')

    # How each column is folded when minute bars are resampled to a wider bar.
    OHLCV_AGGREGATION = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    def _aggregate_minute_bars(self, minute_df, resample_rule, interval):
        """Resample minute OHLCV bars to ``resample_rule`` and drop empty bins.

        For the ``"hour"`` interval the bins are shifted by 30 minutes so that
        they start on the half hour (aligned with the 9:30 AM ET market open)
        instead of on the hour.
        """
        resample_kwargs = {'offset': '30min'} if interval == "hour" else {}
        resampled = minute_df.resample(resample_rule, **resample_kwargs)
        return resampled.agg(dict(self.OHLCV_AGGREGATION)).dropna()

    def _compare_prices(self, theta_df, polygon_df):
        """Compare OHLC values of two frames row-by-row, by position.

        Returns a ``(max_diff, failures)`` tuple: ``max_diff`` maps each price
        field to the largest absolute difference seen, and ``failures`` is a
        list of dicts (timestamp, field, theta, polygon, diff) for every value
        whose difference is not within ``PRICE_TOLERANCE``.
        """
        max_diff = {field: 0.0 for field in self.PRICE_FIELDS}
        failures = []

        # Rows are paired positionally (timestamps are verified separately by
        # the caller); positional access also stays scalar-valued even if an
        # index were to contain a duplicated timestamp.
        paired_index = zip(theta_df.index, polygon_df.index)
        for pos, (theta_ts, _polygon_ts) in enumerate(paired_index):
            theta_row = theta_df.iloc[pos]
            polygon_row = polygon_df.iloc[pos]

            for field in self.PRICE_FIELDS:
                diff = abs(theta_row[field] - polygon_row[field])
                max_diff[field] = max(max_diff[field], diff)

                # Written as `not (diff <= tol)` rather than `diff > tol`:
                # NaN compares False either way, and a missing price must be
                # reported as a failure instead of silently passing.
                if not diff <= self.PRICE_TOLERANCE:
                    failures.append({
                        'timestamp': theta_ts,
                        'field': field,
                        'theta': theta_row[field],
                        'polygon': polygon_row[field],
                        'diff': diff,
                    })

        return max_diff, failures

    @pytest.mark.parametrize("interval,resample_rule,expected_bars", [
        ("5minute", "5min", 78),
        ("10minute", "10min", 39),
        ("15minute", "15min", 26),
        ("30minute", "30min", 13),
        ("hour", "1h", 7),
    ])
    def test_theta_vs_polygon_intervals(self, interval, resample_rule, expected_bars):
        """Test ThetaData intervals match Polygon aggregated data.

        Args:
            interval: ThetaData timespan name passed to ``get_price_data``.
            resample_rule: pandas resample rule used to aggregate Polygon
                minute bars to the same interval.
            expected_bars: number of bars expected for the session at this
                interval; ThetaData's count must be within one bar of it.
        """
        import pytz

        username = os.environ.get("THETADATA_USERNAME")
        password = os.environ.get("THETADATA_PASSWORD")
        polygon_api_key = os.environ.get("POLYGON_API_KEY")

        asset = Asset("SPY", asset_type="stock")

        # Use timezone-aware datetimes (ET) to properly filter RTH
        et_tz = pytz.timezone("America/New_York")
        start_et = et_tz.localize(datetime.datetime(2025, 9, 15, 9, 30))  # 9:30 AM ET
        end_et = et_tz.localize(datetime.datetime(2025, 9, 15, 16, 0))  # 4:00 PM ET
        start = start_et.astimezone(pytz.UTC)
        end = end_et.astimezone(pytz.UTC)

        print(f"\n{'='*80}")
        print(f"TESTING {asset.symbol} {interval.upper()} INTERVAL: {start_et.date()}")
        print(f"{'='*80}")

        # ==== GET THETADATA SERVER-SIDE AGGREGATED DATA ====
        print(f"\n1. Fetching ThetaData {interval} data...")
        try:
            theta_df = thetadata_helper.get_price_data(
                username=username,
                password=password,
                asset=asset,
                start=start,
                end=end,
                timespan=interval,
                include_after_hours=False  # RTH only for fair comparison with Polygon
            )
        except Exception as e:
            pytest.fail(f"CRITICAL: ThetaData {interval} FAILED: {e}")

        if theta_df is None or len(theta_df) == 0:
            pytest.fail(f"CRITICAL: ThetaData returned NO {interval} data")

        print(f" ✓ ThetaData: {len(theta_df)} {interval} bars")
        print(f" First bar: {theta_df.index[0]}")
        print(f" Last bar: {theta_df.index[-1]}")

        # ==== GET POLYGON MINUTE DATA AND AGGREGATE CLIENT-SIDE ====
        print(f"\n2. Fetching Polygon minute data and aggregating to {interval}...")
        try:
            polygon_minute_df = polygon_get_price_data(
                api_key=polygon_api_key,
                asset=asset,
                start=start,
                end=end,
                timespan="minute",
                quote_asset=Asset("USD", asset_type="forex")
            )
        except Exception as e:
            pytest.fail(f"CRITICAL: Polygon minute data FAILED: {e}")

        if polygon_minute_df is None or len(polygon_minute_df) == 0:
            pytest.fail("CRITICAL: Polygon returned NO minute data")

        # Filter to RTH only (9:30 AM - 4:00 PM ET) before aggregating
        # Polygon may return extended hours data - we need to filter it manually
        # NOTE(review): the upper bound is inclusive, so a minute bar stamped
        # exactly 16:00 is kept; this is what produces the ±1 bar edge case
        # tolerated in CHECK 1 below.
        in_session = (polygon_minute_df.index >= start) & (polygon_minute_df.index <= end)
        polygon_minute_rth = polygon_minute_df[in_session]

        if len(polygon_minute_rth) == 0:
            pytest.fail("CRITICAL: Polygon returned NO RTH minute data")

        # Aggregate Polygon minute data
        # For hourly, offset to align with market open (9:30 AM = 13:30 UTC)
        polygon_agg_df = self._aggregate_minute_bars(polygon_minute_rth, resample_rule, interval)

        print(f" ✓ Polygon: {len(polygon_agg_df)} {interval} bars (aggregated from {len(polygon_minute_rth)} RTH minute bars)")
        print(f" First bar: {polygon_agg_df.index[0]}")
        print(f" Last bar: {polygon_agg_df.index[-1]}")

        # ==== CHECK 1: Bar Count - Allow ±1 for 16:00 bar edge case ====
        print("\n3. Verifying bar count match...")

        # Guard against both sources being truncated in the same way: the
        # ThetaData count must be within one bar (same edge-case slack as
        # below) of the count expected for this interval.
        if abs(len(theta_df) - expected_bars) > 1:
            print("\n ✗ CRITICAL: Unexpected bar count!")
            print(f" ThetaData: {len(theta_df)} bars")
            print(f" Expected: {expected_bars} bars (±1)")
            pytest.fail(
                f"CRITICAL: ThetaData returned {len(theta_df)} {interval} bars, "
                f"expected {expected_bars} (±1)"
            )

        # ThetaData RTH ends at 15:55 for intraday (no 16:00 bar), Polygon may include 16:00
        # This is acceptable behavior - both are correct interpretations of "4 PM close"
        bar_diff = abs(len(theta_df) - len(polygon_agg_df))

        if bar_diff > 1:
            print("\n ✗ CRITICAL: Bar count MISMATCH!")
            print(f" ThetaData: {len(theta_df)} bars")
            print(f" Polygon: {len(polygon_agg_df)} bars")
            print(f" Difference: {bar_diff} bars")
            pytest.fail(f"CRITICAL: Bar count diff {bar_diff} > 1. Theta={len(theta_df)}, Polygon={len(polygon_agg_df)}")

        if bar_diff == 1:
            print(" ⚠ Bar count off by 1 (acceptable for 16:00 bar edge case)")
            print(f" ThetaData: {len(theta_df)} bars (ends {theta_df.index[-1]})")
            print(f" Polygon: {len(polygon_agg_df)} bars (ends {polygon_agg_df.index[-1]})")
            # Use shorter dataset for comparison
            min_len = min(len(theta_df), len(polygon_agg_df))
            theta_df = theta_df.iloc[:min_len]
            polygon_agg_df = polygon_agg_df.iloc[:min_len]
        else:
            print(f" ✓ EXACT match: {len(theta_df)} bars")

        # ==== CHECK 2: EXACT Timestamp Match ====
        print("\n4. Verifying EXACT timestamp alignment...")
        timestamp_mismatches = [
            (i, theta_ts, polygon_ts)
            for i, (theta_ts, polygon_ts) in enumerate(zip(theta_df.index, polygon_agg_df.index))
            if theta_ts != polygon_ts
        ]

        if timestamp_mismatches:
            print("\n ✗ TIMESTAMP MISMATCH DETECTED!")
            print(f"\n {'Index':<8} {'ThetaData':<25} {'Polygon':<25} {'Shift (seconds)'}")
            print(f" {'-'*75}")
            for idx, theta_ts, polygon_ts in timestamp_mismatches[:10]:
                shift = (theta_ts - polygon_ts).total_seconds()
                print(f" {idx:<8} {theta_ts} {polygon_ts} {shift:+.0f}s")
            pytest.fail(f"CRITICAL: {len(timestamp_mismatches)} timestamp mismatches!")

        print(" ✓ ALL timestamps match EXACTLY (0 shifts)")

        # ==== CHECK 3: Price Accuracy (half-penny tolerance) ====
        print("\n5. Verifying OHLC prices (half-penny tolerance: $0.005)...")

        max_diff, price_failures = self._compare_prices(theta_df, polygon_agg_df)

        if price_failures:
            print(f"\n ✗ PRICE TOLERANCE EXCEEDED ({len(price_failures)} failures):")
            for failure in price_failures[:10]:
                print(f" {failure['timestamp']} {failure['field']}: Theta=${failure['theta']:.4f}, "
                      f"Polygon=${failure['polygon']:.4f}, Diff=${failure['diff']:.4f}")
            pytest.fail(f"CRITICAL: {len(price_failures)} price differences exceed $0.005")

        print(" ✓ All prices within $0.005 tolerance")
        print(f" Max differences: open=${max_diff['open']:.4f}, high=${max_diff['high']:.4f}, "
              f"low=${max_diff['low']:.4f}, close=${max_diff['close']:.4f}")

        # ==== FINAL SUMMARY ====
        # Only claim an exact bar-count match when no edge bar had to be trimmed.
        if bar_diff == 0:
            bars_note = "EXACT match"
        else:
            bars_note = "compared after trimming 1 edge bar"

        print(f"\n{'='*80}")
        print(f"✓✓✓ {asset.symbol} {interval.upper()} VALIDATION PASSED ✓✓✓")
        print(f" Bars: {len(theta_df)} ({bars_note})")
        print(" Timestamps: PERFECT MATCH (0 shifts)")
        print(" Prices: ALL within $0.005 (half-penny)")
        print(f" Period: {theta_df.index[0]} to {theta_df.index[-1]}")
        print(f"{'='*80}\n")
798
+
799
+
800
if __name__ == "__main__":
    # Run this file's tests verbosely with output capturing disabled, and
    # propagate pytest's exit code so that running the file directly reports
    # failures to the calling shell / CI instead of always exiting with 0.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))